ggboxplot significance bracket position too high - r

I'm using the ggpubr library. For some reason, the significance brackets are placed too high. Is there any way to move them a little lower? I don't want to position each one individually.
library(ggpubr)
# Reproducible demo data: 150 values in three classes (shifted by 0, 10 and 20),
# each with a randomly drawn group (1-4) and a randomly drawn marker (Y or Z).
set.seed(1)
df <- data.frame(
  Value = c(rnorm(50), rnorm(50) + 10, rnorm(50) + 20),
  Group = sample(1:4, 150, replace = TRUE),
  Class = rep(LETTERS[1:3], each = 50),
  M = sample(LETTERS[25:26], 150, replace = TRUE)
)

# Pairs of groups for which a significance bracket is drawn.
group_pairs <- list(c("1", "2"), c("2", "3"), c("1", "4"))

# One box plot per Class x M panel, each panel with its own y-axis range.
faceted_boxes <- ggboxplot(
  df,
  x = "Group",
  y = "Value",
  facet.by = c("Class", "M"),
  scales = "free_y"
)
faceted_boxes + stat_compare_means(comparisons = group_pairs)

Here is a quick and dirty way: just export the plot as a PDF with a much greater height. You can also do this with pdf(height = ...).
then you got:

Related

Issue on boxplot in R language

May I ask how I can split each of these four boxplots into two, one for the pulse-meter readings of males and one for those of females?
# Read the measurements and draw the four pulse-meter columns side by side.
islands <- read.csv('Data.csv')
with(
  islands,
  boxplot(
    Pulse.meter.First..0m,
    Pulse.meter.25m,
    Pulse.meter.Second..0m,
    Pulse.meter.25m.1
  )
)
Things like
# Formula interface: one box of the 25m pulse-meter reading per level of Sex.
boxplot(islands$Pulse.meter.25m ~ islands$Sex)
can distinguish them, but this does not work for all four of them at the same time.
before
Wanna boxplot like this
Here is an example using random data, since you hadn't provided data to download. The key is to first transform the data from the 'wide' format as you currently have the data, with a column per value, to a 'long' format, where all values are in the same column with an additional label column. Then the interaction function can be used to create an interaction between the pulse meter type and sex.
# Simulated stand-in for the real data: 30 rows alternating Male/Female, with
# four pulse-meter readings stored in wide format (one column per reading).
islands <- data.frame(
  Sex = rep(c('Male', 'Female'), 15),
  Pulse.meter.First..0m = rnorm(30, mean = 2),
  Pulse.meter.25m = rnorm(30, mean = 1),
  Pulse.meter.Second..0m = rnorm(30, mean = 3),
  Pulse.meter.25m.1 = rnorm(30, mean = 4)
)

# Columns 2 to 5 hold the readings; their names become the measurement label.
measure_cols <- names(islands)[2:5]

# Wide -> long: all readings end up in `value`, tagged by `measurement`.
islands_long <- reshape(
  islands,
  direction = "long",
  varying = measure_cols,
  v.names = "value",
  times = measure_cols,
  timevar = 'measurement'
)

# One box per Sex x measurement combination; the smaller axis font keeps all
# x-axis labels visible.
boxplot(
  value ~ interaction(Sex, measurement),
  data = islands_long,
  pars = list(cex.axis = 0.5)
)
This generates:
library(ggplot2)
library(dplyr)
library(tidyverse)
# Example data: 20 subjects with a random gender and four pulse-meter readings
# in wide format (one column per reading).
df <- data.frame(
  Gender = sample(c("Male", "Female"), 20, replace = TRUE),
  Pulse.meter.First..0m = sample(10:60, 20, replace = FALSE),
  Pulse.meter.25m = sample(30:60, 20, replace = FALSE),
  Pulse.meter.Second..0m = sample(30:60, 20, replace = FALSE),
  Pulse.meter.25m.1 = sample(10:60, 20, replace = FALSE)
)

# Reading columns, in the order they should appear on the x-axis.
measurements <- setdiff(names(df), "Gender")

# Wide -> long, then fuse gender and reading name into one "Gender_reading"
# label. No group_by() is needed: nothing below is computed per group.
df <- df %>%
  pivot_longer(
    cols = all_of(measurements),
    names_to = "Pulse_meter",
    values_to = "Count"
  ) %>%
  unite("Groups", Gender, Pulse_meter)

# Female/Male pair for each reading, derived from the column names so the
# levels cannot drift out of sync with the data.
group_levels <- paste(
  rep(c("Female", "Male"), times = length(measurements)),
  rep(measurements, each = 2),
  sep = "_"
)
df$Groups <- factor(df$Groups, levels = group_levels)

# Short axis labels, listed in the same order as `group_levels`.
ggplot(data = df, aes(x = Groups, y = Count)) +
  geom_boxplot() +
  scale_x_discrete(
    labels = c(
      "(F,0m)", "(M,0m)", "(F,25m)", "(M,25m)",
      "(F,second_0m)", "(M,second_0m)", "(F,25m.1)", "(M,25m.1)"
    )
  ) +
  labs(y = "Counts") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Making groups from 2 different datasets in ggboxplot using R

I currently have been making box plots to represent my data to show the difference between RATE for 2 different treatments, CALC_ACT = Yes or No, but have only been using one dataset at a time.
I currently have two different datasets that I want to compare but don't know how to put them in the one boxplot.
I have shown below how I am using ggboxplot to represent a single dataset (PatientData).
What I would like is for that dataset to be grouped together and then the same data from my second dataset (PatientData2) to be on the plot next to it with the label of the dataset underneath each section.
Hopefully this makes sense... any tips?
# First dataset: patients 1-4, each measured without ("No") and with ("Yes")
# the CALC_ACT treatment.
PatientData <- data.frame(
  PATIENT_ID = c(1, 1, 2, 2, 3, 3, 4, 4),
  CALC_ACT = c("No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"),
  RATE = c(1, 0.1, 0.5, 0.6, 0.8, 1, 0.5, 0.4)
)

# Second dataset: same layout for patients 5-8.
PatientData2 <- data.frame(
  PATIENT_ID = c(5, 5, 6, 6, 7, 7, 8, 8),
  CALC_ACT = c("No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"),
  RATE = c(.4, 1, 0.5, 0.6, 0.3, 0.8, 0.6, 0.4)
)

# Box plot of RATE by treatment for the first dataset only.
treatment_colours <- c("#00AFBB", "#E7B800")
ggboxplot(
  PatientData,
  x = "CALC_ACT",
  y = "RATE",
  color = "CALC_ACT",
  palette = treatment_colours,
  order = c("No", "Yes"),
  ylab = "Rate",
  xlab = "Calcium"
)
You can combine the two datasets and plot the boxplot.
library(tidyverse)
# Stack both datasets, recording the name of the source object in `Dataset`.
both_datasets <- bind_rows(lst(PatientData, PatientData2), .id = 'Dataset')

# Fuse dataset name and treatment into one label, giving one box per pair.
labelled <- unite(both_datasets, 'CALC_ACT', Dataset, CALC_ACT)

ggplot(labelled, aes(CALC_ACT, RATE, color = CALC_ACT)) +
  geom_boxplot()
If I understand you correctly, I believe this is the solution:
library(tidyverse)
#install.packages("ggpubr")
library(ggpubr)
# Treatment labels shared by both datasets: No/Yes for each of four patients.
treatment <- c("No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes")

PatientData <- data.frame(
  PATIENT_ID = c(1, 1, 2, 2, 3, 3, 4, 4),
  CALC_ACT = treatment,
  RATE = c(1, 0.1, 0.5, 0.6, 0.8, 1, 0.5, 0.4)
)
PatientData2 <- data.frame(
  PATIENT_ID = c(5, 5, 6, 6, 7, 7, 8, 8),
  CALC_ACT = treatment,
  RATE = c(.4, 1, 0.5, 0.6, 0.3, 0.8, 0.6, 0.4)
)

# Stack the datasets; the list names end up in the `Source` column.
sources <- list(PatientData = PatientData, PatientData2 = PatientData2)
combined <- bind_rows(sources, .id = "Source")

# One facet per source dataset, with the No/Yes boxes side by side in each.
ggboxplot(
  combined,
  x = "CALC_ACT",
  y = "RATE",
  color = "CALC_ACT",
  facet.by = "Source",
  order = c("No", "Yes"),
  palette = c("#00AFBB", "#E7B800"),
  xlab = "Calcium",
  ylab = "Rate",
  strip.position = "bottom"
)

Multigroup frequency with ggplot

I'm trying to replicate this histogram in R.
Here is how to mock my dataset:
# Mock dataset: 120 rows with a random menu type code and two random 0/1 flags.
n_obs <- 120
menu_codes <- c(1, 2, 4, 5, 6, 8, 12)
dft <- data.frame(
  menutype = sample(menu_codes, n_obs, replace = TRUE),
  Belief = sample(c(0, 1), n_obs, replace = TRUE),
  Choice = sample(c(0, 1), n_obs, replace = TRUE)
)
Here is my code :
library(ggplot2)
library(dplyr)
library(tidyr)
library(MASS)
# Recode the raw columns as labelled factors.
# NOTE(review): this reads `menutype`, `belieflearn` and `learned` from an
# already existing `df`, whereas the mock data is called `dft` and has columns
# `Belief` / `Choice` - confirm which object and column names are intended.
df <- data.frame(
menutype = factor(df$menutype, labels = c("GUILT" , "SSB0", "SSB1", "FLEX0", "FLEX1", "STD", "FLEX01"),
levels = c(1,2,4,5,6,8,12)),
# Only level 1 is declared, so every other value becomes NA.
Belief = factor(df$belieflearn, levels = c(1), labels= c("Believe Learn")), # interested only in this condition
Choice = factor(df$learned, levels = c(1), labels= c("Learn")) # same here
)
# Two tables stacked on top of each other: for Belief and then for Choice,
# count rows per (flag, menutype), express each count as a share of its
# menutype total, and drop the rows containing NA.
# NOTE(review): a menutype in which the flag is never 1 yields no row at all
# rather than a row with prop 0 - possibly why a bar is missing in the plot.
df1 <- rbind(na.omit(df %>%
count(Belief, menutype) %>%
group_by(menutype) %>%
mutate(prop = n / sum(n))),
na.omit(df %>%
count(Choice, menutype) %>%
group_by(menutype) %>%
mutate(prop = n / sum(n))))
# Build one legend label per row of df1.
# NOTE(review): the index ranges 1:6 and 7:13 are hard-coded; they assume the
# first 6 rows come from the Belief table and rows 7-13 from the Choice table,
# which only holds for one particular dataset - verify against nrow(df1).
test <- paste(df1$Belief[1:6],paste(df1$Choice[7:13]))
test[1:6] <- paste(df1$Belief[1:6])
test[7:13] <- paste(df1$Choice[7:13])
df1$combine <- paste(test)
# Dodged bars of `prop` per menutype, filled by the combined label.
ggplot(data = df1, aes(menutype, prop, fill = combine)) +
labs(title = "Classification based on rank ordering\n", x = "", y = "Fraction of subjects", fill = "\n") +
geom_bar(stat = "identity", position = "dodge")+
theme_bw() +
theme(legend.position="bottom", plot.title = element_text(hjust = 0.5)) # centre the main title (the plot chain ends here; the layer below is commented out)
#geom_text(aes(label="ok"), vjust=-0.3, size=3.5)+
The problem is that it is more or less working: I'm almost getting the graph that I want, but it is a workaround and there are still some errors. For example, I get the same value (0.10) for both STD bars, while they should be 0 and 0.10 as in the original graph.
What I would like to do optimally is to have two different dataframe, one with menutype and Belief, the other one with menutype and Choice, then as I did, compute the proportion of a specific modality in each latter variables on menutype, and finally to plot it as histograms, much as the graph in the original study. Additionally, I'd like to have the proportions as fractions above each bar, but that is optional.
Could someone help me on this matter? I'm really struggling to get it working.
Thanks in advance!
EDIT: I think the issue is with the fill =. I would like to specify for each bar the variable I want (e.g, fill = df2$Belief & df2$Choice) but I don't know how to proceed.
library(tidyverse)
set.seed(10)

# Example data frame: 120 rows with a random menu type and two random 0/1 flags.
df <- data.frame(
  menutype = sample(c(1, 2, 4, 5, 6, 8, 12), 120, replace = TRUE),
  Belief = sample(c(0, 1), 120, replace = TRUE),
  Choice = sample(c(0, 1), 120, replace = TRUE)
)

# Within each Choice value, count the rows per (menutype, Belief) and express
# every count both as a proportion and as an "n/total" label for the bars.
df_plot <- df %>%
  group_by(Choice) %>%
  count(menutype, Belief) %>%
  mutate(
    prop = n / sum(n),
    prop_text = paste0(n, "/", sum(n))
  ) %>%
  ungroup()

# Bars and labels must share one dodge width, otherwise the labels drift away
# from the centre of their bars. 0.9 is the default bar width that
# geom_col(position = "dodge") dodges by.
bar_dodge <- position_dodge(width = 0.9)

# Bars split by Belief, one panel per Choice value.
df_plot %>%
  mutate(
    Belief = factor(Belief),
    menutype = factor(menutype)
  ) %>%
  ggplot(aes(menutype, prop, fill = Belief)) +
  geom_col(position = bar_dodge) +
  facet_wrap(~Choice, ncol = 1) +
  geom_text(aes(label = prop_text), position = bar_dodge, vjust = -0.5) +
  ylim(0, 0.2)

Specify the order for groups when using unite from dplyr for plotting with ggplot

I wanted to do something like this
Add multiple comparisons using ggsignif or ggpubr for subgroups with no labels on x-axis
I got this far:
Packages and Example data
library(tidyverse)
library(ggpubr)
library(ggpol)
library(ggsignif)
# Example data: 50 animals with random species, treatment and gender labels
# and a normally distributed response (mean 100, sd 5).
n_animals <- 50
example.df <- data.frame(
  species = sample(c("primate", "non-primate"), n_animals, replace = TRUE),
  treated = sample(c("Yes", "No"), n_animals, replace = TRUE),
  gender = sample(c("male", "female"), n_animals, replace = TRUE),
  var1 = rnorm(n_animals, 100, 5)
)
Levels
# Turn the three label columns into factors with an explicit level order and
# short display labels.
example.df[["species"]] <- factor(
  example.df[["species"]],
  levels = c("primate", "non-primate"),
  labels = c("p", "np")
)
example.df[["treated"]] <- factor(
  example.df[["treated"]],
  levels = c("No", "Yes"),
  labels = c("N", "Y")
)
example.df[["gender"]] <- factor(
  example.df[["gender"]],
  levels = c("male", "female"),
  labels = c("M", "F")
)
Since I have had no luck in getting either ggsignif or ggpubr to place the significance brackets correctly when the groups they refer to are not explicitly named on the x-axis (they are subgroups of each x-axis variable and are indicated only in the fill legend, not on the x-axis), I tried this instead.
# Fuse species and treatment into one x-axis label (on two lines), keeping the
# original columns so `treated` can still drive the fill colour.
plot_data <- unite(example.df, groups, species, treated, remove = FALSE, sep = "\n")

# Every pairwise combination of the combined groups gets a bracket.
group_pairs <- combn(sort(unique(plot_data$groups)), 2, simplify = FALSE)

ggplot(plot_data, aes(groups, var1, fill = treated)) +
  geom_boxjitter() +
  facet_wrap(~ gender, scales = "free") +
  ggsignif::geom_signif(comparisons = group_pairs, step_increase = 0.1)
I get this,
Faceted plot with significance values computed for every group
However, the order of the combined groups on the x -axis is not how I want it. I want to order it with p/N, np/N, p/Y, np/Y for each facet.
How do I do this? Any help is greatly appreciated.
Edit: Creating a new variable using mutate and making it an ordered factor with my preferred plotting order solves.
# Same combined label as before, plus an ordered factor copy (`groups2`) that
# fixes the plotting order to p/N, np/N, p/Y, np/Y.
plot_data <- unite(example.df, groups, species, treated, remove = FALSE, sep = "\n")
plot_data <- mutate(
  plot_data,
  groups2 = factor(
    groups,
    levels = c("p\nN", "np\nN", "p\nY", "np\nY"),
    ordered = TRUE
  )
)

# Every pairwise combination of the ordered groups gets a bracket.
group_pairs <- combn(sort(unique(plot_data$groups2)), 2, simplify = FALSE)

ggplot(plot_data, aes(groups2, var1, fill = treated)) +
  geom_boxjitter() +
  facet_wrap(~gender, scales = "free") +
  ggsignif::geom_signif(comparisons = group_pairs, step_increase = 0.1)
But I am still looking for solutions to not having to use unite at all and keeping the original factors and still get significance values to plot using ggsignif or ggpubr.
The default parameters for interaction (from the base package) appear to give the factor ordering you are looking for:
# interaction() builds the combined factor directly from the two original
# factors, so no unite() step and no manual level list are needed.
plot_data <- mutate(example.df, groups = interaction(species, treated, sep = "\n"))

# Pairwise comparisons are built from the group labels as plain strings.
group_labels <- sort(as.character(unique(plot_data$groups)))
group_pairs <- combn(group_labels, 2, simplify = FALSE)

ggplot(plot_data, aes(groups, var1, fill = treated)) +
  geom_boxjitter() +
  facet_wrap(~ gender, scales = "free") +
  geom_signif(comparisons = group_pairs, step_increase = 0.1)

Heatmaps for a matrix with ones and zeros using R

Below is my sample data; basically it's a matrix with person names as row names
and some columns for each of these rows. All I have in the data is zeros and ones. I would like to visualize it using a heatmap (red for 0s and green for 1s, or any other color coding). How do I accomplish this using R? You can show me using any example dataset with just ones and zeros (binary values).
Just another approach using ggplot
library(ggplot2)
library(reshape2)
library(plyr)
library(scales)
# Binary example data: one row per person, one 0/1 column per statistic.
df <- data.frame(
  people = factor(c("LeBron", "Dwayne"), levels = c("Dwayne", "LeBron")),
  G = c(1L, 0L),
  MIN = c(1L, 0L),
  PTS = c(0L, 1L),
  FGM = c(0L, 0L),
  FGA = c(0L, 0L),
  FGP = c(1L, 1L)
)

# Wide -> long with the id column named explicitly, so melt() does not have to
# guess it: one row per (person, statistic) pair.
df.m <- melt(df, id.vars = "people")

# The values are already 0/1, so `rescale` is simply a copy of `value`.
df1.m <- ddply(df.m, .(variable), transform, rescale = value)

# One tile per (statistic, person), outlined in black.
p <- ggplot(df1.m, aes(variable, people)) +
  geom_tile(aes(fill = rescale), colour = "black")

# Store the colour scale on `p`; previously the result of `p + scale...` was
# discarded, so the final plot was drawn without the green/red colours.
p <- p + scale_fill_gradient(low = "green", high = "red")
print(p)
Adopted from this tutorial
With highcharter:
library(highcharter)
library(tidyr)
library(dplyr)
# Binary example data: `row` holds the person, the other columns are 0/1 flags.
df <- data.frame(
  row = c("Dwayne", "James"),
  G = c(1, 0),
  MIN = c(1, 0),
  PTS = c(0, 1),
  FGM = c(0, 0),
  FGA = c(0, 0),
  FGP = c(1, 1),
  stringsAsFactors = FALSE
)
rownames(df) <- df$row

# Wide -> long: one row per (person, statistic) with columns name / variable /
# value. pivot_longer() replaces the superseded gather() call, which only
# worked through a duplicated `row` column name that setNames() then patched.
data <- df %>%
  tidyr::pivot_longer(cols = -row, names_to = "variable", values_to = "value") %>%
  dplyr::rename(name = row)

# Heatmap with two colour stops: red at the low end, green at the high end.
hchart(data, "heatmap", hcaes(x = variable, y = name, value = value)) %>%
  hc_colorAxis(stops = color_stops(2, c("red", "green")))
UPDATE:
You can add hc_size(height = 800) for height=800 or make something like that
# Height added to the chart for each distinct name (one heatmap row each).
x<-50
# Total chart height: `x` per distinct name plus a fixed extra 100.
hg<-length(unique(data$name))*x+100
# Heatmap with red/green colour stops, sized to the computed height.
hchart(data, "heatmap", hcaes(x = variable, y = name, value = value)) %>%
hc_colorAxis(stops = color_stops(2, c("red","green")))%>%
hc_size(height = hg)
Where each row in dataset makes chart bigger by 50 points. You can change it in x
This answer uses plotly and hence adding it as another answer. Using the same data as the following one.
library(plotly)
# The first column of `df` holds the row labels; only the remaining columns
# are heatmap cells. The x labels must therefore skip the first column name,
# otherwise there is one label more than `z` has columns and every label is
# shifted by one.
value_cols <- colnames(df)[-1]

p <- plot_ly(
  x = value_cols,
  y = df[[1]],
  z = as.matrix(df[-1]),
  colors = colorRamp(c("green", "red")),
  type = "heatmap"
)

# Display the widget; assigning it alone does not draw anything.
p
This is much simpler than the ggplot2 in terms of getting the output.
Hope this helps!

Resources