Related
I want to automate adding brackets/braces to a ggplot object and then convert it to plotly using ggplotly.
library(tidyverse)
library(plotly)
#devtools::install_github("NicolasH2/ggbrace")
library(ggbrace)
set.seed(10)
mydata <- data.frame(xx = c(1:10), yy = sample(0:10, 10))
my_bracket_data <- data.frame(start = c(1, 4, 6),
end = c(3, 5, 9),
info = c("first", "second", "third"),
y_bottom = rep(11, 3),
y_top = rep(12, 3))
I can do this by using geom_brace but it involves manually typing it out for each brace rather than using the data that is already stored in a data.frame:
plot_geom_brace <- ggplot(data = mydata, aes(xx, yy)) +
geom_line(size = 1.5) +
geom_brace(aes(x = c(1, 3), y = c(11, 12), label = "first"), inherit.data = F, labelsize = 5) +
geom_brace(aes(x = c(4, 5), y = c(11, 12), label = "second"), inherit.data = F, labelsize = 5) +
geom_brace(aes(x = c(6, 9), y = c(11, 12), label = "third"), inherit.data = F, labelsize = 5)
plot_geom_brace
ggplotly(plot_geom_brace)
Is there a way that I can do this without repeatedly writing geom_brace layers for each brace (and instead access the data from my_bracket_data directly)?
As an aside this can be automated using geom_bracket but this is not supported by plotly yet.
library(ggpubr)
plot_geom_bracket <- ggplot(data = mydata, aes(xx, yy)) +
geom_line(size = 1.5) +
geom_bracket(xmin = my_bracket_data$start,
xmax = my_bracket_data$end,
y.position = rep(11, 3),
label = my_bracket_data$info,
inherit.aes = FALSE)
plot_geom_bracket
ggplotly(plot_geom_bracket)
# Warning message:
# In geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
# geom_GeomBracket() has yet to be implemented in plotly.
# If you'd like to see this geom implemented,
# Please open an issue with your example code at
# https://github.com/ropensci/plotly/issues
Any suggestions?
Thanks
An option could be by creating two dataframes of your bracket data. One dataframe for the geom_braces by converting the data to a longer format with pivot_longer to create three geom braces through the aes. To get the labels you can create a small summarise table with the x and y positions per group of info. Here is some reproducible code:
library(tidyverse)
library(plotly)
#devtools::install_github("NicolasH2/ggbrace")
library(ggbrace)
set.seed(10)
mydata <- data.frame(xx = c(1:10), yy = sample(0:10, 10))
my_bracket_data <- data.frame(start = c(1, 4, 6),
end = c(3, 5, 9),
info = c("first", "second", "third"),
y_bottom = rep(11, 3),
y_top = rep(12, 3))
# Data for geom_brace
my_bracket_data_long <- my_bracket_data %>%
pivot_longer(cols = c(start, end), values_to = "x_value", names_to = "x_names") %>%
pivot_longer(cols = c(y_bottom, y_top), values_to = "y_value", names_to = "y_names")
# data for labels braces
my_bracket_data_labels <- my_bracket_data_long %>%
group_by(info) %>%
summarise(x_pos = mean(x_value),
y_pos = mean(y_value) + 1)
# plot
plot_geom_brace <- ggplot(data = mydata, aes(xx, yy)) +
geom_line(size = 1.5) +
geom_brace(data = my_bracket_data_long, aes(x = x_value, y = y_value, group = info)) +
geom_text(data = my_bracket_data_labels, aes(x = x_pos, y = y_pos, group = info, label = info))
ggplotly(plot_geom_brace)
Created on 2023-01-07 with reprex v2.0.2
special ggplot2 libraries like ggpubr usually don't play along well with conversion to plotly objects.
If you dont want to type out each geom_bracet call you could loop over the rows of the dataframe, create the geom statement using paste and pass it to the existing plot object using eval in the following line:
m<- data.frame(s = c(1, 4, 6), ## = my_bracket_data
e = c(3, 5, 9),
i = c("first", "second", "third"),
y_b = rep(11, 3),
y_t = rep(12, 3))
p<- ggplot(data = mydata, aes(xx, yy)) + geom_line(size = 1.5)
for (i in 1:NROW(my_bracket_data)) {
input = paste('geom_brace(aes(x =c(',m[i,]$s,',',m[i,]$e'),c(',m[i,]$y_b,',',
m[i,]$y_t,'),label=',m[i,]$i,'), inherit.data = F, labelsize = 5)',sep='')
p = p + eval(parse(text=input))
}
p
However this is more of a hacky solution, but that's what R tends to become if you incorporate/mix different styles like for example apply functions with tidyr syntax (or in this case ggplot, which could be seen as an ancestor of tidyr) and more programming style approaches (for, while , func...[yes you can programm in R]) and also want to let it automatically converse the whole thing to a Javascript thing (aka plotly) . .. its a beautiful mess .
I have the following data frame which contains 4 columns of data in addition to the vector of labels c.
Time <-c(1:4)
d<-data.frame(Time,
x1= rpois(n = 4, lambda = 10),
x2= runif(n = 4, min = 1, max = 10),
x3= rpois(n = 4, lambda = 5),
x4= runif(n = 4, min = 1, max = 5),
c=c(1,1,2,3))
I would like to use ggpolt to plot 4 curves"x1,..,x4" above each others where each curve is colored according to the label. So curves x1 and x2 are colored by the same color since they have the same label where as curves x3 and x4 in different colors.
I did the following
d %>% pivot_longer(-c(Time,x1,x2,x3,x4))%>%
rename(class=value) %>% select(-name) %>%
pivot_longer(-c(Time,class)) %>%
mutate(Label=ifelse(Time==max(Time,na.rm = T),name,NA),
Label=ifelse(duplicated(Label),NA,Label)) %>%
ggplot(aes(x=Time,y=value,color=factor(class),group=name))+
geom_line()+
labs(color='class')+
scale_color_manual(values=c('red','blue','green'))+
geom_label_repel(aes(label = Label),
nudge_x = 1.5,
na.rm = TRUE,show.legend = F,color='black')
but I don't get the needed plot, the resulted curves are not colored according to the label. I want x1 and x2 in red, x3 in blue and x4 in green.
To add: I would like to obtain the same plot above in the following general case, where I can't add the vector c to the data frame as length(c) is not equal to length(x1)=...=length(x4)
Time <-c(1:5)
d<-data.frame(Time,
x1= rpois(n = 5, lambda = 10),
x2= runif(n = 5, min = 1, max = 10),
x3= rpois(n = 5, lambda = 5),
x4= runif(n = 5, min = 1, max = 5))
and c=c(1,1,2,3)
As you point out in your comments, it is only possible to put the vector of colors as a column in the original data.frame because it happens to be square, but this is a dangerous way to store the information because the colors really belong to the columns rather than the rows. It's better to assign the colors separately and then join into the long format data by variable name prior to plotting.
Below is an example of how I'd do this with your data.
First, prepare the data without the color mapping for each variable, we'll do that next:
# load necessary packages
library(tidyverse)
library(ggrepel)
# set seed to make simulated data reproducible
set.seed(1)
# simulate data
Time <-c(1:4)
d <- data.frame(Time,
x1 = rpois(n = 4, lambda = 10),
x2 = runif(n = 4, min = 1, max = 10),
x3 = rpois(n = 4, lambda = 5),
x4 = runif(n = 4, min = 1, max = 5))
Next, make a separate data.frame that maps the color grouping to the variable names. At some point you'll want to make this a factor (i.e. discrete rather than continuous) to map it to color so I just do it here but it can be done later in the ggplot call if you prefer. Per your request, this solution easily scales with your dataset without needing to manually set each level, but it requires that your vector of color mappings is in the same order and the same length as the variable names in d unless you have some other way to establish that relationship.
# create separate df with color groupings for variable in d
color_grouping <- data.frame(var = names(d)[-1],
color_group = factor(c(1, 1, 2, 3)))
Then you pivot_longer and do a join to merge the color mapping with the data for plotting.
# pivot d to long and merge in color codes
d_long <- d %>%
pivot_longer(cols = -Time, names_to = "var", values_to = "value") %>%
left_join(., color_grouping)
# inspect final table prior to plotting to confirm color mappings
head(d_long, 4)
# # A tibble: 4 x 4
# Time var value color_group
# <int> <chr> <dbl> <fct>
# 1 1 x1 8 1
# 2 1 x2 1.56 1
# 3 1 x3 4 2
# 4 1 x4 4.97 3
Finally, generate line plot where color is mapped to the color_group variable. To ensure you get one line per original variable you also need to set group = var. For more info on this check the documentation on grouping.
# plot data adding labels for each line
p <- d_long %>%
ggplot(aes(x = Time, y = value, group = var, color = color_group)) +
geom_line() +
labs(color='class') +
scale_color_manual(values=c('red','blue','green')) +
geom_label_repel(aes(label = var),
data = d_long %>% slice_max(order_by = Time, n = 1),
nudge_x = 1.5,
na.rm = TRUE,
show.legend = F,
color='black')
p
This produces the this plot:
In your comment you suggested wanting to separate out and stacking the plots. I'm not sure I fully understood, but one way to accomplish this is with faceting.
For example if you wanted to facet out separate panels by color_group, you could add this line to the plot above:
p + facet_grid(rows = "color_group")
Which gives this plot:
Note that the faceting variable must be put in quotes.
You were on the right path, but you need a little bit of a different structure to use ggplot:
# delete old color column
d$c <- NULL
# reshape df
plot.d <- reshape2::melt(d, id.vars = c("Time"))
# create new, correct color column
plot.d$c <- NA
plot.d$c[plot.d$variable == "x1"] <- 1
plot.d$c[plot.d$variable == "x2"] <- 1
plot.d$c[plot.d$variable == "x3"] <- 2
plot.d$c[plot.d$variable == "x4"] <- 3
# plot
ggplot(plot.d, aes(x=Time, y=value, color=as.factor(c), group = variable))+
geom_line() +
labs(color='class')+
scale_color_manual(values=c('red','blue','green'))
Note that I omitted the labels for brevity, but you can add them back in using the same logic. The code above gives the following result:
Here is a solution for how I understood your question.
The DF is brought in the long format, the variable c is replaced with mutate / case_when with the number code you have used.
I have set a seed for better reproducibility.
library(tidyverse)
library(ggrepel)
set.seed(1)
# YOUR DATA
Time <- c(1:4)
d <- data.frame(Time,
x1 = rpois(n = 4, lambda = 10),
x2 = runif(n = 4, min = 1, max = 10),
x3 = rpois(n = 4, lambda = 5),
x4 = runif(n = 4, min = 1, max = 5),
c = c(1, 1, 2, 3)
)
d %>%
pivot_longer(cols = x1:x4) %>% # make it long
mutate(c = as.factor(case_when( # replace consistently
name == "x1" | name == "x2" ~ 1, # according to YOUR DATA
name == "x3" ~ 2,
name == "x4" ~ 3
))) %>%
mutate(
Label = ifelse(Time == max(Time, na.rm = T), name, NA),
Label = ifelse(duplicated(Label), NA, Label)
) %>%
ggplot(aes(x = Time, y = value, color = c, group = name)) +
geom_line() +
labs(color = "class") +
scale_color_manual(values = c("red", "blue", "green")) + # YOUR CHOICE
geom_label_repel(aes(label = Label),
nudge_x = 1.5,
na.rm = TRUE, show.legend = F, color = "black"
)
ADDED
You could leave the c out and color according to name.
The color code was neccessary because you wanted 2 names with the same color. If that is not needed, the following code can do it.
d %>%
pivot_longer(cols = x1:x4) %>% # make it long
mutate(
Label = ifelse(Time == max(Time, na.rm = T), name, NA),
Label = ifelse(duplicated(Label), NA, Label)
) %>%
ggplot(aes(x = Time, y = value, color = name, group = name)) +
geom_line() +
geom_label_repel(aes(label = Label),
nudge_x = 1.5,
na.rm = TRUE, show.legend = F, color = "black"
)
I'm trying to create a line graph depicting different trajectories over time for two groups/conditions. I have two groups for which the data 'eat' was collected at five time points (1,2,3,4,5).
I'd like the lines to connect the mean point for each group at each of five time points, so I'd have two points at Time 1, two points at Time 2, and so on.
Here's a reproducible example:
#Example data
library(tidyverse)
library(ggplot2)
eat <- sample(1:7, size = 30, replace = TRUE)
df <- data.frame(id = rep(c(1, 2, 3, 4, 5, 6), each = 5),
Condition = rep(c(0, 1), each = 15),
time = c(1, 2, 3, 4, 5),
eat = eat
)
df$time <- as.factor(df$time)
df$Condition <- as.factor(df$Condition)
#Create the plot.
library(ggplot2)
ggplot(df, aes(x = time, y = eat, fill = Condition)) + geom_line() +
geom_point(size = 4, shape = 21) +
stat_summary(fun.y = mean, colour = "red", geom = "line")
The problem is, I need my lines to go horizontally (ie to show two different colored lines moving across the x-axis). But this code just connects the dots vertically:
If I don't convert Time to a factor, but only convert Condition to a factor, I get a mess of lines. The same thing happens in my actual data, as well.
I'd like it to look like this aesthetically, with the transparent error envelopes wrapping each line. However, I don't want it to be curvy, I want the lines to be straight, connecting the means at each point.
Here's the lines running in straight segments through the means of each time, with the range set to be the standard deviation of the points at the time. One stat.summary makes the mean line with the colour aesthetic, the other makes the area using the inherited fill aesthetic. ggplot2::mean_se is a convenient function that takes a vector and returns a data frame with the mean and +/- some number of standard errors. This is the right format for thefun.data argument to stat_summary, which passes these values to the geom specified. Here, geom_ribbon accepts ymin and ymax values to plot a ribbon across the graph.
library(tidyverse)
set.seed(12345)
eat <- sample(1:7, size = 30, replace = T)
df <- data.frame(
Condition = rep(c(0, 1), each = 15),
time = c(1, 2, 3, 4, 5),
eat = eat
)
df$Condition <- as.factor(df$Condition)
ggplot(df, aes(x = time, y = eat, fill = Condition)) +
geom_point(size = 4, shape = 21, colour = "black") +
stat_summary(geom = "ribbon", fun.data = mean_se, alpha = 0.2) +
stat_summary(
mapping = aes(colour = Condition),
geom = "line",
fun.y = mean,
show.legend = FALSE
)
Created on 2018-07-09 by the reprex package (v0.2.0).
Here's my best guess at what you want:
# keep time as numeric
df$time = as.numeric(as.character(df$time))
ggplot(df, aes(x = time, y = eat, group = Condition)) +
geom_smooth(
aes(fill = Condition, linetype = Condition),
method = "lm",
level = 0.65,
color = "black",
size = 0.3
) +
geom_point(aes(color = Condition))
Setting the level = 0.65 is about +/- 1 standard deviation on the linear model fit.
I think this code will get you most of the way there
library(tidyverse)
eat <- sample(1:7, size = 30, replace = TRUE)
tibble(id = rep(c(1, 2, 3, 4, 5, 6), each = 5),
Condition = factor(rep(c(0, 1), each = 15)),
time = factor(rep(c(1, 2, 3, 4, 5), 6)),
eat = eat) %>%
ggplot(aes(x = time, y = eat, fill = Condition, group = Condition)) +
geom_point(size = 4, shape = 21) +
geom_smooth()
geom_smooth is what you were looking for, I think. This creates a linear model out of the points, and as long as your x value is a factor, it should use the mean and connect the points that way.
I have dataframe that contains data on the number of TVs and radios owned by survey respondents now and before:
DF <- data.frame(TV_now = as.numeric(c(4, 9, 1, 0, 4, NA)),
TV_before = as.numeric(c(4, 1, 2, 4, 5, 2)),
Radio_now = as.numeric(c(4, 5, 1, 5, 6, 9)),
Radio_before = as.numeric(c(6, 5, 3, 6, 7, 10)))
I want to sum the total value of each variable and then create a barplot that shows the number of TVs and radios owned by survey respondents now and before.
I can manually create a new dataframe that contains just the sum of the value of each variable in the original DF
DFsum <- data.frame(TV_now = as.numeric(c(sum(DF$TV_now,na.rm = TRUE))),
TV_before = as.numeric(c(sum(DF$TV_before,na.rm = TRUE))),
Radio_now = as.numeric(c(sum(DF$TV_now,na.rm = TRUE))),
Radio_before = as.numeric(c(sum(DF$Radio_before,na.rm = TRUE))))
and then use tidyr to do the following:
library(tidyr)
library(ggplot2)
DFsum %>%
gather(key=Device, value=Number) %>%
ggplot(aes(x=Number,fill=Device)) +
geom_bar(aes(x = Device, y = Number), position = "dodge", stat = "identity")
This gives me the result I want, but seems unnecessarily complicated for what should be easy to achieve. Is there an easier way to plot this?
You can simplify your code with use of dplyr::mutate_all since you are summarizing all your columns:
library(tidyverse)
library(ggplot2)
DF %>% mutate_all(funs(sum), na.rm = TRUE) %>%
gather(key=Device, value=Number) %>%
ggplot(aes(x=Device,fill=Device)) +
geom_bar(aes(x = Device, y = Number), position = "dodge", stat = "identity")
Simplify data creation. R knows that 4, 9, 1, etc., are numbers, you don't need as.numeric.
DF <- data.frame(TV_now = c(4, 9, 1, 0, 4, NA),
TV_before = c(4, 1, 2, 4, 5, 2),
Radio_now = c(4, 5, 1, 5, 6, 9),
Radio_before = c(6, 5, 3, 6, 7, 10))
Simplify the data manipulation. Tidy your data (convert it to long format) first, then do other things:
DF_long = gather(DF, key = "device") %>%
group_by(device) %>%
summarize(number = sum(value, na.rm = TRUE))
Simplify the plotting. Aesthetics are inherited - you don't need to specify them multiple times. geom_col is preferred to geom_bar with stat = "identity". position = "dodge" does nothing when there is one group per x index.
ggplot(aes(x = device, y = number, fill = device)) +
geom_col()
I generally prefer to do my own data manipulation, but we can also lean on ggplots stacking bars to replace the summing, making the entire code:
gather(DF, key = "device", value = "number") %>%
ggplot(aes(x = device, y = number, fill = device)) +
geom_col()
Base approach
dev = colSums(DF, na.rm = TRUE)
barplot(dev, col = factor(names(dev)))
say I have the means of two datasets that I want to plot as barplots with error bars next to each other in ggplot2, or base
Each dataset consists of a matrix of numbers
10 20 12
10 20 12
10 20 12
which is then transformed into a mean vector of for example 3 elements
10 20 12
What I want to do is to take both mean vectors and plot them as a bar plot where the first element of one is besides the first element of the other
Dataset1Element1Bar-Dataset2Element1Bar Dataset1Element2Bar-Dataset2Element2Bar etc
Give each bar an error bar, say of standard deviation. I know I can calculate it through sd but I'm not sure how to stick it into the graph in the proper form
And lastly color them by their element number (ie Element 1)
I have the code to do one dataset but I'm not sure where to go from there.
result<-barplot(bardata, main="Mean Coverage", names.arg=namePosTargetGroup, ylab="mean Magnitude", cex.names=.4,col=c("red","blue","green"))
legend(10,legend=c("Group1","Group2","Group3"),fill = c("red","blue","green"))
A lot of what I look up gives the answer for one thing or another but its difficult to figure out how to combine them together.
I would generally not recommend plotting just a bar chart with error bars. There are many other ways to plot your data, which reveal the data and its structure a lot better.
Especially if you just have very few cases, plotting means with bars is not good. A good explanation can be found here: Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm
I find it difficult to give you a good solution, since I don't know your research-question. Knowing what you actually want to show or emphasis would make things easier.
I will give you two suggestions, one for a small dataset, one for a bigger one. All of them are created with ggplot2. I'm not coloring them by their "element number" but by their origin ("dataset 1/2"), since I find it easier to accomplish a proper graphic this way.
Small Dataset
Use geom_jitter to display all your cases, avoiding overplotting.
# import hadleyverse
library(magrittr)
library(dplyr)
library(tidyr)
library(ggplot2)
# generate small amount of data
set.seed(1234)
df1 <- data.frame(v1 = rnorm(5, 4, 1),
v2 = rnorm(5, 5, 1),
v3 = rnorm(5, 6, 1),
origin = rep(factor("df1", levels = c("df1", "df2")), 5))
df2 <- data.frame(v1 = rnorm(5, 4.5, 1),
v2 = rnorm(5, 5.5, 1),
v3 = rnorm(5, 6.5, 1),
origin = rep(factor("df2", levels = c("df1", "df2")), 5))
# merge dataframes and gather in long format
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin)
# plot data
ggplot(pdata, aes(x = id, y = variable, fill = origin, colour = origin)) +
stat_summary(fun.y = mean, geom = "point", position = position_dodge(width = .5),
size = 30, shape = "-", show_guide = F, alpha = .7) + # plot mean as "-"
geom_jitter(position = position_jitterdodge(jitter.width = .3, jitter.height = .1,
dodge.width = .5),
size = 4, alpha = .85) +
labs(x = "Variable", y = NULL) + # adjust legend
theme_light() # nicer theme
"Big" Dataset
If you have more datapoints, you can use geom_violin to summarise them.
set.seed(12345)
df1 <- data.frame(v1 = rnorm(50, 4, 1),
v2 = rnorm(50, 5, 1),
v3 = rnorm(50, 6, 1),
origin = rep(factor("df1", levels = c("df1", "df2")), 50))
df2 <- data.frame(v1 = rnorm(50, 4.5, 1),
v2 = rnorm(50, 5.5, 1),
v3 = rnorm(50, 6.5, 1),
origin = rep(factor("df2", levels = c("df1", "df2")), 50))
# merge dataframes
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin)
# plot with violin plot
ggplot(pdata, aes(x = id, y = variable, fill = origin)) +
geom_violin(adjust = .6) +
stat_summary(fun.y = mean, geom = "point", position = position_dodge(width = .9),
size = 6, shape = 4, show_guide = F) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
labs(x = "Variable", y = NULL) +
theme_light()
Version with mean and sd
If you insist on plotting the mean with standard deviation, here is how it could be done.
# merge dataframes and compute limits for sd
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin) %>%
group_by(origin, id) %>% # group data for limit calculation
mutate(upper = mean(variable) + sd(variable), # upper limit for error bar
lower = mean(variable) - sd(variable)) # lower limit for error bar
# plot
ggplot(pdata, aes(x = id, y = variable, fill = origin)) +
stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = .9),
size = 3) +
geom_errorbar(aes(ymin = lower, ymax = upper),
width = .2, # Width of the error bars
position = position_dodge(.9))