Lets say, in R, I have a data frame letters, numbers and animals and I want to examine the relationship between all three graphically. I could do something like.
library(dplyr)
library(ggplot2)
library(gridExtra)
set.seed(33)
my_df <- data.frame(
letters = c(letters[1:10], letters[6:15], letters[11:20]),
animals = c(rep('sheep', 10), rep('cow', 10), rep('horse', 10)),
numbers = rnorm(1:30)
)
ggplot(my_df, aes(x = letters, y = numbers)) + geom_point() +
facet_wrap(~animals, ncol = 1, scales = 'free_x')
I'd get something that looks like.
However, I want the order of the x axis to be dependent on the order of the y-axis. This is easy enough to do without facets, as per this example.
I can even make an ordered figure for each animal and then bind them together with grid.arrange as in this example
my_df_shp <- my_df %>% filter(animals == 'sheep')
my_df_cow <- my_df %>% filter(animals == 'cow')
my_df_horse <- my_df %>% filter(animals == 'horse')
my_df_shp1 <- my_df_shp %>% mutate(letters = reorder(letters, numbers))
my_df_cow1 <- my_df_cow %>% mutate(letters = reorder(letters, numbers))
my_df_horse1 <- my_df_horse %>% mutate(letters = reorder(letters, numbers))
p_shp <- ggplot(my_df_shp1, aes(x = letters, y = numbers)) + geom_point()
p_cow <- ggplot(my_df_cow1, aes(x = letters, y = numbers)) + geom_point()
p_horse <- ggplot(my_df_horse1, aes(x = letters, y = numbers)) + geom_point()
grid.arrange(p_shp, p_cow, p_horse, ncol = 1)
I don't particularly like this solution though, because it isn't easily generalizable to cases where there are a lot of facets.
I'd rather do something like
ggplot(my_df, aes(x = y_ordered_by_facet(letters, by = numbers), y = numbers)) + geom_point() +
facet_wrap(~animals, ncol = 1, scales = 'free_x')
Where y_ordered is some function that cleverly orders the letters factor to be in the same order as the numbers.
Something that gets close to this, but doesn't quite seem to work is
ggplot(my_df, aes(x = reorder(letters, numbers), y = numbers)) +
geom_point() + facet_wrap(~animals, ncol = 1, scales = 'free_x')
That doesn't quite work because the order ends up taking effect before, rather than after the facet wrapping and thus putting the labels in not quite the right order for each panel.
Any clever ideas?
I've found dplyr doesn't work super well with group_by() when dealing with different factor levels in each of the groups. So one work around is thinking of creating a new factor that's unique for each animal-letter combination and ordering that. First, we create an interaction variable with animal+letter and determine the proper order for each of the letters for the animals
new_order <- my_df %>%
group_by(animals) %>%
do(data_frame(al=levels(reorder(interaction(.$animals, .$letters, drop=TRUE), .$numbers)))) %>%
pull(al)
Now we create the interaction variable in the data we want to plot, use this new ordering, and finally change the labels so they look like just the letters again
my_df %>%
mutate(al=factor(interaction(animals, letters), levels=new_order)) %>%
ggplot(aes(x = al, y = numbers)) +
geom_point() + facet_wrap(~animals, ncol = 1, scales = 'free_x') +
scale_x_discrete(breaks= new_order, labels=gsub("^.*\\.", "", new_order))
set.seed(33)
my_df <- data.frame(
letters = c(letters[1:10], letters[6:15], letters[11:20]),
animals = c(rep('sheep', 10), rep('cow', 10), rep('horse', 10)),
numbers = rnorm(1:30)
)
my_df %>% group_by(animals) %>%
arrange(numbers, .by_group = T) %>%
mutate(lett = factor(interaction(animals,letters, drop=TRUE))) -> my_df
ggplot(my_df, aes(x = reorder(lett, numbers), y = numbers)) +
geom_point(size = 3) +
facet_wrap(~animals, ncol = 1, scales = 'free_x') +
scale_x_discrete(breaks = my_df$lett, labels=gsub("^.*\\.", "", my_df$lett))
Related
I have following data:
df <- data.frame("Stat" = c("Var1","Var1","Var1","Var1","Var1","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3"),
"Value" = c(0,1,2,3,4,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10),
"n" = c(33,120,223,63,20,17,28,33,22, 35,41,53,44,55,59,39,33, 46,30,29,23,21,14,6,18,7,29,50,80,86,91,83,35,34, 20))
What I wanted to do is to plot the above data as bar plot in one canvas but in three rows (1 columns x 3 rows) and each panel should contain plot for only one variable (Stat) eg. Var1 in first panel, Var2 in second and Var3 in the third panel, using the following code:
library(multipanelfigure)
fig1 <- multi_panel_figure(columns = 2, rows = 2, panel_label_type = "none")
# fit the plots on the panels
fig1 %<>%
fill_panel(Var1Plot, column = 1, row = 1) %<>%
fill_panel(Var2Plot, column = 2, row = 1) %<>%
fill_panel(Var3Plot, column = 1:2, row = 2)
fig1
Issue is how to get the Var1Plot, Var2Plot and Var3Plot so that these can be placed in respective panels above. I used the below code, but not able to get the results into above panels:
library(tidyverse)
df %>% ggplot(aes(x = Value, y = n)) +
geom_bar(stat='identity') + facet_wrap(~ Stat)
Expected plot should look something like this :
Here's an approach with cowplot.
library(cowplot)
figure.list <- map(unique(df$Stat), ~
ggplot(data = subset(df, df$Stat == .x), aes(x = Value, y = n)) +
geom_bar(stat='identity') +
ggtitle(.x))
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))
If you really want some to be coord_flip-ed, you could make the list manually:
figure.list <- list()
figure.list[[1]] <- ggplot(data = subset(df, df$Stat == "Var1"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[2]] <- ggplot(data = subset(df, df$Stat == "Var2"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[3]] <- ggplot(data = subset(df, df$Stat == "Var3"), aes(x = Value, y = n)) +
geom_bar(stat='identity')
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))
My first Q here, so please go lightly if I'm out of step anywhere.
I'm trying to code R to produce a single chart to contain a number of data series lines. The number of data series may vary but will be provided in the data frame. I have tried to rearrange another thread's content to print the geom_line , but not successfully.
The logic is:
#desire to replace loop of 1:5 with ncol(df)
print(ggplot(df,aes(x=time))
for (i in 1:5) {
print (+ geom_line(aes(y=df[,i]))
}
#functioning geom point loops ggplot production:
for (i in 1:5) {
print(ggplot(df,aes(x=time,y=df[,i]))+geom_point())
}
#functioning multi-line ggplot where n is explicit:
ggplot(data=df, aes(x=time), group=1) +
geom_line(aes(y=df$`3`))+
geom_line(aes(y=df$`4`))
The functioning example code produces n number of point charts, 5 in this case. I would like just one chart to contain n line series.
This may be similar to How to plot n dimensional matrix? for which there are currently no relevant answers
Any contributions much appreciated, thanks
You can use gather from tidyverse "world" to do that.
As you didn't supply a sample data I used mtcars.
I created two data.frames one with 3 columns one with 9. In each one of them I plotted all of the variables against the variable mpg.
library(tidyverse)
df3Columns <- mtcars[, 1:4]
df9Columns <- mtcars[, 1:10]
df3Columns %>%
gather(var, value, -mpg) %>%
ggplot(aes(mpg, value, group = var, color = var)) +
geom_line()
df9Columns %>%
gather(var, value, -mpg) %>%
ggplot(aes(mpg, value, group = var, color = var)) +
geom_line()
Edit - using the sample data in comments.
library(tidyverse)
df %>%
rownames_to_column("time") %>%
gather(var, value, -time) %>%
ggplot(aes(time, value, group = var, color = var)) +
geom_line()
Sample data:
df <- structure(list("39083" = c(96, 100, 100), "39090" = c(99, 100, 100), "39097" = c(99, 100, 100)), row.names = 3:5, class = "data.frame")
To strictly answer your question, you can simply store your ggplot in a variable and add the geom_line one by one:
df <- structure(list("39083" = c(96, 100, 100), "39090" = c(99, 100, 100), "39097" = c(99, 100, 100)), row.names = 3:5, class = "data.frame")
g <- ggplot(df, aes(x = 1:nrow(df)))
for (i in colnames(df))
{
g <- g + geom_line(y = df[,i])
}
g <- g + scale_y_continuous(limits = c(min(df), max(df)))
print(g)
However, this is not a very convenient solution. I would highly recommend to refactor your data frame to be more ggplot style.
df.ultimate <- data.frame(time = numeric(), value = numeric(), group = character())
for (i in colnames(df))
{
df.ultimate <- rbind(df.ultimate, data.frame(time = 1:nrow(df), value = df[, i], group = i))
}
g <- ggplot(df.ultimate, aes(x = time, y = value, color = group))
g <- g + geom_line()
print(g)
A one-line solution:
ggplot(data.frame(time = rep(1:nrow(df), ncol(df)),
value = as.vector(as.matrix(df)),
group = rep(colnames(df), each = nrow(df))),
aes(x = time, y = value, color = group)) + geom_line()
I'm struggling to reorder my data for plotting with ggplot in a function that also uses dplyr:
# example data
library(ggplot2)
library(dplyr)
dat <- data.frame(a = c(rep("l", 10), rep("m", 5), rep("o", 15)),
b = sample(100, 30),
c= c(rep("q", 10), rep("r", 5), rep("s", 15)))
Here are my steps outside of a function:
# set a variable
colm <- "a"
# make a table
dat1 <- dat %>%
group_by_(colm) %>%
tally(sort = TRUE)
# put in order and plot
ggplot(dat2, aes(x = reorder(a, n), y = n)) +
geom_bar(stat = "identity")
But when I try to make that into a function, I can't seem to use reorder:
f <- function(the_data, the_column){
dat %>% group_by_(the_column) %>%
tally(sort = TRUE) %>%
ggplot(aes_string(x = reorder(the_column, 'n'), y = 'n')) +
geom_bar(stat = "identity")
}
f(dat, "a")
Warning message:
In mean.default(X[[i]], ...) :
argument is not numeric or logical: returning NA
The function will work without reorder:
f <- function(the_data, the_column){
dat %>% group_by_(the_column) %>%
tally(sort = TRUE) %>%
ggplot(aes_string(x = the_column, y = 'n')) +
geom_bar(stat = "identity")
}
f(dat, "a")
And I can get what I want without dplyr, but I'd prefer to use dplyr because it's more efficient in my actual use case:
# without dplyr
ff = function(the_data, the_column) {
data.frame(table(the_data[the_column])) %>%
ggplot(aes(x = reorder(Var1, Freq), y = Freq)) +
geom_bar(stat = "identity") +
ylab("n") +
xlab(the_column)
}
ff(dat, "a")
I see that others have struggled with this (1, 2), but it seems there must be a more efficient dplyr/pipe idiom for this reordering-in-a-function task.
If you are going to use aes_string, then the whole value must be a string, not just partially a string. You can use paste() to help build the expression you want to use for x. For example
f <- function(the_data, the_column){
dat %>% group_by_(the_column) %>%
tally(sort = TRUE) %>%
ggplot(aes_string(x = paste0("reorder(",the_column,", n)"), y = 'n')) +
geom_bar(stat = "identity")
}
Or you could use expressions rather than strings
f <- function(the_data, the_column){
dat %>% group_by_(the_column) %>%
tally(sort = TRUE) %>%
ggplot(aes_q(x = substitute(reorder(x, n),list(x=as.name(the_column))), y = quote(n))) +
geom_bar(stat = "identity")
}
but the general idea is that you need to be careful when mixing strings and raw language elements (like names or expressions).
I am trying to insert labels into a proportional barchart: one label per segment, with as text the percentage of each segment. With the help of thothal I managed to do this:
var1 <- factor(as.character(c(1,1,2,3,1,4,3,2,3,2,1,4,2,3,2,1,4,3,1,2)))
var2 <- factor(as.character(c(1,4,2,3,4,2,1,2,3,4,2,1,1,3,2,1,2,4,3,2)))
data <- data.frame(var1, var2)
dat <- ddply(data, .(var1), function(.) {
res <- cumsum(prop.table(table(factor(.$var2))))
data.frame(lab = names(res), y = c(res))
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = lab, x = var1, y = y), data = dat)
I would like to have for labels the percentage of each level, and not the level name.
Any help appreciated!
You are telling geom_text to use var2 as your y variable. That is in fact as.numeric(data$var2), which translates to a range of 1-4. However, your barplot uses the cumulative percentages.
Hence you have to calculate these positions before:
library(ggplot2)
library(plyr) # just for convenience
var1 <- factor(as.character(c(1,1,2,3,1,4,3,2,3,2,1,4,2,3,2,1,4,3,1,2)))
var2 <- factor(as.character(c(1,4,2,3,4,2,1,2,3,4,2,1,1,3,2,1,2,4,3,2)))
data <- data.frame(var1, var2)
dat <- ddply(data, .(var1), function(.) {
res <- cumsum(prop.table(table(factor(.$var2)))) # re-factor to use only used levels
res2 <- prop.table(table(factor(.$var2))) # re-factor to use only used levels
data.frame(lab = names(res), y = c(res), lab2 = c(res2))
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = round(lab2, 2), x = var1, y = y), data = dat)
This places the labs at the end of each bar. If you want to have them slightly offset, you should play arround in the creation of dat.
Another way to get non-cumulative percentage plus centering the labels, for future reference:
dat <- ddply(data, .(var1), function(.) {
good <- prop.table(table(factor(.$var2)))
res <- cumsum(prop.table(table(factor(.$var2))))
data.frame(lab = names(res), y = c(res), good = good, pos = cumsum(good) - 0.5*good)
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = round(good.Freq, 2), x = var1, y = pos.Freq), data = dat)
I used the following code and work well for me, give it a try.
geom_text(aes(label = paste(round(dat2$value,0), "%"),
vjust = ifelse(value >= 0, -0.05, 1.15)
),
size = 4, position = position_stack(vjust=0.5)
)
Basically, you need label = paste(y value, "%"). In my code, dat2 is the data file name; value is the Y value in the figure. In this case, I rounded up the number with 0 decimal.Good luck.
Does anyone know if there is a way to add variable labels to the ggparcoord function in GGally? I've tried numerous ways with geom_text, but nothing is yielding results.
To be more explicit, I am looking to pass the row.names(mtcars) through geom_text. The only way that I can distinguish the car is passing row.names(mtcars) through the groupColumn argument, but I don't like the way this looks.
Doesn't work:
mtcars$carName <- row.names(mtcars) # This becomes column 12
library(GGally)
# Attempt 1
ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1) +
geom_text(aes(label = carName))
# Attempt 2
ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1,
mapping = aes(label = carName))
Any ideas would be appreciated!
Solution 1: If you want to stick close to your original attempt, you can calculate the appropriate y coordinates for the car names, & add that as a separate data source. Use inherit.aes = FALSE so that this geom_text layer doesn't inherit anything from the ggplot object created using ggparcoord():
library(dplyr)
p1 <- ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1) +
geom_text(data = mtcars %>%
select(carName) %>%
mutate(x = 1,
y = scale(as.integer(factor(carName)))),
aes(x = x, y = y, label = carName),
hjust = 1.1,
inherit.aes = FALSE) +
# optional: remove "carName" from x-axis labels
scale_x_discrete(labels = function(x) c("", x[-1])) +
# also optional: hide legend, which doesn't really seem relevant here
theme(legend.position = "none")
p1
Solution 2: This alternative uses carName as the group column, & doesn't pass it as one of the parallel coordinate columns. (which I think this might be closer to the use cases intended by this function...) Specifying carName as the group column allows the car name values to be captured in the data slot of the ggplot object created by ggparcoord() this time, so our geom_text label can inherit it directly, & even filter only for rows corresponding to variable == "mpg" (or whatever the first of the parallel coordinate columns is named, in the actual use case). The y coordinates are not as evenly spread out as above, but geom_text_repel from the ggrepel package does a decent job at shifting overlapping text labels away from one another.
library(dplyr)
library(ggrepel)
p2 <- ggparcoord(mtcars,
columns = c(1, 6),
groupColumn = "carName") +
geom_text_repel(data = . %>%
filter(variable == "mpg"),
aes(x = variable, y = value, label = carName),
xlim = c(NA, 1)) + # limit repel region to the left of the 1st column
theme(legend.position = "none") # as before, hide legend since the labels
# are already in the plot
p2
Solution 3 / 4: You can actually plot the same with ggplot(), without relying on extensions that may do unexpected stuff behind the scenes:
library(dplyr)
library(tidyr)
library(ggrepel)
# similar output to solution 1
p3 <- mtcars %>%
select(carName, mpg, wt) %>%
mutate(carName.column = as.integer(factor(carName))) %>%
gather(variable, value, -carName) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ungroup() %>%
ggplot(aes(x = variable, y = value, label = carName, group = carName)) +
geom_line() +
geom_text(data = . %>% filter(variable == "carName.column"),
hjust = 1.1) +
scale_x_discrete(labels = function(x) c("", x[-1]))
p3
# similar output to solution 2
p4 <- mtcars %>%
select(carName, mpg, wt) %>%
gather(variable, value, -carName) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ungroup() %>%
ggplot(aes(x = variable, y = value, label = carName, group = carName)) +
geom_line() +
geom_text_repel(data = . %>% filter(variable == "mpg"),
xlim = c(NA, 1))
p4
Edit
You can add text labels on the right as well, for each of the above. Do note that the location for labels may not be nicely spaced out, since they are positioned according to wt's scaled values:
p1 +
geom_text(data = mtcars %>%
select(carName, wt) %>%
mutate(x = 3,
y = scale(wt)),
aes(x = x, y = y, label = carName),
hjust = -0.1,
inherit.aes = FALSE)
p2 +
geom_text_repel(data = . %>%
filter(variable == "wt"),
aes(x = variable, y = value, label = carName),
xlim = c(2, NA))
p3 +
geom_text(data = . %>% filter(variable == "wt"),
hjust = -0.1)
p4 +
geom_text_repel(data = . %>% filter(variable == "wt"),
xlim = c(2, NA))