I want to create a side by side barplot using geom_bar() of this data frame,
> dfp1
value percent1 percent
1 (18,29] 0.20909091 0.4545455
2 (29,40] 0.23478261 0.5431034
3 (40,51] 0.15492958 0.3661972
4 (51,62] 0.10119048 0.1726190
5 (62,95] 0.05660377 0.1194969
With values on the x-axis and the percents as the side by side barplots. I have tried using this code,
p = ggplot(dfp1, aes(x = value, y= c(percent, percent1)), xlab="Age Group")
p = p + geom_bar(stat="identity", width=.5)
However, I get this error: Error: Aesthetics must either be length one, or the same length as the dataProblems:value. My percent and percent1 are the same length as value, so I am confused. Thanks for the help.
You will need to melt your data first over value. It will create another variable called value by default, so you will need to renames it (I called it percent). Then, plot the new data set using fill in order to separate the data into groups, and position = "dodge" in order put the bars side by side (instead of on top of each other)
library(reshape2)
library(ggplot2)
dfp1 <- melt(dfp1)
names(dfp1)[3] <- "percent"
ggplot(dfp1, aes(x = value, y= percent, fill = variable), xlab="Age Group") +
geom_bar(stat="identity", width=.5, position = "dodge")
Similar to David's answer, here is a tidyverse option using tidyr::pivot_longer to reshape the data before plotting:
library(tidyverse)
dfp1 %>%
pivot_longer(-value, names_to = "variable", values_to = "percent") %>%
ggplot(aes(x = value, y = percent, fill = variable), xlab="Age Group") +
geom_bar(stat = "identity", position = "dodge", width = 0.5)
Related
I'd like to use position dodge to offset one variable in my ggplot chart (in this case banana) and leave the other two variables (red_apple and green_apple) without an offset. Using position_dodge applies the offset to each variable, but I'd like to choose which variables are offset specifically.
library(ggplot2)
data <- data.frame(Place = c(rep('Place_A',30),rep('Place_B',30)),
variable = c(rep(c(rep('red_Apple',10),rep('green_Apple',10),rep('bananna',10)),2)),
value = rep(c(1:10,1:10-.05,1:10+.2),2))
dodge = position_dodge(.5)
ggplot(data, aes(Place, value)) +
geom_point(aes(color=variable),position=dodge)
Is there for example a way to scale position manually, like how you can do for other aesthetics?
This obviously throws an error, but is what I was hoping for...
ggplot(data, aes(Place, value)) +
geom_point(aes(color=variable, position = variable)) +
scale_position_manual(breaks = c('green_Apple','red_Apple','bananna'),
values = c(position_dodge(0),position_dodge(0),position_dodge(.5)))
Does this look like what you want?
data$grp = ifelse(data$variable == "bananna", 2, 1)
ggplot(data, aes(Place, value, group = grp)) +
geom_point(aes(color=variable), position = position_dodge(0.5))
I am trying to make some changes to my plot, but am having difficulty doing so.
(1) I would like warm, avg, and cold to be filled in as the colors red, yellow, and blue, respectively.
(2) I am trying to make the y-axis read "Count" and have it be horizontally written.
(3) In the legend, I would like the title to be Temperatures, rather than variable
Any help making these changes would be much appreciated along with other suggestions to make the plot look nicer.
df <- read.table(textConnection(
'Statistic Warm Avg Cold
Homers(Away) 1.151 1.028 .841
Homers(Home) 1.202 1.058 .949'), header = TRUE)
library(ggplot2)
library(reshape2)
df <- melt(df, id = 'Statistic')
ggplot(
data = df,
aes(
y = value,
x = Statistic,
group = variable,
shape = variable,
fill = variable
)
) +
geom_bar(stat = "identity")
You are on the right lines by trying to reshape the data into long format. My preference is to use gather from the tidyr package for that. You can also create the variable names Temperatures and Count in the gather step.
The next step is to turn the 3 classes of temperature into a factor, ordered from cold, through average, to warm.
Now you can plot. You want position = "dodge" to get the bars side by side, since it makes no sense to stack the values in a single bar. Fill colours you specify using scale_fill_manual.
You rotate the y-axis title by manipulating axis.title.y.
So putting all of that together (plus a black/white theme):
library(dplyr)
library(tidyr)
library(ggplot2)
df %>%
gather(Temperatures, Count, -Statistic) %>%
mutate(Temperatures = factor(Temperatures, c("Cold", "Avg", "Warm"))) %>%
ggplot(aes(Statistic, Count)) +
geom_col(aes(fill = Temperatures), position = "dodge") +
scale_fill_manual(values = c("blue", "yellow", "red")) +
theme_bw() +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5))
Result:
I'd question whether Count is a sensible variable name in this case.
You are almost there. To map specific colors to specific factor levels you can use scale_fill_manual and create your own scale:
scale_fill_manual(values=c("Warm"="red", "Avg"="yellow", "Cold"="blue")) +
Changing the y axis legend is also easy in ggplot:
ylab("Count") +
And to change the legend title you can use:
labs(fill='TEMPERATURE') +
Giving us:
ggplot(df, aes(y = value, x = Statistic, group= variable, fill = variable)) +
geom_bar(stat = "identity") +
scale_fill_manual(values=c("Warm"="red", "Avg"="yellow", "Cold"="blue")) +
labs(fill='TEMPERATURE') +
ylab("Count") +
xlab("") +
theme_bw() +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5))
I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")
I'm hoping to use ggplot2 to generate a set of stacked bars in pairs, much like this:
With the following example data:
df <- expand.grid(name = c("oak","birch","cedar"),
sample = c("one","two"),
type = c("sapling","adult","dead"))
df$count <- sample(5:200, size = nrow(df), replace = T)
I would want the x-axis to represent the name of the tree, with two bars per tree species: one bar for sample one and one bar for sample two. Then the colors of each bar should be determined by type.
The following code generates the stacked bar with colors by type:
ggplot(df, aes(x = name, y = count, fill = type)) + geom_bar(stat = "identity")
And the following code generates the dodged bars by sample:
ggplot(df, aes(x = name, y = count, group = sample)) + geom_bar(stat = "identity", position = "dodge")
But I can't get it to dodge one of the groupings (sample) and stack the other grouping (type):
ggplot(df, aes(x = name, y = count, fill = type, group = sample)) + geom_bar(stat = "identity", position = "dodge")
One workaround would be to put interaction of sample and name on x axis and then adjust the labels for the x axis. Problem is that bars are not put close to each other.
ggplot(df, aes(x = as.numeric(interaction(sample,name)), y = count, fill = type)) +
geom_bar(stat = "identity",color="white") +
scale_x_continuous(breaks=c(1.5,3.5,5.5),labels=c("oak","birch","cedar"))
Another solution is to use facets for name and sample as x values.
ggplot(df,aes(x=sample,y=count,fill=type))+
geom_bar(stat = "identity",color="white")+
facet_wrap(~name,nrow=1)
I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")