How can I make the x-axis display the text in the "xaxisTitles" vector?
Here is my code you can run:
require(ggplot2)
require(reshape)
xaxisTitles<- cbind("a","b","c","d","e","f","g","h","j","k")
df <- data.frame(time = 1:10,
a = cumsum(rnorm(10)),
b = cumsum(rnorm(10)),
c = cumsum(rnorm(10)))
df <- melt(df , id = 'time', variable_name = 'series')
# plot on same grid, each series colored differently --
# good if the series have same scale
ggplot(df, aes(time,value)) + geom_line(aes(colour = series))+ theme(axis.text.x = xaxisTitles)
I am getting the error:
Error in (function (el, elname) :
Element axis.text.x must be a element_text object.
The reason you are getting the error is that theme(...) is used to set the appearance of the axis text (e.g., color, font family, font face, size, orientation, etc.), but not the values of the text. To do that, as #SteveReno points out, you have to use scale_x_discrete(...).
require(ggplot2)
require(reshape)
set.seed(321)
# xaxisTitles<- cbind("a","b","c","d","e","f","g","h","j","k")
xaxisTItles<- letters[1:10] # easier way to do this...
df <- data.frame(time = 1:10,
a = cumsum(rnorm(10)),
b = cumsum(rnorm(10)),
c = cumsum(rnorm(10)))
df <- melt(df , id = 'time', variable_name = 'series')
# plot on same grid, each series colored differently --
# good if the series have same scale
ggplot(df, aes(time,value)) +
geom_line(aes(colour = series))+
scale_x_discrete(labels=xaxisTitles)+
theme(axis.text.x=element_text(face="bold",colour="red",size=14))
The best way to do this is to make the time variable a factor rather than a numeric vector, as long as you remember to adjust the group aesthetic:
df$time = factor(xaxisTitles[df$time])
ggplot(df, aes(time, value)) + geom_line(aes(colour = series, group=series))
(If you don't add the group=series argument, it won't know that you want to connect lines across the factor on the x axis).
You can just use scale_x_discrete to set the labels.
ggplot(df, aes(time,value)) + geom_line(aes(colour = series))+ scale_x_discrete(labels= xaxisTitles)
Here's some more helpful info http://docs.ggplot2.org/0.9.3.1/scale_discrete.html
Related
I have a csv file which looks like the following:
Name,Count1,Count2,Count3
application_name1,x1,x2,x3
application_name2,x4,x5,x6
The x variables represent numbers and the applications_name variables represent names of different applications.
Now I would like to make a barplot for each row by using ggplot2. The barplot should have the application_name as title. The x axis should show Count1, Count2, Count3 and the y axis should show the corresponding values (x1, x2, x3).
I would like to have a single barplot for each row, because I have to store the different plots in different files. So I guess I cannot use "melt".
I would like to have something like:
for each row in rows {
print barplot in file
}
Thanks for your help.
You can use melt to rearrange your data and then use either facet_wrap or facet_grid to get a separate plot for each application name
library(ggplot2)
library(reshape2)
# example data
mydf <- data.frame(name = paste0("name",1:4), replicate(5,rpois(4,30)))
names(mydf)[2:6] <- paste0("count",1:5)
# rearrange data
m <- melt(mydf)
# if you are wanting to export each plot separately
# I used facet_wrap as a quick way to add the application name as a plot title
for(i in levels(m$name)) {
p <- ggplot(subset(m, name==i), aes(variable, value, fill = variable)) +
facet_wrap(~ name) +
geom_bar(stat="identity", show_guide=FALSE)
ggsave(paste0("figure_",i,".pdf"), p)
}
# or all plots in one window
ggplot(m, aes(variable, value, fill = variable)) +
facet_wrap(~ name) +
geom_bar(stat="identity", show_guide=FALSE)
I didn't see #user20650's nice answer before preparing this. It's almost identical, except that I use plyr::d_ply to save things instead of a loop. I believe dplyr::do() is another good option (you'd group_by(Name) first).
yourData <- data.frame(Name = sample(letters, 10),
Count1 = rpois(10, 20),
Count2 = rpois(10, 10),
Count3 = rpois(10, 8))
library(reshape2)
yourMelt <- melt(yourData, id.vars = "Name")
library(ggplot2)
# Test a function on one piece to develope graph
ggplot(subset(yourMelt, Name == "a"), aes(x = variable, y = value)) +
geom_bar(stat = "identity") +
labs(title = subset(yourMelt, Name == 'a')$Name)
# Wrap it up, with saving to file
bp <- function(dat) {
myPlot <- ggplot(dat, aes(x = variable, y = value)) +
geom_bar(stat = "identity") +
labs(title = dat$Name)
ggsave(filname = paste0("path/to/save/", dat$Name, "_plot.pdf"),
myPlot)
}
library(plyr)
d_ply(yourMelt, .variables = "Name", .fun = bp)
How do I show the specific values of variables on a graph?
For example:
ggplot(data=df)+
geom_bar(mapping=aes(x=var))
How do I get it to have the actual count on the bar chart?
I believe this question has asked before but I couldn' find a duplicate quickly.
Here is an example how to annotate the columns of a bar chart with the counts:
n_row <- 100L
set.seed(123L)
df <- data.frame(var = sample(LETTERS[1:5], n_row, TRUE, 5:1))
library(ggplot2)
ggplot(data = df) + aes(x = var) +
geom_bar() +
stat_count(geom = "text", aes(label = ..count..), vjust = "bottom")
Alternatively, we can write
ggplot(data = df) + aes(x = var, label = ..count..) +
geom_bar() +
geom_text(stat = "count", vjust = "bottom")
Some geoms and stats do compute variables which can be accessed using special names like ..count... To plot labels, the x and y positions and the text need to be specified. The x position is taken from the date as specified in aes(). The y position seems to be taken automatically from the statistical transformation but the text needs to be specified explicitely.
Suggested reading:
Statistical transformations in R for Data Science
ggplot2 homepage
I'm trying to plot a geom_histogram where the bars are colored by a gradient.
This is what I'm trying to do:
library(ggplot2)
set.seed(1)
df <- data.frame(id=paste("ID",1:1000,sep="."),val=rnorm(1000),stringsAsFactors=F)
ggplot(df,aes_string(x="val",y="..count..+1",fill="val"))+geom_histogram(binwidth=1,pad=TRUE)+scale_y_log10()+scale_fill_gradient2("val",low="darkblue",high="darkred")
But getting:
Any idea how to get it colored by the defined gradient?
Not sure you can fill by val because each bar of the histogram represents a collection of points.
You can, however, fill by categorical bins using cut. For example:
ggplot(df, aes(val, fill = cut(val, 100))) +
geom_histogram(show.legend = FALSE)
Just for completeness.
If the colors I'd like to have the gradient on to be manually selected here's what I suggest:
data:
library(ggplot2)
set.seed(1)
df <- data.frame(id=paste("ID",1:1000,sep="."),val=rnorm(1000),stringsAsFactors=F)
colors:
bins <- 10
cols <- c("darkblue","darkred")
colGradient <- colorRampPalette(cols)
cut.cols <- colGradient(bins)
cuts <- cut(df$val,bins)
names(cuts) <- sapply(cuts,function(t) cut.cols[which(as.character(t) == levels(cuts))])
plot:
ggplot(df,aes(val,fill=cut(val,bins))) +
geom_histogram(show.legend=FALSE) +
scale_color_manual(values=cut.cols,labels=levels(cuts)) +
scale_fill_manual(values=cut.cols,labels=levels(cuts))
Instead of binning manually another option would be to make use of the bins computed by stat_bin by mapping ..x.. (or factor(..x..) in case of a discrete scale) or after_stat(x) on the fill aesthetic.
An issue with computing the bins manually is that we end up with multiple groups per bin for which the count has to be computed (even if the count is zero most of the time) and which get stacked on top of each other in the histogram. Especially, this gets problematic if one would add labels of counts to the histogram as can be seen in this post, because in that case one ends up with multiple labels per bin.
library(ggplot2)
set.seed(1)
df <- data.frame(id = paste("ID", 1:1000, sep = "."), val = rnorm(1000), stringsAsFactors = F)
ggplot(df, aes(x = val, y = ..count.. + 1, fill = ..x..)) +
geom_histogram(binwidth = .1, pad = TRUE) +
scale_y_log10() +
scale_fill_gradient2(name = "val", low = "darkblue", high = "darkred")
#> Warning: Duplicated aesthetics after name standardisation: pad
I am trying to generate a (grouped) density plot labelled with sample sizes.
Sample data:
set.seed(100)
df <- data.frame(ab.class = c(rep("A", 200), rep("B", 200)),
val = c(rnorm(200, 0, 1), rnorm(200, 1, 1)))
The unlabelled density plot is generated and looks as follows:
ggplot(df, aes(x = val, group = ab.class)) +
geom_density(aes(fill = ab.class), alpha = 0.4)
What I want to do is add text labels somewhere near the peak of each density, showing the number of samples in each group. However, I cannot find the right combination of options to summarise the data in this way.
I tried to adapt the code suggested in this answer to a similar question on boxplots: https://stackoverflow.com/a/15720769/1836013
n_fun <- function(x){
return(data.frame(y = max(x), label = paste0("n = ",length(x))))
}
ggplot(df, aes(x = val, group = ab.class)) +
geom_density(aes(fill = ab.class), alpha = 0.4) +
stat_summary(geom = "text", fun.data = n_fun)
However, this fails with Error: stat_summary requires the following missing aesthetics: y.
I also tried adding y = ..density.. within aes() for each of the geom_density() and stat_summary() layers, and in the ggplot() object itself... none of which solved the problem.
I know this could be achieved by manually adding labels for each group, but I was hoping for a solution that generalises, and e.g. allows the label colour to be set via aes() to match the densities.
Where am I going wrong?
The y in the return of fun.data is not the aes. stat_summary complains that he cannot find y, which should be specificed in global settings at ggplot(df, aes(x = val, group = ab.class, y = or stat_summary(aes(y = if global setting of y is not available. The fun.data compute where to display point/text/... at each x based on y given in the data through aes. (I am not sure whether I have made this clear. Not a native English speaker).
Even if you have specified y through aes, you won't get desired results because stat_summary compute a y at each x.
However, you can add text to desired positions by geom_text or annotate:
# save the plot as p
p <- ggplot(df, aes(x = val, group = ab.class)) +
geom_density(aes(fill = ab.class), alpha = 0.4)
# build the data displayed on the plot.
p.data <- ggplot_build(p)$data[[1]]
# Note that column 'scaled' is used for plotting
# so we extract the max density row for each group
p.text <- lapply(split(p.data, f = p.data$group), function(df){
df[which.max(df$scaled), ]
})
p.text <- do.call(rbind, p.text) # we can also get p.text with dplyr.
# now add the text layer to the plot
p + annotate('text', x = p.text$x, y = p.text$y,
label = sprintf('n = %d', p.text$n), vjust = 0)
This question already has answers here:
Plotting two variables as lines using ggplot2 on the same graph
(5 answers)
Closed 4 years ago.
The solution with ggplot in this question worked really well for my data. However, I am trying to add a legend and everything that I tried does not work...
For example, in the ggplot example in the above question, how I can add a legend to show that the red curve is related to "Ocean" and the green curve is related to "Soil"? Yes, I want to add text that I will define and it is not related to any other variable in my data.frame.
The example below is some of my own data...
Rate Probability Stats
1.0e-04 1e-04 891.15
1.0e-05 1e-04 690
...
etc (it's about 400 rows). And I have two data frames similar to the above one.
So My code is
g <- ggplot(Master1MY, aes(Probability))
g <- g + geom_point(aes(y=Master1MY$Stats), colour="red", size=1)
g <- g + geom_point(aes(y=Transposon1MY$Stats), colour="blue", size=1)
g + labs(title= "10,000bp and 1MY", x = "Probability", y = "Stats")
The plot looks like
I just want a red and blue legend saying "Master" and "Transposon"
Thanks!
In ggplot it is generally most convenient to keep the data in a 'long' format. Here I use the function melt from the reshape2 package to convert your data from wide to long format. Depending how you specify different aesthetics (size, shape, colour et c), corresponding legends will appear.
library(ggplot2)
library(reshape2)
# data from the example you were referring to, in a 'wide' format.
x <- seq(-2, 2, 0.05)
ocean <- pnorm(x)
soil <- pnorm(x, 1, 1)
df <- data.frame(x, ocean, soil)
# melt the data to a long format
df2 <- melt(data = df, id.vars = "x")
# plot, using the aesthetics argument 'colour'
ggplot(data = df2, aes(x = x, y = value, colour = variable)) + geom_line()
Edit, set name and labels of legend
# Manually set name of the colour scale and labels for the different colours
ggplot(data = df2, aes(x = x, y = value, colour = variable)) +
geom_line() +
scale_colour_discrete(name = "Type of sample", labels = c("Sea water", "Soil"))
Edit2, following new sample data
Convert your data, assuming its organization from your update, to a long format. Again, I believe you make your ggplot life easier if you keep your data in a long format. I relate every step with the simple example data which I used in my first answer. Please note that there are many alternative ways to rearrange your data. This is one way, based on the small (non-reproducible) parts of your data you provided in the update.
# x <- seq(-2, 2, 0.05)
# Master1MY$Probability
Probability <- 1:100
# ocean <- pnorm(x)
# Master1MY$Stats
Master1MY <- rnorm(100, mean = 600, sd = 20)
# soil <- pnorm(x,1,1)
# Transposon1MY$Stats
Transposon1MY <- rnorm(100, mean = 100, sd = 10)
# df <- data.frame(x, ocean, soil)
df <- data.frame(Probability, Master1MY, Transposon1MY)
# df2 <- melt(df, id.var = "x")
df2 <- melt(df, id.var = "Probability")
# default
ggplot(data = df2, aes(x = Probability, y = value, col = variable)) +
geom_point()
# change legend name and labels, see previous edit using 'scale_colour_discrete'
# set manual colours scale using 'scale_colour_manual'.
ggplot(data = df2, aes(x = Probability, y = value, col = variable)) +
geom_point() +
scale_colour_manual(values = c("red","blue"), name = "Type of sample", labels = c("Master", "Transposon"))