R A simple univariate plot with ggplot - r

I would like to do a simple graph like this:
# Frequencies in decreasing order: 10 x 10000, 15 x 100, 50 x 10, 100 x 1.
ff <- data.frame(
  Freq = rep(c(10000, 100, 10, 1), times = c(10, 15, 50, 100))
)
# Given a single vector, base plot() puts the element index on the x axis.
plot(log(ff$Freq), type = "l")
Is adding an x variable the only option?
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later at ggplot().
library(ggplot2)
# Explicit row index used as the x aesthetic; seq_len() is also safe for a
# data frame with zero rows, where 1:nrow(ff) would yield c(1, 0).
ff$Ord <- seq_len(nrow(ff))
ggplot(data = ff, aes(x = Ord, y = log(Freq))) +
  geom_line()
thanks in advance

Here's one approach with geom_step():
library(ggplot2)
library(scales)
# Step plot of log10(Freq) against the row index; no helper column is needed
# because seq_along() is evaluated inside aes().
# `linewidth` replaces `size` for line geoms as of ggplot2 3.4.0, where using
# `size` for lines is deprecated.
ggplot(ff, aes(x = seq_along(Freq), y = log10(Freq))) +
  geom_step(linewidth = 1) +
  labs(x = "Index", y = "Freq") +
  # Print the log10 values as powers of ten.
  scale_y_continuous(labels = math_format(10^.x))

Related

ggplot x axis for year

The following is my R code for the scatterplot.
library(tidyverse)
# Ten yearly population counts, paired with the years 2000:2009 below.
Pop <- c(
  24039274, 24854892, 25718048, 26624820, 27568436,
  28543940, 29550662, 30590487, 31663896, 32771895
)
# Year is numeric here, so the x axis is a continuous scale.
Popu <- data.frame(Year = 2000:2009, lpop = log2(Pop))
ggplot(Popu, aes(x = Year, y = lpop)) +
  geom_point(colour = "steelblue", size = 3) +
  labs(y = "Log2 of Population")
My question is: why does the x axis show 2000.0, 2002.5, and so on? How do I fix this?
Thank you in advance.
Try this:
# Storing Year as a factor makes the x axis discrete, with one tick per year.
Popu <- data.frame(Year = factor(2000:2009), lpop = log2(Pop))
ggplot(Popu, aes(x = Year, y = lpop)) +
  geom_point(colour = "steelblue", size = 3) +
  labs(y = "Log2 of Population")

Only display label per category

I have the following dataset:
# Three seasons for a single player; year and player_id are categorical.
year <- factor(1999:2001)
era <- c(0.4, 0.6, 0.7)
player_id <- factor(rep(2, 3))
df <- data.frame(year = year, era = era, player_id = player_id)
Using this data I created the following graph:
# era per year, coloured by player, with the player id printed as text at
# every data point.
ggplot(df, aes(x = year, y = era, colour = player_id)) +
  geom_line() +
  geom_text(aes(label = player_id), hjust = 0.7)
The problem, however, is that I now get a label at every data point. I only want a label at the last data point of each line.
Any thoughts on what I should change so that I get only one label?
If I understand correctly, you want label at end of data point. You could do this using directlabels library, as below:
library(ggplot2)
library(directlabels)
# One line per player; directlabels places the label via its "last.points"
# method instead of a colour legend.
ggplot(df, aes(x = year, y = era, group = player_id, colour = player_id)) +
  geom_line() +
  # Hide the colour guide, since the lines are labelled directly.
  scale_colour_discrete(guide = "none") +
  # Padding on the discrete x axis; presumably leaves room for the label.
  scale_x_discrete(expand = c(0, 1)) +
  geom_dl(
    aes(label = player_id),
    method = list(dl.combine("last.points"), cex = 0.8)
  )
Output:
If I am understanding correctly what you want, then you can replace the geom_text(...) with geom_point()

ggplot can not group bars

Who can tell me why ggplot can't give me grouped bars?
# Bar chart of srednia per label, filled by group, with error bars taken from
# the minus/plus columns. `df` and `gen` are defined outside this snippet.
# NOTE(review): if every label occurs only once, each bar already has its own
# x position and position = "dodge" has nothing to place side by side; confirm
# against the real data.
ggplot(df, aes(x = factor(labels), y = srednia, dodge=factor(group))) +
labs(title = gen, size=3)+ ylab("Fold change")+ xlab("Linnia komórkowa") +
geom_bar(aes(fill=factor(group)),stat="identity",position ="dodge") +
geom_errorbar(aes(ymin=minus, ymax=plus))
By grouped bars I mean something like this (paint art):
Thank you in advance!
I guess you can achieve this by changing the scale for the x axis. Here's a reproducible example and a possible solution.
# packages
# library() fails immediately if a package is missing, whereas require() only
# returns FALSE and lets the script break later on.
library(plyr)
library(ggplot2)
# generate data
set.seed(123)
df <- data.frame(
  labels = LETTERS[1:6],
  group = rep(1:3, each = 2),
  srednia = runif(6)
)
# limits for x axis: the labels of each group followed by a "space"
# placeholder; head(..., -1) drops the placeholder after the last group
mylims <- head(
  unlist(
    dlply(df, .(group), function(x) c(levels(factor(x$labels)), "space"))
  ),
  -1
)
# additional space between groups
ggplot(df, aes(x = factor(labels), y = srednia, dodge = factor(group))) +
  geom_bar(aes(fill = factor(group)), stat = "identity") +
  scale_x_discrete(limits = mylims, breaks = levels(factor(df$labels)))
# removing space within group
ggplot(df, aes(x = factor(labels), y = srednia, dodge = factor(group))) +
  geom_bar(aes(fill = factor(group)), stat = "identity", width = 1) +
  scale_x_discrete(limits = mylims, breaks = levels(factor(df$labels)))

cumulative plot using ggplot2

I'm learning to use ggplot2 and am looking for the smallest ggplot2 code that reproduces the base::plot result below. I've tried a few things and they all ended up being horrendously long, so I'm looking for the smallest expression and ideally would like to have the dates on the x-axis (which are not there in the plot below).
# Example data: dates stored as yyyymmdd numbers, one value per row.
df <- data.frame(
  date = c(20121201, 20121220, 20130101, 20130115, 20130201),
  val = c(10, 5, 8, 20, 4)
)
# Sum val per date with rowsum(), then plot the running total by index.
plot(
  cumsum(rowsum(df$val, df$date)),
  type = "l"
)
Try this:
# Running total of val against the row position, drawn as a line with points.
ggplot(df, aes(x = 1:5, y = cumsum(val))) +
  geom_line() +
  geom_point()
Just remove geom_point() if you don't want it.
Edit: Since you need the x-axis labels to be the dates, you can plot with x=1:5 and use scale_x_discrete to set the labels from the data frame. Taking df:
# Same cumulative plot, with the dates passed as x-axis labels and the tick
# text rotated by 90 degrees.
# NOTE(review): x = 1:5 is numeric; whether scale_x_discrete() draws tick
# labels for numeric positions may depend on the ggplot2 version. Confirm, or
# consider scale_x_continuous(breaks = 1:5, labels = df$date).
ggplot(data = df, aes(x = 1:5, y = cumsum(val))) + geom_line() +
geom_point() + theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = df$date) + xlab("Date")
Since you say you'll have more than 1 val for "date", you can aggregate them first using plyr, for example.
# library() errors if plyr is not installed; require() would only return FALSE.
library(plyr)
# Collapse repeated dates: one row per date holding the sum of its val entries.
dd <- ddply(df, .(date), summarise, val = sum(val))
Then you can proceed with the same command by replacing x = 1:5 with x = seq_len(nrow(dd)).
After a couple of years, I've settled on doing:
# Parse the yyyymmdd numbers into Date objects for the x axis and plot the
# running total of val as a line.
ggplot(
  df,
  aes(as.Date(as.character(date), "%Y%m%d"), cumsum(val))
) +
  geom_line()
Jan Boyer seems to have found a more concise solution to this problem in this question, which I have shortened a bit and combined with the answers of Prradep, so as to provide a (hopefully) up-to-date-answer:
# Bars of value per date with a rescaled cumulative line drawn on top.
ggplot(df, aes(x = date)) +
  geom_col(aes(y = value)) +
  # group = 1 joins all dates into one line; this layer defines its own
  # aesthetics because inherit.aes = FALSE.
  # NOTE(review): the division by 5 looks like a manual rescaling so the line
  # fits the range of the bars; confirm.
  geom_line(
    aes(x = date, y = cumsum(value) / 5, group = 1),
    inherit.aes = FALSE
  ) +
  ylab("Value") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Note that date is not in Date-Format, but character, and that value is already grouped as suggested by Prradep in his answer above.

Sort legend in ggplot2

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
data <- read.csv(file = "file.csv", sep = ",", header = TRUE)
# Drop the first column, which holds the row index read from the file.
data <- data[, 2:ncol(data)]
# Number the rows 1..n directly. sort(rownames(data)) orders the names as text
# ("1", "10", "11", ..., "2"), so converting them with as.numeric() can attach
# the wrong number to a row. `id.vars` is the melt() argument that marks the
# identifier column; `is.var` is not a melt() argument.
datam <- melt(cbind(data, ind = seq_len(nrow(data))), id.vars = "ind")
ggplot(datam, aes(x = variable, y = value, fill = factor(ind))) +
  # The bar heights come from `value`, so stat = "identity" is required; the
  # default counting stat does not accept a y aesthetic in current ggplot2.
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_discrete("Barcode\nMatch") +
  xlab("Barcode") +
  ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
# Stacked percent bar chart with the legend entries listed in reverse order.
ggplot(datam, aes(x = variable, y = value, fill = factor(as.numeric(ind)))) +
  # stat = "identity" is needed because the bar heights come from `value`;
  # the default counting stat rejects a y aesthetic in current ggplot2.
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_discrete("Barcode\nMatch") +
  xlab("Barcode") +
  ylab("Reads") +
  guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
# Bar counts per group. The second geom_bar() layer adds black outlines and is
# kept out of the legend. `show.legend` is the current name of the old
# `legend` argument, which ggplot2 no longer recognises.
ggplot(data = PlantGrowth, aes(x = group, fill = group)) +
  geom_bar() +
  geom_bar(colour = "black", show.legend = FALSE) +
  # breaks lists the legend entries in the order they should appear.
  scale_fill_hue(breaks = c("trt1", "ctrl", "trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with the new ggplot, but I'm not sure.

Resources