Barplot with ggplot2 using data from read.csv2 - r

I am trying to create a barplot with the ggplot2 library. My data is stored in read.csv2 format.
# Library
library(ggplot2)
library(tidyverse) # function "%>%"
# 1. Read data (comma separated)
data = read.csv2(text = "Age;Frequency
0 - 10;1
11 - 20;5
21 - 30;20
31 - 40;13
41 - 49;1")
# 2. Print table
df <- as.data.frame(data)
df
# 3. Plot bar chart
ggplot(df, aes(x = Age)) +
geom_bar() +
theme_classic()
The code runs fine, but it produces a graph that looks like all data are at max all the time.

You need to specify your y axis as well:
ggplot(df, aes(x = Age, y = Frequency)) +
geom_bar(stat = "identity") +
theme_classic()

The default value of geom_bar plots the frequency of the values which is 1 for all the Age values here (Check table(df$Age)). You may use geom_bar with stat = 'identity'
library(ggplot2)
ggplot(df, aes(Age, Frequency)) +
geom_bar(stat = 'identity') +
theme_classic()
OR geom_col :
ggplot(df, aes(Age, Frequency)) +
geom_col() +
theme_classic()

Related

ggplot reorder factors in plot without affecting legend order [duplicate]

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") + xlab("Barcode")+ylab("Reads") +
guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
ggplot(data=PlantGrowth, aes(x=group, fill=group)) + geom_bar() +
geom_bar(colour="black", legend=FALSE) +
scale_fill_hue(breaks=c("trt1","ctrl","trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with he new ggplot but I'm not sure.

Adding names to all X axis values using ggplot2 in R

The head of data frame is as follows:
Age number
21 4
22 4
23 5
24 6
25 11
26 10
I am trying to plot the frequency chart using ggplot using the following code
ggplot(data=x2, aes(x=Age, y=number)) +
geom_bar(stat="identity", fill="steelblue")+
geom_text(aes(label=number), vjust=-0.3, size=3.5)+
theme_minimal()+ labs(x = "Age", y = "Number of users")+
ggtitle("Frequency of Age")
and I get the output but not all the values on the X Axis are visible. I am sorry as this might be a very silly question but I am very new to R.
You can use scale_x_continuous to set the axis breaks. With such a large number of axis labels, this probably works better if the orientation is flipped. Even then, it's still quite crowded.
library(tidyverse)
# Fake data
set.seed(2)
x2 = data_frame(Age=sample(20:70, 1000, replace=TRUE)) %>%
group_by(Age) %>%
summarise(number=n())
ggplot(data=x2, aes(x=Age, y=number)) +
geom_bar(stat="identity", fill="steelblue")+
geom_text(aes(label=number, y=0.5*number), size=3, colour="white")+
theme_minimal() +
labs(x = "Age", y = "Number of users")+
ggtitle("Frequency of Age") +
coord_flip() +
scale_x_continuous(breaks=min(x2$Age):max(x2$Age), expand=c(0,0.1)) +
scale_y_continuous(expand=c(0,0.2))

ggplot: modify coord_cartesian() values dynamically

Is there a way to modify the plot coordinates using coord_cartesian() in a way that is dynamic based on the data in the ggplot call?
For example:
ggplot(cars, aes(x = speed, y = dist)) +
geom_point() +
coord_cartesian(xlim = c(min(.$speed), max(.$speed) + 10))
This doesn't work, nor does using .data$speed.
You can use the magrittr and dplyr pipe operator (%>%) to pass data into the ggplot call.
cars %>%
{ggplot(., aes(speed, dist)) +
geom_point() +
coord_cartesian(xlim = c(min(.$speed), max(.$speed) + 10))
}
Note that now you need to explicitly identify the data argument at the beginning of the ggplot call using the . pronoun.
It's a bit hacky but you can add a blank geom and add 10 to the y in the aesthetics.
ggplot(cars, aes(x = speed, y = dist)) +
geom_point() +
geom_blank(aes(y = dist + 10))

Scaling really large bar compared to other bars in ggplot2 in R

I am trying to make a plot in ggplot2 in R with the following code:
feature
[1] abs_deg_sum_1 NumAfterEdits_1 N_1 NumAfterEdits_3
[5] TimeSinceLastEdit_2 wt_product_1 NumAfterEdits_2 dwdt_1
52 Levels: abs_deg_diff_1 abs_deg_diff_2 abs_deg_diff_3 abs_deg_diff_4 ... Z_4
relative_importance
[1] 61.048212 17.235435 1.891542 1.409848 1.356924 1.264824 1.220593 1.184612
library(ggplot2)
df = data.frame(feature, relative_importance)
c <- ggplot(df, aes(x = feature, y = relative_importance, fill = feature)) + geom_bar(stat = "identity")
c + coord_flip()
positions <- c("abs_deg_sum_1", "NumAfterEdits_1", "N_1", "NumAfterEdits_3","TimeSinceLastEdit_2", "wt_product_1", "NumAfterEdits_2",
"dwdt_1")
c <- c + scale_x_discrete(limits = positions)
c + coord_flip()
Since the first value in relative_importance is really large compared to all other values, the plot doesn't show much about the other values. I get the following plot:
How can I change my code to capture more information in my plot? Especially about the smaller values
Here are several options, though I prefer the first or second (or maybe the third if you really want to go with a bar plot):
# Fake data
dat = data.frame(group=LETTERS[1:5], values=c(1.5,0.6,12.6,2.1,85))
# Value labels instead of bars, plus we add a horizontal segment to provide
# better visual guidance as to the relative values. This also requires
# some factor gymnastics to be able to get both the segments and the
# correct x-axis labels. I've left in the legend, but it's not necessary
# and can be removed if you wish.
ggplot(dat, aes(as.numeric(group), values, colour=group)) +
geom_segment(aes(x=as.numeric(group)-0.35, xend=as.numeric(group)+0.35,
yend=values), alpha=0.75) +
geom_text(aes(label=values), fontface="bold", show_guide=FALSE) +
scale_x_continuous(breaks=1:5, labels=levels(dat$group))
#scale_y_log10(limits=c(0.1,100), breaks=c(0.1, 0.3,1,3,10,30,100)) # For a log scale, if desired
#coord_flip() # Flip to horizontal orientation, if desired
# Value labels instead of bars
ggplot(dat, aes(group, values, colour=group)) +
geom_text(aes(label=values), fontface="bold")
# Bar plot with value labels added
ggplot(dat, aes(group, values, fill=group)) +
geom_bar(stat="identity") +
geom_text(aes(label=values, y=0.5*values), size=5, colour="black")
# Value labels instead of bars; log scale
ggplot(dat, aes(group, values, colour=group)) +
geom_text(aes(label=values)) +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
# Bar plot with log scale. Note that bar baseline is 1 instead of
# zero for a log scale, so this doesn't work so well.
ggplot(dat, aes(group, values, fill=group)) +
geom_bar(stat="identity") +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
# Points instead of bars; log scale
ggplot(dat, aes(group, values, fill=group)) +
geom_point(pch=21, size=4) +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
If the logarithmic axis doesn't work for you and if you have some flexibility in the plot format, you could divide the features into two groups based on the value of relative_importance and show each in it's own panel with appropriate y-scales. Code including adjustment of bar widths would look like:
library(ggplot2)
# assign rows to Large or Small group
cut_off_for_small_values <- 3
small_value_title <- "Expanded_Scale_for_Smaller_Values"
df <- data.frame(feature, relative_importance,
importance_grp = ifelse(relative_importance > cut_off_for_small_values,
"All", small_value_title))
# calculate relative bar widths
width_adj <- .8*nrow(df[df$importance_grp==small_value_title,])/nrow(df)
# plot data
c <- ggplot(df, aes(x = feature, y = relative_importance, fill = feature))
c <- c + geom_bar(data=transform(df, importance_grp="All"),
stat = "identity")
c <- c + geom_bar(data=df[df$importance_grp==small_value_title,],
stat = "identity", width=width_adj)
c <- c + geom_text(aes(x = feature, y = relative_importance,
label = format(relative_importance, digits=3), vjust=-.5))
c <- c + theme(axis.text.x = element_text(angle=90))
c <- c + facet_wrap( ~ importance_grp, scales="free" )
which gives plot

Using ggplot2 to plot geom_errorbar for Date in R

In many cases, we need to demonstrate the standard error. In ggplot2, we can do it using the geom_errorbar function. I find that when the x variable is of the Date type, ggplot2 could not plot the error bar completely. See the R script below for more information.
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
# With a line graph
p1 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
ce$Date = as.Date(c('01/01/2001', '01/01/2002', '01/01/2003'), "%m/%d/%Y")
p2 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
p1
p2
Simply following RHA's directions (code below). #RHA, please feel free to copy my answer into a new one as it's more yours then it's mine.
# install.packages("gcookbook", dependencies = TRUE)
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
# With a line graph
# install.packages("ggplot2", dependencies = TRUE)
require(ggplot2)
ce$Date = as.Date(c('01/01/2001', '01/01/2002', '01/01/2003'), "%m/%d/%Y")
(p2 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin = Weight- se, ymax= Weight + se), width=45)))

Resources