ggplot Find sum of all groups and plot as line - r

I have a data that looks like this
Group x y
A 2 30
B 2 21
C 2 22
A 3 15
B 3 18
C 3 5
A 4 14
B 4 29
C 4 46
And create a chart with:
gg <- ggplot(mydata,
aes(x=x, y=y, fill=Group, group=Group))+
geom_line(data =mydata,
aes(x=x, y=y,colour=Group),
stat="identity",
size=1.5)
plot(gg)
I'm trying to add a fourth line that has the sum of A+B+C at every X. I've tried this but it adds 5 lines, not one with a sum. I want a line that would be y=73 when x=2, y=38 when x=3, and y=89 when x=4.
Code:
Group <- c("A", "B", "C","A", "B", "C","A", "B", "C")
x <- c(2,2,2,3,3,3,4,4,4)
y <- c(30,21,22,15,18,5,14,29,46)
mydata <- data.frame(Group,x,y)
gg <- ggplot(mydata,
aes(x=x, y=y, fill=Group, group=Group))+
geom_line(data =mydata,
aes(x=x, y=y,colour=Group),
stat="identity",
size=1.5)
plot(gg)

One way would be to generate a variable that sums all values of y by x via dplyr's group_by and mutate-functions. You can then generate your plot and add a second line geom that will show the x-specific sums.
library(tidyverse)
mydata %>%
group_by(x) %>%
mutate(sum.y = sum(y)) %>%
ggplot(aes(x=x, y=y, color=Group))+
geom_line(size=1.5) +
geom_line(aes(y = sum.y), color = "black")
Note that I changed your code by removing redundant code in the aesthetics, stat = "identity" in geom_line and all of the data = mydata specifications. These are simply not necessary.

Related

Plotting small multiples using for loop and ggplot

I have a dataset as follows:
Unit Group Feature1 Feature2 Feature3 Feature4
1 1 blue x a 12
2 1 yellow y b 15
3 2 green x a 13
4 3 indigo z c 12
5 1 green y b 16
I'd like to create a grid of visualizations (small multiples) where each row is a group, and each column contains the proportions of each feature (i.e. the output of the table function, e.g. table(dataset$feature1)). I have done the following; however, I am having a hard time creating a grid of these visualizations while using a for loop. Currently, I get four different images, each with four charts. Any ideas on how to turn this into essentially a 4x4 grid of bar charts rather than 4 separate images?
library(gridExtra)
input_max_groups <- 4
for (i in 1:input_max_groups) {
dataset_subset <- subset(dataset, group== i)
feature1_df <- as.data.frame(table(dataset_subset$feature1)/nrow(dataset_subset)*100)
feature1_plot <- feature1_df %>%
ggplot(aes(x=Var1, y=Freq)) +
geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
xlab("") +
theme_bw()
feature2_df <- as.data.frame(table(dataset_subset$feature2)/nrow(dataset_subset)*100)
feature2_plot <- feature2_df %>%
ggplot(aes(x=Var1, y=Freq)) +
geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
xlab("") +
theme_bw()
feature3_df <- as.data.frame(table(dataset_subset$feature3)/nrow(dataset_subset)*100)
feature3_plot <- feature3_df %>%
ggplot(aes(x=Var1, y=Freq)) +
geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
xlab("") +
theme_bw()
feature4_df <- as.data.frame(table(dataset_subset$feature4)/nrow(dataset_subset)*100)
feature4_plot <- feature4_df %>%
ggplot(aes(x=Var1, y=Freq)) +
geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
xlab("") +
theme_bw()
plot <- grid.arrange(feature1_plot, feature2_plot, feature3_plot, feature4_plot, nrow=4)
}
Do you mean something like this?
You can accomplish small multiples by using facet_wrap() and you can plot percentages instead of counts by using stat = 'count' in geom_bar() and mapping the y-aesthetic to the special variable ..prop.. for proportion. In your case, you'll need to specify the group to get the proper proportion, and use scales = 'free_x' in the facet wrap to get the x-axis for each facet to contain only the variables of interest.
Now, your data is wide and ggplot likes long data, so you need to pivot the feature columns into rows (making your data longer) using pivot_longer().
This way you can leave out your loops and grid.arrange and do it in a single ggplot call.
library(tidyverse)
dataset_subset <- tribble(
~"Unit", ~"Group", ~"Feature1", ~"Feature2", ~"Feature3", ~"Feature4",
1, 1, "blue" , "x", "a", "12",
2, 1, "yellow", "y", "b", "15",
3, 2, "green" , "x", "a", "13",
4, 3, "indigo", "z", "c", "12",
5, 1, "green" , "y", "b", "16")
dataset_subset %>%
pivot_longer(contains("Feature")) %>%
ggplot(aes(x = value)) +
geom_bar(aes(y = ..prop.., group = name), stat = "count", fill = "#f68060", alpha =.6, width = .4) +
scale_y_continuous(labels = scales::percent) +
facet_wrap(~name, scales = "free_x")
Created on 2020-05-23 by the reprex package (v0.3.0)

Scatter plot with ggplot

I want to do a scatter (xy) plot of variables in a melted data frame as shown below.
df
class var mean
0 x 4.25
0 y 6.25
1 x 2.00
1 y 11.00
I have tried this, but it plots 4 points. How can I plot x against y?
library(ggplot2)
ggplot(df, aes(x=mean, y=mean, group=var, colour=class)) +
geom_point( size=5, shape=21, fill="white")
As Heroka pointed out, you need the data to be in a wider format. If the data were read in like this, you may use the following to convert it.
## you don't need this since you already have df
## (it only rebuilds the example data shown in the question)
text = "class var mean
0 x 4.25
0 y 6.25
1 x 2.00
1 y 11.00"
## read the space-separated text into a long-format data frame
df = read.delim(textConnection(text),header=TRUE,strip.white=TRUE,
stringsAsFactors = FALSE, sep = " ")
## print the imported data to check it; df2 does not exist yet at this point,
## it is only created by the dcast() call further down
df
## use this library to switch from long-wide
library(reshape2)
df2 = dcast(df, class ~ var, value.var = "mean")
library(ggplot2)
ggplot(df2, aes(x=x, y=y, colour=class)) +
geom_point( size=5, shape=21, fill="white")

ggplot: colouring areas between density lines according to relative position

I have this plot
set.seed(28100)
df <- data.frame(value = sample(1:10000,1000,replace=TRUE),
gender = sample(c("male","female"),1000,replace=TRUE))
ggplot(df, aes(value)) +
geom_density() +
geom_density(data=subset(df, gender=='male'), aes(value), colour="blue") +
geom_density(data=subset(df, gender=='female'), aes(value), colour="red")
I wonder if it's conceivable to fill the areas between the red and blue density lines with two colours: one colour when the blue line is above the red line and a different colour when the blue line is below.
There's no easy way to color in different overlapping regions unless you explicitly calculate the regions yourself. Here's a function that can help calculate regions where densities swap places
# Compare per-group kernel density curves on a common grid.
#
# val : numeric vector of observations.
# grp : grouping variable, same length as val (coerced to a factor).
# N   : number of grid points used when x is not supplied.
# x   : grid at which the densities are evaluated; defaults to N equally
#       spaced points spanning the range of val.
#
# Returns a data.frame with one row per grid point:
#   x     - the grid point
#   ymin  - lowest density among the groups at that point
#   ymax  - highest density among the groups at that point
#   top   - level of grp whose density is highest at that point
#   group - run counter that starts at 1 and increases by one every time
#           `top` changes between consecutive grid points
densitysplit <- function(val, grp, N=200, x=seq(min(val), max(val), length.out=N)) {
  grp <- factor(grp)
  lev <- levels(grp)

  # one vector of density heights per level, linearly interpolated at x
  heights <- lapply(lev, function(g) {
    kde <- density(val[grp == g])
    approx(kde$x, kde$y, xout = x)$y
  })

  # for every grid point, the index of the level with the tallest curve
  winner <- apply(do.call(cbind, heights), 1, which.max)

  lower <- do.call(pmin, heights)
  upper <- do.call(pmax, heights)
  # a new run starts wherever the winning level differs from the previous point
  run_id <- cumsum(c(1, diff(winner) != 0))

  data.frame(x = x, ymin = lower, ymax = upper,
             top = lev[winner],
             group = run_id)
}
For your data, you would do something like this
head(densitysplit(df$value, df$gender))
# x ymin ymax top group
# 1 8.00000 4.214081e-05 5.198326e-05 male 1
# 2 58.17085 4.485596e-05 5.433638e-05 male 1
# 3 108.34171 4.760983e-05 5.665547e-05 male 1
# 4 158.51256 5.039037e-05 5.893143e-05 male 1
# 5 208.68342 5.318724e-05 6.115595e-05 male 1
# 6 258.85427 5.598707e-05 6.332672e-05 male 1
This gives you the data you need to use geom_ribbon to plot the data. You can do
ggplot(df, aes(value)) +
geom_ribbon(data=densitysplit(df$value, df$gender), aes(x, ymin=ymin, ymax=ymax, fill=top, group=group)) +
geom_density() +
geom_density(data=subset(df, gender=='male'), aes(value), colour="blue") +
geom_density(data=subset(df, gender=='female'), aes(value), colour="red")
You can use fill and alpha to generate the (maybe) desired effect.
set.seed(28100)
df <- data.frame(value = sample(1:10000,1000,replace=TRUE),
gender = sample(c("male","female"),1000,replace=TRUE))
ggplot(df, aes(value, colour=gender, fill=gender, alpha=0.5)) +
geom_density() +theme(legend.position="none")
I hope this helps. Cheers

facet_wrap equal axis per panel

I would like to make a plot using facet_wrap where the axes can vary for each panel but within a panel the x and y axes should be the same scale.
e.g. see the following plots
df <- read.table(text = "
x y g
1 5 a
2 6 a
3 7 a
4 8 a
5 9 b
6 10 b
7 11 b
8 12 b", header = TRUE)
library(ggplot2)
ggplot(df, aes(x=x,y=y,g=g)) +
geom_point() +
facet_wrap(~ g) # all axes 1-12
ggplot(df, aes(x=x,y=y,g=g)) +
geom_point() +
facet_wrap(~ g, scales = "free")
# free axes, x & y axes don't match per panel
What I want is for panel a the x and y axes both to range from 1 - 8 and for panel b the x and y axes both to range from 5 - 12.
Is this possible?
Using this answer you could try the following:
dummy <- data.frame(x = c(1, 8, 5, 12), y = c(1, 8, 5, 12), g = c("a", "a", "b", "b"))
ggplot(df, aes(x=x,y=y)) +
geom_point() +
facet_wrap(~ g, scales = "free") +
geom_blank(data = dummy)
Another solution is to trick the axes for individual facet_wrap() plots by adding invisible points to the plots with x and y reversed so that the plotted data is "square", e.g.,
library(ggplot2)
p <- ggplot(data = df) +
geom_point(mapping = aes(x = x, y = y)) +
geom_point(mapping = aes(x = y, y = x), alpha = 0) +
facet_wrap( ~ g, scales = "free")
print(p)
You could also use geom_blank(). You don't need dummy data.
This wasn't an option when the question was asked, but these days I would highly recommend patchwork for combining plots.

ggplot not adding legend. What am I missing? very new to R

I'm plotting three samples with ggplot but it's not adding a legend for the samples. It's not spitting out any error message so I'm not sure where I'm going wrong. I'd really appreciate some guidance.
I've tried to declare color for each sample for the legend manually but there is still no legend on the plot.
df<-data.frame(samples$V1, samples$V2, samples$V3, samples$V4, samples$V5, samples$V6, samples$V7)
CG_methplot <- ggplot(df, aes(x=samples$V1,))+
scale_x_continuous(breaks=number_ticks(10))+
xlab("bins")+
ylab("mean CG methylation")+
geom_point(aes(y=samples$V2), size=3, colour='#009933')+
geom_point(aes(y=samples$V3), size=3, colour='#FF0000')+
geom_point(aes(y=samples$V4), size=3, colour='#0033FF')+
scale_color_manual(values=c("samples1"="009933", "sample2"="FF0000", "sample3" ="0033FF"))
CG_methplot
As requested, sample data.
head(df)
samples.V1 samples.V2 samples.V3 samples.V4 samples.V5 samples.V6 samples.V7
1 1 0.033636 0.027857 0.028830 0.029836 0.024457 0.024930
2 2 0.032094 0.029620 0.028005 0.028294 0.026220 0.024105
3 3 0.032011 0.027212 0.029728 0.028211 0.023812 0.025828
4 4 0.030857 0.029833 0.028907 0.027057 0.026433 0.025007
5 5 0.028480 0.028080 0.028553 0.024680 0.024680 0.024653
6 6 0.029445 0.027099 0.029346 0.025645 0.023699 0.025446
library(reshape2)
# df's columns are named samples.V1 ... samples.V7 (see the head(df) output),
# so the id column has to be referenced by that full name, not "V1"
melted <- melt(df, id.vars = "samples.V1")
# mapping colour to `variable` inside aes() is what makes ggplot draw a legend
p <- ggplot(melted, aes(x = samples.V1, y = value, colour = variable))
p + geom_point()

Resources