Add geom_vline in function with multiple density plots - r

I have the following
densityPlots <- lapply(numericCols, function(var_x){
p <- ggplot(df, aes_string(var_x)) + geom_density()
})
numericCols are the names of the columns that are numeric. I want to add the mean line, I have tried multiple things such as
densityPlots <- lapply(numericCols, function(var_x){
p <- ggplot(df, aes_string(var_x)) + geom_density() + geom_vline(aes(xintercept=mean(var_x)),
color="red", linetype="dashed", size=1)
})
The data
str(df)
tibble [9 × 4] (S3: tbl_df/tbl/data.frame)
$ A: num [1:9] 12 NA 34 45 56 67 78 89 100
$ B: num [1:9] 1 2 3 NA 5 6 7 8 9
$ C: num [1:9] 83 55 27 27 7 3 5 8 9
$ D: num [1:9] 6 2 NA 1 NA 3 4 5 6
dput(df)
structure(list(A = c(12, NA, 34, 45, 56, 67, 78, 89, 100), B = c(1,
2, 3, NA, 5, 6, 7, 8, 9), C = c(83, 55, 27, 27, 7, 3, 5, 8, 9
), D = c(6, 2, NA, 1, NA, 3, 4, 5, 6)), row.names = c(NA, -9L
), class = c("tbl_df", "tbl", "data.frame"))
print(numericCols)
[1] "A" "B" "C"
But it does not work, it just ignores the geom_vline function. Does someone have a suggestion? Thanks :)!

You should use mean(df[, var_x], na.rm=T) in geom_vline:
library(ggplot2)
df <- structure(list(A = c(12, NA, 34, 45, 56, 67, 78, 89, 100), B = c(1,
2, 3, NA, 5, 6, 7, 8, 9), C = c(83, 55, 27, 27, 7, 3, 5, 8, 9
), D = c(6, 2, NA, 1, NA, 3, 4, 5, 6)), row.names = c(NA, -9L
), class = c("tbl_df", "tbl", "data.frame"))
numericCols <- c("A","B","C")
df <- as.data.frame(df)
densityPlots <- lapply(numericCols, function(var_x) {
ggplot(df, aes_string(var_x)) + geom_density() +
geom_vline(aes(xintercept=mean(df[, var_x], na.rm=T)),
color="red", linetype="dashed", size=1)
})
gridExtra::grid.arrange(grobs=densityPlots)

Here is an approach somewhat different than what you tried in your question, but uses dplyr and tidyr to pivot the data and use ggplot mapping. Unfortunately, geom_vline doesn't summarize by group, so you have to pre-compute the values:
set.seed(3)
data <- data.frame(Category = paste0("Catagory",LETTERS[1:20]),
lapply(LETTERS[1:10],function(x){setNames(data.frame(runif(20,10,100)),x)}))
numericCols <- LETTERS[1:10]
library(dplyr)
library(tidyr)
library(ggplot2)
data.means <- data %>%
select(numericCols) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
group_by(Variable) %>%
summarize(Mean = mean(var_x))
data %>%
select(numericCols) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
ggplot(aes(x = var_x, color = Variable)) +
geom_density() +
geom_vline(data = data.means, aes(xintercept=Mean, color = Variable),
linetype="dashed", size=1)
Or you could combine with facet_wrap for multiple plots.
data %>%
select(numericCols) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
ggplot(aes(x = var_x)) +
facet_wrap(.~Variable) +
geom_density() +
geom_vline(data = data.means, aes(xintercept=Mean, color = Variable),
linetype="dashed", size=1)

Related

How to make a bar-chart by using two variables on x-axis and a grouped variable on y-axis?

I hope I asked my question in the right way this time! If not let me know!
I want to code a grouped bar-chart similary to this one (I just created in paint):
enter image description here
I created as flipped both it actually doesn't matter if its flipped or not. So, a plot similarly to this will also be very usefull:
Grouped barchart in r with 4 variables
Both the variables, happy and lifesatisfied are scaled values from 0 to 10. Working hours is a grouped value and contains 43+, 37-42, 33-36, 27-32, and <27.
A very similar example of how my data set looks like (I just changed the values and order, I also have much more observations):
Working hours
happy
lifestatisfied
contry
37-42
7
9
DK
<27
8
8
SE
43+
7
8
DK
33-36
6
6
SE
37-42
7
5
NO
<27
4
7
NO
I tried to found similar examples and based on that tried to code the bar chart in the following way but it doesn't work:
df2 <- datafilteredwomen %>%
pivot_longer(cols = c("happy", "stflife"), names_to = "var", values_to = "Percentage")
ggplot(df2) +
geom_bar(aes(x = Percentage, y = workinghours, fill = var ), stat = "identity", position = "dodge") + theme_minimal()
It give this plot which is not correct/what I want:
enter image description here
seocnd try:
forplot = datafilteredwomen %>% group_by(workinghours, happy, stflife) %>% summarise(count = n()) %>% mutate(proportion = count/sum(count))
ggplot(forplot, aes(workinghours, proportion, fill = as.factor(happy))) +
geom_bar(position = "dodge", stat = "identity", color = "black")
gives this plot:
enter image description here
third try - used the ggplot2 builder add-in:
library(dplyr)
library(ggplot2)
datafilteredwomen %>%
filter(!is.na(workinghours)) %>%
ggplot() +
aes(x = workinghours, group = happy, weight = happy) +
geom_bar(position = "dodge",
fill = "#112446") +
theme_classic() + scale_y_continuous(labels = scales::percent)
gives this plot:
enter image description here
But none of my tries are what I want.. really hope that someone can help me if it's possible!
After speaking to the OP I found his data source and came up with this solution. Apologies if it's a bit messy, I have only been using R for 6 months. For ease of reproducibility I have preselected the variables used from the original dataset.
data <- structure(list(wkhtot = c(40, 8, 50, 40, 40, 50, 39, 48, 45,
16, 45, 45, 52, 45, 50, 37, 50, 7, 37, 36), happy = c(7, 8, 10,
10, 7, 7, 7, 6, 8, 10, 8, 10, 9, 6, 9, 9, 8, 8, 9, 7), stflife = c(8,
8, 10, 10, 7, 7, 8, 6, 8, 10, 9, 10, 9, 5, 9, 9, 8, 8, 7, 7)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
Here are the packages required.
require(dplyr)
require(ggplot2)
require(tidyverse)
Here I have manipulated the data and commented my reasoning.
data <- data %>%
select(wkhtot, happy, stflife) %>% #Select the wanted variables
rename(Happy = happy) %>% #Rename for graphical sake
rename("Life Satisfied" = stflife) %>%
na.omit() %>% # remove NA values
group_by(WorkingHours = cut(wkhtot, c(-Inf, 27, 32,36,42,Inf))) %>% #Create the ranges
select(WorkingHours, Happy, "Life Satisfied") %>% #Select the variables again
pivot_longer(cols = c(`Happy`, `Life Satisfied`), names_to = "Criterion", values_to = "score") %>% # pivot the df longer for plotting
group_by(WorkingHours, Criterion)
data$Criterion <- as.factor(data$Criterion) #Make criterion a factor for graphical reasons
A bit more data prep
# Creating the percentage
data.plot <- data %>%
group_by(WorkingHours, Criterion) %>%
summarise_all(sum) %>% # get the sums for score by working hours and criterion
group_by(WorkingHours) %>%
mutate(tot = sum(score)) %>%
mutate(freq =round(score/tot *100, digits = 2)) # get percentage
Creating the plot.
# Plotting
ggplot(data.plot, aes(x = WorkingHours, y = freq, fill = Criterion)) +
geom_col(position = "dodge") +
geom_text(aes(label = freq),
position = position_dodge(width = 0.9),
vjust = 1) +
xlab("Working Hours") +
ylab("Percentage")
Please let me know if there is a more concise or easier way!!
B
DataSource: https://www.europeansocialsurvey.org/downloadwizard/?fbclid=IwAR2aVr3kuqOoy4mqa978yEM1sPEzOaghzCrLCHcsc5gmYkdAyYvGPJMdRp4
Taking this example dataframe df:
df <- structure(list(Working.hours = c("37-42", "37-42", "<27", "<27",
"43+", "43+", "33-36", "33-36", "37-42", "37-42", "<27", "<27"
), country = c("DK", "DK", "SE", "SE", "DK", "DK", "SE", "SE",
"NO", "NO", "NO", "NO"), criterion = c("happy", "lifesatisfied",
"happy", "lifesatisfied", "happy", "lifesatisfied", "happy",
"lifesatisfied", "happy", "lifesatisfied", "happy", "lifesatisfied"
), score = c(7L, 9L, 8L, 8L, 7L, 8L, 6L, 6L, 7L, 5L, 4L, 7L)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
you can proceed like this:
library(dplyr)
library(ggplot2)
df <-
df %>%
pivot_longer(cols = c(happy, lifesatisfied),
names_to = 'criterion',
values_to = 'score'
)
df %>%
ggplot(aes(x = Working.hours,
y = score,
fill = criterion)) +
geom_col(position = 'dodge') +
coord_flip()
For picking colours see ?scale_fill_manual, for formatting legend etc. numerous existing answers to related questions on stackoverflow.

Heatmap in R with raw values

I have this dataframe:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
I would like to create a heatmap, with patient ID in the x axis and clas1, clas2 and clas3 in the y axis. The values represented in the heat map would be the raw value of each "clas". Here I post a drawing of what I would like
I apologise because I don't have available more colours to represent this, but this is only an example and any colour scale could be used.
An important thing is that I would like to distinguish between zeros and NAs so ideally NAs have their own colour or appear in white (empty).
I hope this is understandable enough.
But any questions just ask
Many thanks!
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
library(tidyverse)
df %>% pivot_longer(!PatientID) %>%
ggplot(aes(x= PatientID, y = name, fill = value)) +
geom_tile()
Created on 2021-05-25 by the reprex package (v2.0.0)
Here is a base R option with ``heatmap`
heatmap(t(`row.names<-`(as.matrix(df[-1]), df$PatientID)))
# Which is like
# x <- as.matrix(df[-1]
# row.names(x) <- df$PatientID
# heatmap(t(x))
Preparing the data
I'll give 4 options, in all four you need to assign the rownames and remove the id column. I.e.:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
rownames(df) <- df$PatientID
df$PatientID <- NULL
df
The output is:
> df
clas1 clas2 clas3
3454 1 4 1
345 0 1 NA
5 5 0 0
348 NA 3 5
567 NA 1 5
79 4 0 5
Base R
With base R (decent output):
heatmap(as.matrix(df))
gplots
With gplots (a bit ugly, but many more parameters to control):
library(gplots)
heatmap.2(as.matrix(df))
heatmaply
With heatmaply you have nicer defaults to use for the dendrograms (it also organizes them in a more "optimal" way).
You can learn more about the package here.
Static
Static heatmap with heatmaply (better defaults, IMHO)
library(heatmaply)
ggheatmap(df)
Now with colored dendrograms
library(heatmaply)
ggheatmap(df, k_row = 3, k_col = 2)
With no dendrogram:
library(heatmaply)
ggheatmap(df, dendrogram = F)
Interactive
Interactive heatmap with heatmaply (hover tooltip, and the ability to zoom - it's interactive!):
library(heatmaply)
heatmaply(df)
And anything you can do with the static ggheatmap you can also do with the interactive heatmaply version.
Here is another option:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
# named vector for heatmap
cols <- c("0" = "white",
"1" = "green",
"2" = "orange",
"3" = "yellow",
"4" = "pink",
"5" = "black",
"99" = "grey")
labels_legend <- c("0" = "0",
"1" = "1",
"2" = "2",
"3" = "3",
"4" = "4",
"5" = "5",
"99" = "NA")
df1 <- df %>%
pivot_longer(
cols = starts_with("clas"),
names_to = "names",
values_to = "values"
) %>%
mutate(PatientID = factor(PatientID, levels = c("3454", "345", "5", "348", "567", "79")))
ggplot(
df1,
aes(factor(PatientID), factor(names))) +
geom_tile(aes(fill= factor(values))) +
# geom_text(aes(label = values), size = 5, color = "black") + # text in tiles
scale_fill_manual(
values = cols,
breaks = c("0", "1", "2", "3", "4", "5", "99"),
labels = labels_legend,
aesthetics = c("colour", "fill"),
drop = FALSE
) +
scale_y_discrete(limits=rev) +
coord_equal() +
theme(line = element_blank(),
title = element_blank()) +
theme(legend.direction = "horizontal", legend.position = "bottom")

Faceted Boxplots

I have a boxplot with a group on the left (Baseline.RT) and a group on the right (TBPM.RT). I want to rename Baseline.RT to 1-back and TBPM.RT to TBPM. I also want each group to have a different square frame. I would also like to try black and white options to fill in the values for Neutral, Positive, and Negative.
I have tried several options but did not get the result I wanted. Below is my code. Any help is welcome.
DataFrame:
data.frame(
stringsAsFactors = FALSE,
Participant = c(1, 2, 3, 4, 5, 6),
Sex = c("m", "m", "m", "f", "f", "m"),
Age = c(29, 21, 29, 22, 25, 31),
Stress = c(14, 26, 11, 19, 15, 15),
Dass21.total = c(6, 43, 4, 10, 12, 8),
Dass21Ansie = c(0, 12, 1, 3, 2, 2),
Dass.Depre = c(1, 11, 0, 1, 3, 0),
Dass.Stress = c(5, 20, 3, 6, 7, 6),
Valence = c(0, 1, 2, 0, 1, 2),
Baseline.RT = c(1.17657473346937,
0.656485061072056,0.617504973518475,0.552112912223171,
0.587283706967395,0.569011248952529),
TBPM.RT = c(1.16126499995575,
0.682658424923267,0.643632301167193,0.589782671563839,
0.705303832011063,0.691478784144668),
TotalClockChecks = c(44, 97, 44, 93, 32, 90),
TotalChecks5060 = c(13, 22, 17, 23, 10, 27),
TotalClockResets = c(18, 20, 19, 19, 18, 19),
Correct.Resets = c(16, 16, 18, 18, 12, 19),
Before.55.Resets = c(0, 2, 0, 1, 0, 0),
After.65.Resets = c(2, 2, 1, 0, 6, 0),
MeanResetTime = c(63.0026438647087,
58.9040712799639,60.9928466685597,60.4603108544334,
65.859630879724,60.5522703813385),
Accuracy.Baseline = c(0.987179487179487,
0.991489361702128,0.97907949790795,0.987234042553191,1,
0.987234042553191),
Accuracy.TBPM = c(0.968619246861925,
0.972746331236897,0.989626556016598,0.972515856236786,
0.974736842105263,0.991786447638604),
rau.Baseline = c(112.453264487601,
114.413187265486,109.508971532343,112.475825131896,
122.999999959683,112.475825131896),
rau.TBPM = c(106.447535249234,
107.58519024216,113.516946707831,107.519541719961,
108.163803190644,114.564811317506)
)
Plot:
my44 %>%
select(Participant, Valence, Baseline.RT,TBPM.RT) %>% #Select interest variables
gather(Task,RT, -Valence, -Participant) %>%
ggplot(., aes(factor(Valence), RT)) + #plot
geom_boxplot() + facet_wrap(~ Task) +
scale_x_discrete(name = element_blank(), labels=c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
scale_fill_discrete(name="Valence",
breaks=c("0", "1", "2"),
labels=c("Neutral", "Positive",
"Negative"))
The obtained results:
Created on 2020-09-02 by the reprex package (v0.3.0)
It sounds like you're looking for something like this (although your question's input data doesn't produce the values displayed in your plot, and you seem to have a default theme set somewhere).
Your fill colours can be chosen by scale_fill_manual, but you need to map the Valence variable to the fill scale if you want the different boxes to have different colours.
If you want a frame around each facet, theme_bw does this by default, or you can use theme(panel.border = element_rect(colour = "black")).
To re-name facets, I would normally just re-name the faceting variables to the desired names in the input, but here I have shown an alternative method using the labeller parameter in facet_wrap.
my44 %>%
select(Participant, Valence, Baseline.RT,TBPM.RT) %>% #Select interest variables
gather(Task,RT, -Valence, -Participant) %>%
ggplot(., aes(factor(Valence), RT)) +
geom_boxplot(aes(fill = factor(Valence))) +
facet_wrap(~ Task,
labeller = function(x) data.frame(Task = c("1-back", "TBPM"))) +
scale_x_discrete(name = element_blank(),
labels=c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
scale_fill_manual(name="Valence",
breaks=c("0", "1", "2"),
labels=c("Neutral", "Positive","Negative"),
values = c("gray50", "gray75", "gray95")) +
theme_bw() +
theme(legend.position = "none",
strip.background = element_blank())

How do you create a grouped barplot in R from only certain columns?

I have a data frame that looks like
Role <- letters(1:3)
df <- data.frame(Role,
Female1=c(1,4,2),
Male1 = c(3,0,0),
Female2 = c(3,5,3),
Male2 = c(1,3,0),
FemaleTotal = Female1+Female2,
MaleTotal = Male1+Male2)
And want to create a barplot grouped with Male,Female for each column category, (in this example it would be 1 and 2), stacked with Roles and also another plot with just the totals. To do just the totals I could use melt() and subset the dataframe to only have those columns, but that seems messy and doesnt help witht the main plot I want to make.
An option would be to reshape to 'long' format
library(dplyr)
library(tidyr)
library(ggplot2)
df %>%
pivot_longer(cols = -Role, names_to = c( "group", '.value'),
names_sep="(?<=[a-z])(?=(\\d+|Total))") %>%
pivot_longer(-c(Role, group)) %>%
ggplot(aes(x = Role, y = value, fill = group)) +
geom_col() +
facet_wrap(~ name)
-output
data
df <- structure(list(Role = c("a", "b", "c"), Female1 = c(1, 4, 2),
Male1 = c(3, 0, 0), Female2 = c(3, 5, 3), Male2 = c(1, 3,
0), FemaleTotal = c(4, 9, 5), MaleTotal = c(4, 3, 0)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))

Show point colour according to their row position in table

I want to display a scatter plot of points from a csv table with ggplot2. The trick is that I'd like each point, or cross, to have a different colour according to their row number in the csv file (using RColorBrewer's spectral colours).
The dataset (dat) looks like this:
modu mnc eff
1 0.3080473 0 0.4420544
2 0.3110355 4 0.4633741
3 0.3334024 9 0.4653061
So I'd like row 1 to be very blue, row two to be a little less, row three to be kind of green, etc.
Here's my code so far:
library(ggplot2)
library(RColorBrewer)
dat <- structure(list(modu = c(0.30947265625, 0.3094921875, 0.32958984375,
0.33974609375, 0.33767578125, 0.3243359375, 0.33513671875, 0.3076171875,
0.3203125, 0.3205078125, 0.3220703125, 0.28994140625, 0.31181640625,
0.352421875, 0.31978515625, 0.29642578125, 0.34982421875, 0.3289453125,
0.30802734375, 0.31185546875, 0.3472265625, 0.303828125, 0.32279296875,
0.3165234375, 0.311328125, 0.33640625, 0.3140234375, 0.33515625,
0.34314453125, 0.33869140625), mnc = c(15, 9, 6, 0, 10, 12, 14,
9, 5, 11, 0, 15, 0, 2, 14, 13, 14, 17, 11, 12, 13, 6, 4, 0, 13,
7, 10, 12, 7, 13), eff = c(0.492448979591836, 0.49687074829932,
0.49421768707483, 0.478571428571428, 0.493537414965986, 0.493809523809524,
0.49891156462585, 0.499319727891156, 0.495102040816327, 0.492285714285714,
0.482312925170068, 0.498911564625851, 0.479931972789116, 0.492857142857143,
0.495238095238095, 0.49891156462585, 0.49530612244898, 0.495850340136055,
0.50156462585034, 0.496, 0.492897959183673, 0.487959183673469,
0.495605442176871, 0.47795918367347, 0.501360544217687, 0.497850340136054,
0.493496598639456, 0.493741496598639, 0.496734693877551, 0.499659863945578
)), .Names = c("modu", "mnc", "eff"), row.names = c(NA, 30L), class = "data.frame")
dat2 <- structure(list(modu = c(0.26541015625, 0.282734375, 0.28541015625,
0.29216796875, 0.293671875), mnc = c(0.16, 0.28, 0.28, 0.28,
0.28), eff = c(0.503877551020408, 0.504149659863946, 0.504625850340136,
0.505714285714286, 0.508503401360544)), .Names = c("modu", "mnc",
"eff"), row.names = c(NA, 5L), class = "data.frame")
dat$modu = dat$modu
dat$mnc = dat$mnc*50
dat$eff = dat$eff
dat2$modu = dat2$modu
dat2$mnc = dat2$mnc*50
dat2$eff = dat2$eff
res <- do.call(rbind, combn(1:3, 2, function(ii)
cbind(setNames(dat[,c(ii, setdiff(1:3, ii))], c("x", "y")),
var=paste(names(dat)[ii], collapse="/")), simplify=F))
ggplot(res, aes(x=x, y=y))+ geom_point(shape=4) +
facet_wrap(~ var, scales="free")
How should I go about doing this?
Thanks!
res <- do.call(rbind, combn(1:3, 2, function(ii)
cbind(row=seq(nrow(dat)),setNames(dat[,c(ii, setdiff(1:3, ii))], c("x", "y")),
var=paste(names(dat)[ii], collapse="/")), simplify=F))
ggplot(res, aes(x=x, y=y, color=row))+ geom_point(shape=4) +
scale_color_gradientn(colours=rev(brewer.pal(10,"Spectral")))+
facet_wrap(~ var, scales="free")

Resources