I have the following
densityPlots <- lapply(numericCols, function(var_x){
p <- ggplot(df, aes_string(var_x)) + geom_density()
numericCols holds the names of the columns that are numeric. I want to add a vertical line at the mean of each column; I have tried multiple things, such as:
densityPlots <- lapply(numericCols, function(var_x){
p <- ggplot(df, aes_string(var_x)) + geom_density() + geom_vline(aes(xintercept=mean(var_x)),
color="red", linetype="dashed", size=1)
The data
tibble [9 × 4] (S3: tbl_df/tbl/data.frame)
$ A: num [1:9] 12 NA 34 45 56 67 78 89 100
$ B: num [1:9] 1 2 3 NA 5 6 7 8 9
$ C: num [1:9] 83 55 27 27 7 3 5 8 9
$ D: num [1:9] 6 2 NA 1 NA 3 4 5 6
structure(list(A = c(12, NA, 34, 45, 56, 67, 78, 89, 100), B = c(1,
2, 3, NA, 5, 6, 7, 8, 9), C = c(83, 55, 27, 27, 7, 3, 5, 8, 9
), D = c(6, 2, NA, 1, NA, 3, 4, 5, 6)), row.names = c(NA, -9L
), class = c("tbl_df", "tbl", "data.frame"))
[1] "A" "B" "C"
But it does not work, it just ignores the geom_vline function. Does someone have a suggestion? Thanks :)!

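In your attempt, var_x is a character string (the column name), so mean(var_x) is NA and no line is drawn. Compute the mean from the column itself with mean(df[[var_x]], na.rm=TRUE) in geom_vline (with a plain data.frame, mean(df[, var_x], na.rm=T) also works, but on a tibble df[, var_x] stays a one-column tibble):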
# Example data: a 9 x 4 tibble with a few missing values.
df <- structure(list(A = c(12, NA, 34, 45, 56, 67, 78, 89, 100), B = c(1,
2, 3, NA, 5, 6, 7, 8, 9), C = c(83, 55, 27, 27, 7, 3, 5, 8, 9
), D = c(6, 2, NA, 1, NA, 3, 4, 5, 6)), row.names = c(NA, -9L
), class = c("tbl_df", "tbl", "data.frame"))
numericCols <- c("A", "B", "C")

# One density plot per numeric column, each with a dashed red vertical line
# at the column mean. The result is a list of ggplot objects, in the same
# order as `numericCols`.
densityPlots <- lapply(numericCols, function(var_x) {
  # `df` is a tibble, so `df[, var_x]` stays a one-column tibble and mean()
  # of that returns NA with a warning. `[[` extracts the numeric vector.
  # na.rm = TRUE because the columns contain missing values.
  col_mean <- mean(df[[var_x]], na.rm = TRUE)
  ggplot(df, aes_string(var_x)) +
    geom_density() +
    # The mean is a constant per plot, so it is passed as a fixed parameter
    # rather than mapped through aes().
    geom_vline(xintercept = col_mean,
               color = "red", linetype = "dashed", size = 1)
})

Here is an approach somewhat different than what you tried in your question, but uses dplyr and tidyr to pivot the data and use ggplot mapping. Unfortunately, geom_vline doesn't summarize by group, so you have to pre-compute the values:
data <- data.frame(Category = paste0("Catagory",LETTERS[1:20]),
numericCols <- LETTERS[1:10]
# Per-variable means, pre-computed because geom_vline() does not summarise
# by group. The result has one row per variable: columns `Variable`, `Mean`.
data.means <- data %>%
  # all_of() is required when selecting with an external character vector;
  # passing the bare vector is deprecated and ambiguous if `data` happens to
  # contain a column called `numericCols`.
  select(all_of(numericCols)) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
  group_by(Variable) %>%
  summarize(Mean = mean(var_x))
# Overlay the density curves of all selected variables in one panel, coloured
# by variable, with a dashed vertical line at each variable's mean.
data %>%
  # all_of(): select by an external character vector (the bare vector form is
  # deprecated).
  select(all_of(numericCols)) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
  ggplot(aes(x = var_x, color = Variable)) +
  geom_density() +
  # The mean lines come from the separate summary table `data.means`.
  geom_vline(data = data.means, aes(xintercept = Mean, color = Variable),
             linetype = "dashed", size = 1)
Or you could combine with facet_wrap for multiple plots.
# Same densities, but one panel per variable via facet_wrap(); each panel
# gets its own dashed mean line.
data %>%
  # all_of(): select by an external character vector (the bare vector form is
  # deprecated).
  select(all_of(numericCols)) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "var_x") %>%
  ggplot(aes(x = var_x)) +
  facet_wrap(. ~ Variable) +
  geom_density() +
  # `data.means` carries a `Variable` column, so each mean line is drawn only
  # in the matching facet.
  geom_vline(data = data.means, aes(xintercept = Mean, color = Variable),
             linetype = "dashed", size = 1)


How to make a bar-chart by using two variables on x-axis and a grouped variable on y-axis?

I want to code a grouped bar chart similar to this one (I just created it in Paint):
I want to code a grouped bar chart similar to this one (I just created it in Paint):
I created it flipped, but it actually doesn't matter whether it's flipped or not. So a plot similar to this one would also be very useful:
Grouped barchart in r with 4 variables
Both the variables, happy and lifesatisfied are scaled values from 0 to 10. Working hours is a grouped value and contains 43+, 37-42, 33-36, 27-32, and <27.
A very similar example of what my data set looks like (I just changed the values and order; I also have many more observations):
Working hours
I tried to find similar examples and, based on those, tried to code the bar chart in the following way, but it doesn't work:
df2 <- datafilteredwomen %>%
pivot_longer(cols = c("happy", "stflife"), names_to = "var", values_to = "Percentage")
ggplot(df2) +
geom_bar(aes(x = Percentage, y = workinghours, fill = var ), stat = "identity", position = "dodge") + theme_minimal()
It gives this plot, which is not correct/what I want:
Second try:
forplot = datafilteredwomen %>% group_by(workinghours, happy, stflife) %>% summarise(count = n()) %>% mutate(proportion = count/sum(count))
ggplot(forplot, aes(workinghours, proportion, fill = as.factor(happy))) +
geom_bar(position = "dodge", stat = "identity", color = "black")
gives this plot:
third try - used the ggplot2 builder add-in:
datafilteredwomen %>%
filter(! %>%
ggplot() +
aes(x = workinghours, group = happy, weight = happy) +
geom_bar(position = "dodge",
fill = "#112446") +
theme_classic() + scale_y_continuous(labels = scales::percent)
gives this plot:
But none of my tries are what I want.. really hope that someone can help me if it's possible!
After speaking to the OP I found his data source and came up with this solution. Apologies if it's a bit messy, I have only been using R for 6 months. For ease of reproducibility I have preselected the variables used from the original dataset.
data <- structure(list(wkhtot = c(40, 8, 50, 40, 40, 50, 39, 48, 45,
16, 45, 45, 52, 45, 50, 37, 50, 7, 37, 36), happy = c(7, 8, 10,
10, 7, 7, 7, 6, 8, 10, 8, 10, 9, 6, 9, 9, 8, 8, 9, 7), stflife = c(8,
8, 10, 10, 7, 7, 8, 6, 8, 10, 9, 10, 9, 5, 9, 9, 8, 8, 7, 7)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
Here are the packages required.
Here I have manipulated the data and commented my reasoning.
data <- data %>%
select(wkhtot, happy, stflife) %>% #Select the wanted variables
rename(Happy = happy) %>% #Rename for graphical sake
rename("Life Satisfied" = stflife) %>%
na.omit() %>% # remove NA values
group_by(WorkingHours = cut(wkhtot, c(-Inf, 27, 32,36,42,Inf))) %>% #Create the ranges
select(WorkingHours, Happy, "Life Satisfied") %>% #Select the variables again
pivot_longer(cols = c(`Happy`, `Life Satisfied`), names_to = "Criterion", values_to = "score") %>% # pivot the df longer for plotting
group_by(WorkingHours, Criterion)
data$Criterion <- as.factor(data$Criterion) #Make criterion a factor for graphical reasons
A bit more data prep
# Creating the percentage
# Share of each criterion's summed score within a working-hours band, as a
# percentage rounded to two decimals.
data.plot <- data %>%
  # Sum the scores per (WorkingHours, Criterion). `score` is the only
  # non-grouping column left after the earlier pivot, so naming it explicitly
  # matches the superseded summarise_all(sum).
  group_by(WorkingHours, Criterion) %>%
  summarise(score = sum(score), .groups = "drop") %>%
  # Total per working-hours band, then each criterion's percentage of it.
  group_by(WorkingHours) %>%
  mutate(tot = sum(score),
         freq = round(score / tot * 100, digits = 2)) %>%
  # Drop the grouping so later steps do not silently operate per band.
  ungroup()
Creating the plot.
# Plotting
ggplot(data.plot, aes(x = WorkingHours, y = freq, fill = Criterion)) +
geom_col(position = "dodge") +
geom_text(aes(label = freq),
position = position_dodge(width = 0.9),
vjust = 1) +
xlab("Working Hours") +
Please let me know if there is a more concise or easier way!!
Taking this example dataframe df:
df <- structure(list(Working.hours = c("37-42", "37-42", "<27", "<27",
"43+", "43+", "33-36", "33-36", "37-42", "37-42", "<27", "<27"
), country = c("DK", "DK", "SE", "SE", "DK", "DK", "SE", "SE",
"NO", "NO", "NO", "NO"), criterion = c("happy", "lifesatisfied",
"happy", "lifesatisfied", "happy", "lifesatisfied", "happy",
"lifesatisfied", "happy", "lifesatisfied", "happy", "lifesatisfied"
), score = c(7L, 9L, 8L, 8L, 7L, 8L, 6L, 6L, 7L, 5L, 4L, 7L)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
you can proceed like this:
df <-
df %>%
pivot_longer(cols = c(happy, lifesatisfied),
names_to = 'criterion',
values_to = 'score'
df %>%
ggplot(aes(x = Working.hours,
y = score,
fill = criterion)) +
geom_col(position = 'dodge') +
For picking colours see ?scale_fill_manual, for formatting legend etc. numerous existing answers to related questions on stackoverflow.

Heatmap in R with raw values

I have this dataframe:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
I would like to create a heatmap, with patient ID in the x axis and clas1, clas2 and clas3 in the y axis. The values represented in the heat map would be the raw value of each "clas". Here I post a drawing of what I would like
I apologise because I don't have available more colours to represent this, but this is only an example and any colour scale could be used.
An important thing is that I would like to distinguish between zeros and NAs so ideally NAs have their own colour or appear in white (empty).
I hope this is understandable enough.
But any questions just ask
Many thanks!
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
df %>% pivot_longer(!PatientID) %>%
ggplot(aes(x= PatientID, y = name, fill = value)) +
Here is a base R option with `heatmap`:
# Drop the first (PatientID) column, convert the rest to a matrix, set the
# patient IDs as its row names, and transpose so that patients become the
# columns before passing the matrix to heatmap().
heatmap(t(`row.names<-`(as.matrix(df[-1]), df$PatientID)))
# Which is like
# x <- as.matrix(df[-1])
# row.names(x) <- df$PatientID
# heatmap(t(x))
Preparing the data
I'll give 4 options, in all four you need to assign the rownames and remove the id column. I.e.:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
rownames(df) <- df$PatientID
df$PatientID <- NULL
The output is:
> df
clas1 clas2 clas3
3454 1 4 1
345 0 1 NA
5 5 0 0
348 NA 3 5
567 NA 1 5
79 4 0 5
Base R
With base R (decent output):
With gplots (a bit ugly, but many more parameters to control):
With heatmaply you have nicer defaults to use for the dendrograms (it also organizes them in a more "optimal" way).
You can learn more about the package here.
Static heatmap with heatmaply (better defaults, IMHO)
Now with colored dendrograms
ggheatmap(df, k_row = 3, k_col = 2)
With no dendrogram:
ggheatmap(df, dendrogram = F)
Interactive heatmap with heatmaply (hover tooltip, and the ability to zoom - it's interactive!):
And anything you can do with the static ggheatmap you can also do with the interactive heatmaply version.
Here is another option:
df <- data.frame(PatientID = c("3454","345","5","348","567","79"),
clas1 = c(1, 0, 5, NA, NA, 4),
clas2 = c(4, 1, 0, 3, 1, 0),
clas3 = c(1, NA, 0, 5, 5, 5), stringsAsFactors = F)
# named vector for heatmap: fill colour for each possible (discrete) value
cols <- c("0" = "white",
          "1" = "green",
          "2" = "orange",
          "3" = "yellow",
          "4" = "pink",
          "5" = "black",
          "99" = "grey")
# Legend labels for the same keys; "99" is shown as "NA".
labels_legend <- c("0" = "0",
                   "1" = "1",
                   "2" = "2",
                   "3" = "3",
                   "4" = "4",
                   "5" = "5",
                   "99" = "NA")
# Long format: one row per (PatientID, clas*) pair, with the class name in
# `names` and the raw value in `values`.
df1 <- df %>%
  pivot_longer(
    cols = starts_with("clas"),
    names_to = "names",
    values_to = "values"
  ) %>%
  # Fix the factor levels so patients keep their original order on the x axis
  # instead of being sorted alphabetically.
  mutate(PatientID = factor(PatientID, levels = c("3454", "345", "5", "348", "567", "79")))
# NOTE(review): NA values are not recoded to 99 anywhere in this snippet, so
# they are drawn with the scale's default NA colour rather than the "99"
# entry. Confirm whether a recoding step was intended.
ggplot(df1, aes(factor(PatientID), factor(names))) +
  geom_tile(aes(fill = factor(values))) +
  # geom_text(aes(label = values), size = 5, color = "black") + # text in tiles
  scale_fill_manual(
    values = cols,
    breaks = c("0", "1", "2", "3", "4", "5", "99"),
    labels = labels_legend,
    aesthetics = c("colour", "fill"),
    drop = FALSE
  ) +
  # Reverse the y axis so the first class is drawn at the top.
  scale_y_discrete(limits = rev) +
  coord_equal() +
  theme(line = element_blank(),
        title = element_blank()) +
  theme(legend.direction = "horizontal", legend.position = "bottom")

Faceted Boxplots

I have a boxplot with a group on the left (Baseline.RT) and a group on the right (TBPM.RT). I want to rename Baseline.RT to 1-back and TBPM.RT to TBPM. I also want each group to have a different square frame. I would also like to try black and white options to fill in the values for Neutral, Positive, and Negative.
I have tried several options but did not get the result I wanted. Below is my code. Any help is welcome.
stringsAsFactors = FALSE,
Participant = c(1, 2, 3, 4, 5, 6),
Sex = c("m", "m", "m", "f", "f", "m"),
Age = c(29, 21, 29, 22, 25, 31),
Stress = c(14, 26, 11, 19, 15, 15), = c(6, 43, 4, 10, 12, 8),
Dass21Ansie = c(0, 12, 1, 3, 2, 2),
Dass.Depre = c(1, 11, 0, 1, 3, 0),
Dass.Stress = c(5, 20, 3, 6, 7, 6),
Valence = c(0, 1, 2, 0, 1, 2),
Baseline.RT = c(1.17657473346937,
TBPM.RT = c(1.16126499995575,
TotalClockChecks = c(44, 97, 44, 93, 32, 90),
TotalChecks5060 = c(13, 22, 17, 23, 10, 27),
TotalClockResets = c(18, 20, 19, 19, 18, 19),
Correct.Resets = c(16, 16, 18, 18, 12, 19),
Before.55.Resets = c(0, 2, 0, 1, 0, 0),
After.65.Resets = c(2, 2, 1, 0, 6, 0),
MeanResetTime = c(63.0026438647087,
Accuracy.Baseline = c(0.987179487179487,
Accuracy.TBPM = c(0.968619246861925,
rau.Baseline = c(112.453264487601,
rau.TBPM = c(106.447535249234,
my44 %>%
select(Participant, Valence, Baseline.RT,TBPM.RT) %>% #Select interest variables
gather(Task,RT, -Valence, -Participant) %>%
ggplot(., aes(factor(Valence), RT)) + #plot
geom_boxplot() + facet_wrap(~ Task) +
scale_x_discrete(name = element_blank(), labels=c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
breaks=c("0", "1", "2"),
labels=c("Neutral", "Positive",
The obtained results:
It sounds like you're looking for something like this (although your question's input data doesn't produce the values displayed in your plot, and you seem to have a default theme set somewhere).
Your fill colours can be chosen by scale_fill_manual, but you need to map the Valence variable to the fill scale if you want the different boxes to have different colours.
If you want a frame around each facet, theme_bw does this by default, or you can use theme(panel.border = element_rect(colour = "black")).
To re-name facets, I would normally just re-name the faceting variables to the desired names in the input, but here I have shown an alternative method using the labeller parameter in facet_wrap.
# Faceted boxplots of reaction time by valence: one facet per task, boxes
# filled in shades of grey, with a frame around each facet from theme_bw().
my44 %>%
  select(Participant, Valence, Baseline.RT, TBPM.RT) %>% # Select interest variables
  # Stack the two RT columns: `Task` holds the column name, `RT` the value.
  gather(Task, RT, -Valence, -Participant) %>%
  ggplot(., aes(factor(Valence), RT)) +
  # Valence must be mapped to `fill` for scale_fill_manual() to take effect.
  geom_boxplot(aes(fill = factor(Valence))) +
  # The labeller returns fixed strip labels, in facet order
  # (Baseline.RT -> "1-back", TBPM.RT -> "TBPM").
  facet_wrap(~ Task,
             labeller = function(x) data.frame(Task = c("1-back", "TBPM"))) +
  scale_x_discrete(name = element_blank(),
                   labels = c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
  scale_fill_manual(breaks = c("0", "1", "2"),
                    labels = c("Neutral", "Positive", "Negative"),
                    values = c("gray50", "gray75", "gray95")) +
  theme_bw() +
  # The x axis already labels each box, so the legend is hidden.
  theme(legend.position = "none",
        strip.background = element_blank())

How do you create a grouped barplot in R from only certain columns?

I have a data frame that looks like
# Example data: counts per role, split by sex and by category (1 and 2),
# plus per-sex totals across both categories.
# `letters` is a character vector, so it is indexed with `[`, not called.
Role <- letters[1:3]
df <- data.frame(Role,
                 Female1 = c(1, 4, 2),
                 Male1 = c(3, 0, 0),
                 Female2 = c(3, 5, 3),
                 Male2 = c(1, 3, 0))
# The totals are added afterwards: columns being created inside data.frame()
# cannot be referenced by other arguments of the same call.
df$FemaleTotal <- df$Female1 + df$Female2
df$MaleTotal <- df$Male1 + df$Male2
I want to create a barplot grouped by Male/Female for each column category (in this example, 1 and 2), stacked by Role, and also another plot with just the totals. To do just the totals I could use melt() and subset the data frame to only those columns, but that seems messy and doesn't help with the main plot I want to make.
An option would be to reshape to 'long' format
df %>%
pivot_longer(cols = -Role, names_to = c( "group", '.value'),
names_sep="(?<=[a-z])(?=(\\d+|Total))") %>%
pivot_longer(-c(Role, group)) %>%
ggplot(aes(x = Role, y = value, fill = group)) +
geom_col() +
facet_wrap(~ name)
df <- structure(list(Role = c("a", "b", "c"), Female1 = c(1, 4, 2),
Male1 = c(3, 0, 0), Female2 = c(3, 5, 3), Male2 = c(1, 3,
0), FemaleTotal = c(4, 9, 5), MaleTotal = c(4, 3, 0)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))

Show point colour according to their row position in table

I want to display a scatter plot of points from a csv table with ggplot2. The trick is that I'd like each point, or cross, to have a different colour according to their row number in the csv file (using RColorBrewer's spectral colours).
The dataset (dat) looks like this:
modu mnc eff
1 0.3080473 0 0.4420544
2 0.3110355 4 0.4633741
3 0.3334024 9 0.4653061
So I'd like row 1 to be very blue, row two to be a little less, row three to be kind of green, etc.
Here's my code so far:
dat <- structure(list(modu = c(0.30947265625, 0.3094921875, 0.32958984375,
0.33974609375, 0.33767578125, 0.3243359375, 0.33513671875, 0.3076171875,
0.3203125, 0.3205078125, 0.3220703125, 0.28994140625, 0.31181640625,
0.352421875, 0.31978515625, 0.29642578125, 0.34982421875, 0.3289453125,
0.30802734375, 0.31185546875, 0.3472265625, 0.303828125, 0.32279296875,
0.3165234375, 0.311328125, 0.33640625, 0.3140234375, 0.33515625,
0.34314453125, 0.33869140625), mnc = c(15, 9, 6, 0, 10, 12, 14,
9, 5, 11, 0, 15, 0, 2, 14, 13, 14, 17, 11, 12, 13, 6, 4, 0, 13,
7, 10, 12, 7, 13), eff = c(0.492448979591836, 0.49687074829932,
0.49421768707483, 0.478571428571428, 0.493537414965986, 0.493809523809524,
0.49891156462585, 0.499319727891156, 0.495102040816327, 0.492285714285714,
0.482312925170068, 0.498911564625851, 0.479931972789116, 0.492857142857143,
0.495238095238095, 0.49891156462585, 0.49530612244898, 0.495850340136055,
0.50156462585034, 0.496, 0.492897959183673, 0.487959183673469,
0.495605442176871, 0.47795918367347, 0.501360544217687, 0.497850340136054,
0.493496598639456, 0.493741496598639, 0.496734693877551, 0.499659863945578
)), .Names = c("modu", "mnc", "eff"), row.names = c(NA, 30L), class = "data.frame")
dat2 <- structure(list(modu = c(0.26541015625, 0.282734375, 0.28541015625,
0.29216796875, 0.293671875), mnc = c(0.16, 0.28, 0.28, 0.28,
0.28), eff = c(0.503877551020408, 0.504149659863946, 0.504625850340136,
0.505714285714286, 0.508503401360544)), .Names = c("modu", "mnc",
"eff"), row.names = c(NA, 5L), class = "data.frame")
dat$modu = dat$modu
dat$mnc = dat$mnc*50
dat$eff = dat$eff
dat2$modu = dat2$modu
dat2$mnc = dat2$mnc*50
dat2$eff = dat2$eff
# For every pair of columns of `dat`, build a data frame whose first two
# columns are renamed x and y, tagged with `var` = "<name1>/<name2>"; then
# stack the three pieces into one long data frame for faceting.
# combn(..., simplify = FALSE) returns a list, which do.call(rbind, ...)
# binds row-wise.
res <- do.call(rbind, combn(1:3, 2, function(ii)
  cbind(setNames(dat[, c(ii, setdiff(1:3, ii))], c("x", "y")),
        var = paste(names(dat)[ii], collapse = "/")), simplify = FALSE))
# One scatter panel per column pair; shape 4 draws crosses, and each panel
# gets its own axis ranges.
ggplot(res, aes(x = x, y = y)) + geom_point(shape = 4) +
  facet_wrap(~ var, scales = "free")
How should I go about doing this?
# Same reshaping as in the question, with an extra `row` column holding each
# point's row position in `dat`, so it can be mapped to colour.
# combn(..., simplify = FALSE) returns a list, which do.call(rbind, ...)
# binds row-wise.
res <- do.call(rbind, combn(1:3, 2, function(ii)
  cbind(row = seq(nrow(dat)),
        setNames(dat[, c(ii, setdiff(1:3, ii))], c("x", "y")),
        var = paste(names(dat)[ii], collapse = "/")), simplify = FALSE))
# `row` is numeric, so colour is drawn on a continuous gradient by row
# position; one panel per column pair with free axis ranges.
ggplot(res, aes(x = x, y = y, color = row)) + geom_point(shape = 4) +
  facet_wrap(~ var, scales = "free")
