Overlay 2 dataframes on the same graph using R - r

I would like to overlay the points from two different dataframes on the same graph but I can't seem to achieve it.
I know that I need to combine the 2 ggplot() but I don't know which method to use.
# Plot 1: the `grid` points as blue squares; transparency is grid$z / op.
ggplot(grid, aes(x, y)) +
  geom_point(
    shape = 15, color = "blue", size = 2,
    alpha = (grid$z / op)
  ) +
  scale_x_continuous(
    name = "length (m)",
    breaks = seq(0, max(fullgrid[, "x"]), step)
  ) +
  scale_y_continuous(
    name = "width (mm)",
    breaks = seq(0, max(fullgrid[, "y"]), 100)
  ) +
  expand_limits(x = 0, y = 0) +
  coord_cartesian(expand = FALSE) +
  ggtitle("top+bot") +
  theme(
    plot.title = element_text(size = 10, hjust = 0.5),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(angle = 30, hjust = 1),
    axis.title.y = element_text(size = 10),
    axis.text.y = element_text(angle = 30, hjust = 1)
  )

# Plot 2: a second, separate plot holding the `def` rectangles.
ggplot(def) +
  geom_rect(
    data = def,
    aes(xmin = xbegc, xmax = xendc, ymin = ybegc, ymax = yendc),
    alpha = 8
  ) +
  ggtitle("top+bot")

There are many things wrong in your code. I've boiled down your code to something more essential - to crystallise the problem: The use of a global aesthetic for all geom layers, although not all data frames have this aesthetic (in your case: mod).
I've also used a smaller version of your data and slightly changed your regex.
There are further small comments in the code - I recommend reading them.
library(ggplot2)
freq <- structure(list(vlookup = c("Entrevista_final|1|Q3_nova|0|C1|1", "Entrevista_final|1|Q3_nova|0|C1|2", "Entrevista_final|1|Q3_nova|0|C3|1", "Entrevista_final|1|Q3_nova|0|C3|4", "Entrevista_final|1|Q3_nova|0|C3|2", "Entrevista_final|1|Q3_nova|0|C3|3", "Entrevista_final|1|Q3_nova|0|C4_1|2018", "Entrevista_final|1|Q3_nova|0|C4_1|2020", "Entrevista_final|1|Q3_nova|0|C4_1|1993", "Entrevista_final|1|Q3_nova|0|C4_1|2015", "Entrevista_final|1|Q3_nova|0|C4_1|2016", "Entrevista_final|1|Q3_nova|0|C4_1|1996", "Entrevista_final|1|Q3_nova|0|C4_1|99", "Entrevista_final|1|Q3_nova|0|C4_1|2017", "Entrevista_final|1|Q3_nova|0|C4_1|2004", "Entrevista_final|1|Q3_nova|0|C4_1|2019", "Entrevista_final|1|Q3_nova|0|C4_1|2002", "Entrevista_final|1|Q3_nova|0|C4_1|2021", "Entrevista_final|1|Q3_nova|0|C4_2|99", "Entrevista_final|1|Q3_nova|0|C4_2|2018"), cruza1 = c("Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final"), mod1 = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cruza2 = c("Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova"), mod2 = c("0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"), var = c("C1", "C1", "C3", "C3", "C3", "C3", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_2", "C4_2"), mod = c("1", "2", "1", "4", "2", "3", "2018", "2020", "1993", "2015", "2016", "1996", "99", "2017", "2004", "2019", "2002", "2021", 
"99", "2018"), pop_extrapolada = c(22, 17, 32, 3, 3, 1, 4, 10, 1, 1, 2, 1, 2, 2, 1, 4, 1, 10, 2, 3), PERCENT = c(56.4102564102564, 43.5897435897436, 82.051282051282, 7.69230769230769, 7.69230769230769, 2.56410256410256, 10.2564102564103, 25.6410256410256, 2.56410256410256, 2.56410256410256, 5.12820512820513, 2.56410256410256, 5.12820512820513, 5.12820512820513, 2.56410256410256, 10.2564102564103, 2.56410256410256, 25.6410256410256, 15.3846153846154, 23.0769230769231), count = c(22, 17, 32, 3, 3, 1, 4, 10, 1, 1, 2, 1, 2, 2, 1, 4, 1, 10, 2, 3), BA = c(39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 13, 13), StdErr = c(0.0795074974876938, 0.0795074974876938, 0.0615313285518115, 0.0427252055542318, 0.0427252055542318, 0.0253433535020364, 0.0486447864199686, 0.0700118062133025, 0.0253433535020364, 0.0253433535020364, 0.0353661791821834, 0.0253433535020364, 0.0353661791821834, 0.0353661791821834, 0.0253433535020364, 0.0486447864199686, 0.0253433535020364, 0.0700118062133025, 0.100484337629983, 0.117340426124568), LowerCLMean = c(0.407774134748444, 0.279569006543315, 0.699529312635895, 0, 0, 0, 0.00691824151210088, 0.118752349327158, 0, 0, 0, 0, 0, 0, 0, 0.00691824151210088, 0, 0.118752349327158, 0, 0), UpperCLMean = c(0.720430993456685, 0.592225865251556, 0.941496328389746, 0.160929799423163, 0.160929799423163, 0.0754713784093646, 0.198209963616104, 0.394068163493355, 0.0754713784093646, 0.0754713784093646, 0.120819383213079, 0.0754713784093646, 0.120819383213079, 0.120819383213079, 0.0754713784093646, 0.198209963616104, 0.0754713784093646, 0.394068163493355, 0.352798149154327, 0.463095108198593), ME = c(15.6328429354121, 15.6328429354121, 12.0983507876925, 8.40067225000863, 8.40067225000863, 4.9830352768339, 9.56458610520017, 13.7657907083098, 4.9830352768339, 4.9830352768339, 6.95373319310279, 4.9830352768339, 6.95373319310279, 6.95373319310279, 4.9830352768339, 9.56458610520017, 4.9830352768339, 13.7657907083098, NA, NA), StdDev = 
c(4.55932029745697, 4.03560271072923, 5.42183783041485, 1.72749876078217, 1.72749876078217, 1, 1.99211042289525, 3.1247047104581, 1, 1, 1.41235641966466, 1, 1.41235641966466, 1.41235641966466, 1, 1.99211042289525, 1, 3.1247047104581, 1.40830867828517, 1.71755640373177)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
## shortened your regex a bit: keep everything up to the last "C<digit>"
freq$esc <- gsub("^(.*C[0-9]).*", "\\1", freq$vlookup)
## stand-in coordinates for the point layer
db <- data.frame(x = 1:3, y = c(5.5, 6.5, 9.7))
## change the filter so that the example works
## fixed = TRUE: "|" is a literal separator in these keys; as a regex it would
## mean "Entrevista_final" OR "1" and match far more than intended
Q6 <- freq[
  grepl("Entrevista_final|1", freq$esc, fixed = TRUE),
  c("var", "mod", "PERCENT")
]
## remove fill = mod from main ggplot2 call and put it into your bar plot and in geom_text as group
ggplot(Q6, aes(x = var, y = PERCENT)) +
  ## use geom_col instead of geom_bar(stat = "identity"), position = "stack" is default
  geom_col(aes(fill = factor(x = mod)), color = "red") +
  ## group (not fill) is enough to make the labels stack like the bars
  geom_text(
    aes(group = factor(x = mod), label = sprintf(PERCENT, fmt = "%1.0f")),
    position = position_stack(vjust = 0.5)
  ) +
  ## use your coordinate data frame and use the correct geom layer syntax
  geom_point(data = db, aes(x, y))
Created on 2023-02-16 with reprex v2.0.2

Related

How to merge a stacked barchart with a point plot in the same plot?

I have this graph.
And I want to merge it with this one.
This is what I have tried.
# Load the frequency table and the means table from Excel.
# (read_excel is presumably readxl's; the library() call is not shown here.)
freq <- read_excel("mg_freq_MFD.xlsx")
med <- read_excel("mg_med_MFD.xlsx")
# lookup: copy of vlookup with the trailing field stripped.
# NOTE(review): the leading "|" in the pattern is unescaped, so it is regex
# alternation rather than a literal pipe; the dput output further down shows
# that the trailing "|" is left in place.
freq$lookup <- freq$vlookup
freq$lookup <- gsub("|[^|]+$", "", freq$lookup)
# esc: copy of vlookup with everything from the last underscore onwards removed.
freq$esc <- freq$vlookup
freq$esc <- gsub("_[^_]+$", "", freq$esc)
# Same two derived columns for the means table.
med$lookup <- med$vlookup
med$lookup <- gsub("|[^|]+$", "", med$lookup)
med$esc <- med$vlookup
med$esc <- gsub("_[^_]+$", "", med$esc)
# Value printed in the plot caption as "n=  20".
total=20
# db is defined here but is not used by the plot below.
db<- data.frame(x=1:3,
y=c(5.5, 6.5,9.7))
# Level order used for the fill factor.
escala<-c("4","3","2","1")
# Plot title looked up in `titulo`, which is not defined in this snippet.
tituloQ6<-titulo[titulo$Var=="Q6", "Titulo"]
# Subsets selected by their esc key; Q6med keeps only `var` and `mean`.
Q6<-freq[freq$esc=="Entrevista_final|1|ii|1|esc_Q6",c("var","mod","PERCENT")]
Q6med<-med[med$esc=="Entrevista_final|1|ii|1|Q6",c("var","mean")]
# fill is mapped in the top-level aes(), so every layer added below inherits it.
ggplot(Q6, aes(x=var, y=PERCENT,fill = factor(x=mod, levels = escala)))+
geom_bar(stat = "identity", position = "stack", width =.5, color="red3")+
scale_fill_manual(values = c("4"="white","3"="#fe5f5f","2"="red","1"="red3"))+
scale_y_continuous(labels = scales::percent)+
# Labels are stacked like the bars and centred in each segment (vjust=0.5).
geom_text(aes(label=sprintf(PERCENT,fmt="%1.0f")),
position = position_stack(vjust=0.5))+
theme_classic()+
ggtitle(tituloQ6)+
labs(x="",
y=NULL,
caption = (paste("n= ", total)))+
guides(y="none")+
theme(legend.title = element_blank())+
# x and y are passed outside aes() here. This layer also inherits the global
# fill mapping, and Q6med has no `mod` column, which matches the
# "object 'mod' not found" error quoted below.
geom_point(data=Q6med, x=var, y=mean)
But I get an error from the factor() call.
Error in `geom_point()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 3rd layer.
Caused by error in `factor()`:
! object 'mod' not found
Backtrace:
1. base (local) `<fn>`(x)
17. base::factor(x = mod, levels = escala)
Error in geom_point(data = Q6med, x = var, y = mean) :
ℹ Error occurred in the 3rd layer.
Caused by error in `factor()`:
! object 'mod' not found
structure(list(vlookup = c("Entrevista_final|1|Q3_nova|0|C1|1",
"Entrevista_final|1|Q3_nova|0|C1|2", "Entrevista_final|1|Q3_nova|0|C3|1",
"Entrevista_final|1|Q3_nova|0|C3|4", "Entrevista_final|1|Q3_nova|0|C3|2",
"Entrevista_final|1|Q3_nova|0|C3|3", "Entrevista_final|1|Q3_nova|0|C4_1|2018",
"Entrevista_final|1|Q3_nova|0|C4_1|2020", "Entrevista_final|1|Q3_nova|0|C4_1|1993",
"Entrevista_final|1|Q3_nova|0|C4_1|2015", "Entrevista_final|1|Q3_nova|0|C4_1|2016",
"Entrevista_final|1|Q3_nova|0|C4_1|1996", "Entrevista_final|1|Q3_nova|0|C4_1|99",
"Entrevista_final|1|Q3_nova|0|C4_1|2017", "Entrevista_final|1|Q3_nova|0|C4_1|2004",
"Entrevista_final|1|Q3_nova|0|C4_1|2019", "Entrevista_final|1|Q3_nova|0|C4_1|2002",
"Entrevista_final|1|Q3_nova|0|C4_1|2021", "Entrevista_final|1|Q3_nova|0|C4_2|99",
"Entrevista_final|1|Q3_nova|0|C4_2|2018"), cruza1 = c("Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final"),
mod1 = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cruza2 = c("Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova"), mod2 = c("0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"
), var = c("C1", "C1", "C3", "C3", "C3", "C3", "C4_1", "C4_1",
"C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1", "C4_1",
"C4_1", "C4_1", "C4_2", "C4_2"), mod = c("1", "2", "1", "4",
"2", "3", "2018", "2020", "1993", "2015", "2016", "1996",
"99", "2017", "2004", "2019", "2002", "2021", "99", "2018"
), pop_extrapolada = c(22, 17, 32, 3, 3, 1, 4, 10, 1, 1,
2, 1, 2, 2, 1, 4, 1, 10, 2, 3), PERCENT = c(56.4102564102564,
43.5897435897436, 82.051282051282, 7.69230769230769, 7.69230769230769,
2.56410256410256, 10.2564102564103, 25.6410256410256, 2.56410256410256,
2.56410256410256, 5.12820512820513, 2.56410256410256, 5.12820512820513,
5.12820512820513, 2.56410256410256, 10.2564102564103, 2.56410256410256,
25.6410256410256, 15.3846153846154, 23.0769230769231), count = c(22,
17, 32, 3, 3, 1, 4, 10, 1, 1, 2, 1, 2, 2, 1, 4, 1, 10, 2,
3), BA = c(39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 13, 13), StdErr = c(0.0795074974876938,
0.0795074974876938, 0.0615313285518115, 0.0427252055542318,
0.0427252055542318, 0.0253433535020364, 0.0486447864199686,
0.0700118062133025, 0.0253433535020364, 0.0253433535020364,
0.0353661791821834, 0.0253433535020364, 0.0353661791821834,
0.0353661791821834, 0.0253433535020364, 0.0486447864199686,
0.0253433535020364, 0.0700118062133025, 0.100484337629983,
0.117340426124568), LowerCLMean = c(0.407774134748444, 0.279569006543315,
0.699529312635895, 0, 0, 0, 0.00691824151210088, 0.118752349327158,
0, 0, 0, 0, 0, 0, 0, 0.00691824151210088, 0, 0.118752349327158,
0, 0), UpperCLMean = c(0.720430993456685, 0.592225865251556,
0.941496328389746, 0.160929799423163, 0.160929799423163,
0.0754713784093646, 0.198209963616104, 0.394068163493355,
0.0754713784093646, 0.0754713784093646, 0.120819383213079,
0.0754713784093646, 0.120819383213079, 0.120819383213079,
0.0754713784093646, 0.198209963616104, 0.0754713784093646,
0.394068163493355, 0.352798149154327, 0.463095108198593),
ME = c(15.6328429354121, 15.6328429354121, 12.0983507876925,
8.40067225000863, 8.40067225000863, 4.9830352768339, 9.56458610520017,
13.7657907083098, 4.9830352768339, 4.9830352768339, 6.95373319310279,
4.9830352768339, 6.95373319310279, 6.95373319310279, 4.9830352768339,
9.56458610520017, 4.9830352768339, 13.7657907083098, NA,
NA), StdDev = c(4.55932029745697, 4.03560271072923, 5.42183783041485,
1.72749876078217, 1.72749876078217, 1, 1.99211042289525,
3.1247047104581, 1, 1, 1.41235641966466, 1, 1.41235641966466,
1.41235641966466, 1, 1.99211042289525, 1, 3.1247047104581,
1.40830867828517, 1.71755640373177), lookup = c("Entrevista_final|1|Q3_nova|0|C1|",
"Entrevista_final|1|Q3_nova|0|C1|", "Entrevista_final|1|Q3_nova|0|C3|",
"Entrevista_final|1|Q3_nova|0|C3|", "Entrevista_final|1|Q3_nova|0|C3|",
"Entrevista_final|1|Q3_nova|0|C3|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_1|",
"Entrevista_final|1|Q3_nova|0|C4_1|", "Entrevista_final|1|Q3_nova|0|C4_2|",
"Entrevista_final|1|Q3_nova|0|C4_2|"), esc = c("Entrevista_final|1|Q3",
"Entrevista_final|1|Q3", "Entrevista_final|1|Q3", "Entrevista_final|1|Q3",
"Entrevista_final|1|Q3", "Entrevista_final|1|Q3", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4", "Entrevista_final|1|Q3_nova|0|C4",
"Entrevista_final|1|Q3_nova|0|C4")), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame"))
structure(list(vlookup = c("Entrevista_final|1|Q3_nova|.|Q6_1",
"Entrevista_final|1|Q3_nova|.|Q6_2", "Entrevista_final|1|Q3_nova|.|Q6_3",
"Entrevista_final|1|Q3_nova|.|Q9_1", "Entrevista_final|1|Q3_nova|.|Q9_2",
"Entrevista_final|1|Q3_nova|.|Q9_3", "Entrevista_final|1|Q3_nova|.|Q9_4",
"Entrevista_final|1|Q3_nova|.|Q9_5", "Entrevista_final|1|Q3_nova|.|Q9_6",
"Entrevista_final|1|Q3_nova|.|Q9_7", "Entrevista_final|1|Q3_nova|.|Q9_8",
"Entrevista_final|1|Q3_nova|0|Q6_1", "Entrevista_final|1|Q3_nova|0|Q6_2",
"Entrevista_final|1|Q3_nova|0|Q6_3", "Entrevista_final|1|Q3_nova|0|Q9_1",
"Entrevista_final|1|Q3_nova|0|Q9_2", "Entrevista_final|1|Q3_nova|0|Q9_3",
"Entrevista_final|1|Q3_nova|0|Q9_4", "Entrevista_final|1|Q3_nova|0|Q9_5",
"Entrevista_final|1|Q3_nova|0|Q9_6"), cruza1 = c("Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final", "Entrevista_final",
"Entrevista_final", "Entrevista_final", "Entrevista_final"),
mod1 = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cruza2 = c("Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova", "Q3_nova",
"Q3_nova"), mod2 = c(".", ".", ".", ".", ".", ".", ".", ".",
".", ".", ".", "0", "0", "0", "0", "0", "0", "0", "0", "0"
), var = c("Q6_1", "Q6_2", "Q6_3", "Q9_1", "Q9_2", "Q9_3",
"Q9_4", "Q9_5", "Q9_6", "Q9_7", "Q9_8", "Q6_1", "Q6_2", "Q6_3",
"Q9_1", "Q9_2", "Q9_3", "Q9_4", "Q9_5", "Q9_6"), mean = c(5.69767441860465,
7.18604651162791, 6.81395348837209, 6.325, 6.65, 7.68292682926829,
8.41463414634146, 8.07142857142857, 8.14634146341463, 8.59523809523809,
8.54761904761905, 6.61538461538461, 6.94871794871795, 6.43589743589744,
6.72222222222222, 6.75675675675676, 8.44736842105263, 8.23684210526316,
8.44736842105263, 8.57894736842105), ba = c(43, 43, 43, 40,
40, 41, 41, 42, 41, 42, 42, 39, 39, 39, 36, 37, 38, 38, 38,
38), N = c(43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 39,
39, 39, 39, 39, 39, 39, 39, 39), StdErr = c(0.470914185560204,
0.410281576101709, 0.416835766228631, 0.521900205807444,
0.402892230805839, 0.391375466555417, 0.269664126081159,
0.268409952413896, 0.304135230479118, 0.316349111010602,
0.250470946520814, 0.400365075596811, 0.407435440466744,
0.349722774823449, 0.426532227843502, 0.307106063412623,
0.290287995830448, 0.289658029819206, 0.240601576847107,
0.257153023503742), LowerCLMean = c(4.77203852970105, 6.37957987882874,
5.9946205378507, 5.29900543427478, 5.85799052940539, 6.91360135641181,
7.88453734944423, 7.54379718753762, 7.54850383690226, 7.97340904610116,
8.0552828750472, 5.82842110058807, 6.14784579920957, 5.7484818728014,
5.88370991085601, 6.1530446667685, 7.87675025833957, 7.66744195540224,
7.97440181971579, 8.07346251229383), UpperCLMean = c(6.62331030750825,
7.99251314442708, 7.63328643889348, 7.35099456572522, 7.4420094705946,
8.45225230212478, 8.94473094323869, 8.59905995531952, 8.74417908992701,
9.21706714437503, 9.0399552201909, 7.40234813018116, 7.74959009822633,
7.12331299899347, 7.56073453358844, 7.36046884674501, 9.0179865837657,
8.80624225512407, 8.92033502238947, 9.08443222454828), media_sem_fe = c(5.69767441860465,
7.18604651162791, 6.81395348837209, 6.325, 6.65, 7.68292682926829,
8.41463414634146, 8.07142857142857, 8.14634146341463, 8.59523809523809,
8.54761904761905, 6.61538461538461, 6.94871794871795, 6.43589743589744,
6.72222222222222, 6.75675675675676, 8.44736842105263, 8.23684210526316,
8.44736842105263, 8.57894736842105), ME = c(0.925635888903602,
0.806466632799168, 0.819332950521389, 1.02599456572522, 0.792009470594609,
0.769325472856485, 0.530096796897231, 0.527631383890947,
0.597837626512375, 0.62182904913694, 0.49233617257185, 0.786963514796548,
0.800872149508378, 0.687415563096039, 0.838512311366217,
0.60371208998826, 0.570618162713064, 0.569400149860914, 0.47296660133684,
0.505484856127225), lookup = c("Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|.|", "Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|.|", "Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|.|", "Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|.|", "Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|.|", "Entrevista_final|1|Q3_nova|.|",
"Entrevista_final|1|Q3_nova|0|", "Entrevista_final|1|Q3_nova|0|",
"Entrevista_final|1|Q3_nova|0|", "Entrevista_final|1|Q3_nova|0|",
"Entrevista_final|1|Q3_nova|0|", "Entrevista_final|1|Q3_nova|0|",
"Entrevista_final|1|Q3_nova|0|", "Entrevista_final|1|Q3_nova|0|",
"Entrevista_final|1|Q3_nova|0|"), esc = c("Entrevista_final|1|Q3_nova|.|Q6",
"Entrevista_final|1|Q3_nova|.|Q6", "Entrevista_final|1|Q3_nova|.|Q6",
"Entrevista_final|1|Q3_nova|.|Q9", "Entrevista_final|1|Q3_nova|.|Q9",
"Entrevista_final|1|Q3_nova|.|Q9", "Entrevista_final|1|Q3_nova|.|Q9",
"Entrevista_final|1|Q3_nova|.|Q9", "Entrevista_final|1|Q3_nova|.|Q9",
"Entrevista_final|1|Q3_nova|.|Q9", "Entrevista_final|1|Q3_nova|.|Q9",
"Entrevista_final|1|Q3_nova|0|Q6", "Entrevista_final|1|Q3_nova|0|Q6",
"Entrevista_final|1|Q3_nova|0|Q6", "Entrevista_final|1|Q3_nova|0|Q9",
"Entrevista_final|1|Q3_nova|0|Q9", "Entrevista_final|1|Q3_nova|0|Q9",
"Entrevista_final|1|Q3_nova|0|Q9", "Entrevista_final|1|Q3_nova|0|Q9",
"Entrevista_final|1|Q3_nova|0|Q9")), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame"))
How can I solve this?

use of pivot_wider to plot the evolution of variables in R

I would like to plot the evolution of the number of workers per category ("A", "D", "F", "I"), from 2017 to 2021, with a stacked bar chart (with the labels in the middle of each bar, for each category), one bar per year. Yet my dataset isn't in the right way to do this, I think I need to use pivot_wider() or pivot_longer() from what I have seen here, but I don't really know how to manipulate these functions. Could anyone help ?
Here is the structure of my dataset, for reproducibility :
structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(23,
14, 29, 35, 16, 117), F = c(8, 7, 11, 6, 6, 38), I = c(449, 498,
415, 470, 531, 2363), annee = c("2017", "2018", "2019", "2020",
"2021", NA)), core = structure(list(A = c("10", "7", "8", "8",
"9"), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(449,
498, 415, 470, 531)), class = "data.frame", row.names = c(NA,
-5L)), tabyl_type = "two_way", totals = "row", row.names = c(NA,
6L), class = c("tabyl", "data.frame"))
library(tidyverse)
library(ggrepel)
df <- structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(
23,
14, 29, 35, 16, 117
), F = c(8, 7, 11, 6, 6, 38), I = c(
449, 498,
415, 470, 531, 2363
), annee = c(
"2017", "2018", "2019", "2020",
"2021", NA
)), core = structure(list(A = c(
"10", "7", "8", "8",
"9"
), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(
449,
498, 415, 470, 531
)), class = "data.frame", row.names = c(
NA,
-5L
)), tabyl_type = "two_way", totals = "row", row.names = c(
NA,
6L
), class = c("tabyl", "data.frame"))
# Reshape the yearly counts to long form and draw one stacked bar per year,
# with each count labelled inside its own bar segment.
df |>
  # the "Total" row has annee = NA, so this drops it
  filter(!is.na(annee)) |>
  # A is character because of the "Total" entry; make it numeric like D, F, I
  mutate(A = as.double(A)) |>
  pivot_longer(-annee, names_to = "category") |>
  ggplot(aes(annee, value, fill = category, label = value)) +
  geom_col() +
  # vjust = 0.5 centres each label in its segment; the default (vjust = 1)
  # would anchor it at the segment's top edge instead
  geom_label_repel(position = position_stack(vjust = 0.5), max.overlaps = 20)
Created on 2022-08-08 by the reprex package (v2.0.1)
Once you remove the total row and ensure that A through I are numeric, you can use pivot_longer() and pass the result to ggplot() like this:
# Drop the totals row, coerce columns A..I to numeric, reshape to one row per
# (year, group), then draw stacked bars filled by group.
data %>%
  filter(A != "Total") %>%
  mutate(across(A:I, as.numeric)) %>%
  pivot_longer(cols = -annee, names_to = "group", values_to = "ct") %>%
  ggplot(aes(x = annee, y = ct, fill = group)) +
  geom_col()
I did not add the category labels, since group I dominates each year; you might want to reconsider that visualization

Align Text to geom_vline with varying location

I have a function that creates a histogram with an overlying density plot. The function also displays a red dotted line indicating alpha. Users can specify the alpha level. Moreover, the counts in the histogram will differ depending on the input data. I want a label indicating alpha = 0.05 (for example) next to the red dotted line. The label should always be next to the alpha line and always be near the top of the graph (I have not solved that). I'm aware of "Align geom_text to a geom_vline in ggplot2", but the answers there do not provide what I'm looking for (and/or produce error messages; I tried to reduce the size of the label with text=element_text(size=11) as suggested there, but that does not work).
Find below some sample code:
# Histogram of p-values with a density overlay and a dashed red line at alpha.
#   dataframe: data frame with a `p.value` column
#   pvalues:   accepted but not used inside the function
#   alpha:     x position of the dashed line and of the "Alpha" label
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    theme_bw() +
    xlim(0, 1) +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    xlab("p-value") +
    ggtitle("Histogram of Multiverse P-Values") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # label height is hard-coded at y = 75
    geom_text(aes(x = alpha, y = 75, label = "Alpha"), color = "red") +
    theme(
      axis.text = element_text(color = "black"),
      axis.line = element_line(colour = "black"),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    )
} # close histogram function
#and some sample data
df_multiverse <- structure(list(transformation = c("normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal"), datatrimming = c("notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad"), fixedtrimming = c("min", "min", "min",
"min", "min", "minmax", "minmax", "minmax", "minmax", "minmax",
"nofixedtrimming", "min", "min", "min", "min", "min", "minmax",
"minmax", "minmax", "minmax", "minmax", "nofixedtrimming", "min",
"min", "min", "min", "min", "minmax", "minmax", "minmax", "minmax",
"minmax", "nofixedtrimming", "min", "min", "min", "min", "min",
"minmax", "minmax", "minmax", "minmax", "minmax", "nofixedtrimming",
"min", "min", "min", "min", "min", "minmax"), min = c("0.1",
"0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4", "0.5",
NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4",
"0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3",
"0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2",
"0.3", "0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1"
), max = c("4.78103879314337", "4.78103879314337", "4.78103879314337",
"4.78103879314337", "4.78103879314337", "10", "10", "10", "10",
"10", NA, "1.50348972125673", "1.50348972125673", "1.50348972125673",
"1.50348972125673", "1.50348972125673", "10", "10", "10", "10",
"10", NA, "1.6673730851492", "1.6673730851492", "1.6673730851492",
"1.6673730851492", "1.6673730851492", "10", "10", "10", "10",
"10", NA, "1.82875939263309", "1.82875939263309", "1.82875939263309",
"1.82875939263309", "1.82875939263309", "10", "10", "10", "10",
"10", NA, "1.98682907108801", "1.98682907108801", "1.98682907108801",
"1.98682907108801", "1.98682907108801", "10"), DispersionMeasure = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2.5", "2.5", "2.5", "2.5", "2.5",
"2.5", "2.5", "2.5", "2.5", "2.5", "2.5", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3.5", "3.5", "3.5", "3.5",
"3.5", "3.5"), df = c(23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23), t.value = c(-1.96240490816673,
-1.91062435558061, -1.88913858576971, -1.50889838134833, -0.584414818091524,
-1.96240490816673, -1.91062435558061, -1.88913858576971, -1.50889838134833,
-0.584414818091524, -2.01035512741752, -2.32446732021548, -2.32446732021548,
-2.25138730178018, -1.75805360848308, -0.671509667928522, -2.32446732021548,
-2.32446732021548, -2.25138730178018, -1.75805360848308, -0.671509667928522,
-2.32446732021548, -2.07781942947361, -2.04327207374561, -1.96398718960439,
-1.45016152484876, -0.43329653628318, -2.07781942947361, -2.04327207374561,
-1.96398718960439, -1.45016152484876, -0.43329653628318, -2.07781942947361,
-3.1795493150037, -3.14621983607465, -3.03987566457514, -2.35519486220697,
-1.34118074962509, -3.1795493150037, -3.14621983607465, -3.03987566457514,
-2.35519486220697, -1.34118074962509, -3.19618807311348, -3.37575126770368,
-3.33582114002809, -3.25737102188504, -2.65364122964845, -1.74520405186558,
-3.37575126770368), p.value = c(0.0619242560601778, 0.0685974542038329,
0.0715464534237802, 0.14494031195569, 0.564630276572904, 0.0619242560601778,
0.0685974542038329, 0.0715464534237802, 0.14494031195569, 0.564630276572904,
0.056262190757649, 0.0292871811194525, 0.0292871811194525, 0.0342153500184824,
0.0920408256371383, 0.508584931329577, 0.0292871811194525, 0.0292871811194525,
0.0342153500184824, 0.0920408256371383, 0.508584931329577, 0.0292871811194525,
0.049074641173751, 0.0526459198825374, 0.0617296734199745, 0.160514579425126,
0.668835951230964, 0.049074641173751, 0.0526459198825374, 0.0617296734199745,
0.160514579425126, 0.668835951230964, 0.049074641173751, 0.00417775230313281,
0.00452298394363368, 0.00581820793330847, 0.0274164539383892,
0.192956766873482, 0.00417775230313281, 0.00452298394363368,
0.00581820793330847, 0.0274164539383892, 0.192956766873482, 0.00401507276581307,
0.00260719926285416, 0.00287129534969705, 0.00346795018735445,
0.0141919615636613, 0.0942977424474807, 0.00260719926285416),
estimate = c(-0.797956867083461, -0.776801900236937, -0.7455698051489,
-0.444049984838546, -0.10530217843728, -0.797956867083461,
-0.776801900236937, -0.7455698051489, -0.444049984838546,
-0.10530217843728, -0.820469748450972, -0.251308805770323,
-0.251308805770323, -0.251096848307402, -0.226028966303428,
-0.134612249858047, -0.251308805770323, -0.251308805770323,
-0.251096848307402, -0.226028966303428, -0.134612249858047,
-0.251308805770323, -0.265907227757688, -0.261504591915461,
-0.260164781545852, -0.225524157517464, -0.10176195202019,
-0.265907227757688, -0.261504591915461, -0.260164781545852,
-0.225524157517464, -0.10176195202019, -0.265907227757688,
-0.409969137221152, -0.405618224033153, -0.409494543344045,
-0.387356945276789, -0.329354185640372, -0.409969137221152,
-0.405618224033153, -0.409494543344045, -0.387356945276789,
-0.329354185640372, -0.422572659021681, -0.506062313897924,
-0.501186805248218, -0.510763602114717, -0.498830153358464,
-0.447892133899374, -0.506062313897924)), row.names = c("df",
"df1", "df2", "df3", "df4", "df5", "df6", "df7", "df8", "df9",
"df10", "df11", "df12", "df13", "df14", "df15", "df16", "df17",
"df18", "df19", "df20", "df21", "df22", "df23", "df24", "df25",
"df26", "df27", "df28", "df29", "df30", "df31", "df32", "df33",
"df34", "df35", "df36", "df37", "df38", "df39", "df40", "df41",
"df42", "df43", "df44", "df45", "df46", "df47", "df48", "df49"
), class = "data.frame")
# Execute the function: pass the data frame and its p.value column.
multiverse.p.histogram(df_multiverse, df_multiverse$p.value)
There are two problems with the code:
The alpha does not display next to the line but on top of it, and I had to specify y = 75 manually; ideally, it should always sit just below the upper border. In addition, I can't get the text size of the alpha to decrease. I tried nudge_x, but that produces the following warnings: Warnmeldungen:
1: Removed 2 rows containing missing values (geom_bar).
2: Removed 264 rows containing missing values (geom_text).
Does anyone have suggestions?
Thanks already!
Edit:
Based on the answers, here is my updated code:
# Histogram of multiverse p-values with a density overlay and a dashed red
# line (plus an alpha symbol) at the significance threshold.
#
# dataframe: data frame containing a `p.value` column (mapped to x).
# pvalues:   accepted but not used inside the function body.
# alpha:     x position of the threshold line and its label (default 0.05).
#
# Returns the ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # Theme overrides applied on top of theme_bw(): black axis text and lines,
  # no legend, no grid lines, no panel border or background.
  bare_theme <- theme(
    axis.text = element_text(color = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

  # Data layers: histogram, density curve and the threshold line.
  data_layers <- ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed")

  # Alpha symbol next to the line; y = Inf anchors it to the upper panel
  # edge and vjust = "inward" pulls the text back inside the panel.
  threshold_label <- geom_text(
    x = alpha,
    y = Inf,
    label = expression(paste(alpha)),
    color = "red",
    hjust = -0.5,
    vjust = "inward",
    check_overlap = TRUE
  )

  data_layers +
    threshold_label +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    bare_theme
}
Here are a few tweaks to your function that may help:
Find out where the approximate upper limit of your plot will be by using the base R hist function. Use this as the position for alpha, then set the upper y limit as a small multiple of that to ensure everything fits nicely.
You only need a single alpha label, so don't map the text to an aesthetic. You can use x and y positions directly.
Use hjust to adjust your text position.
It makes your code easier to read and debug if you arrange the plot code so that it all fits neatly across a single screen and is in a predictable order (I like ggplot, then geoms, then scales, then lims, then labels, then themes, but whatever order works best for you, stick to a consistent scheme).
# Plot a histogram of multiverse p-values with a density overlay, a dashed
# red line at the significance threshold and an "Alpha" label beside it.
#
# dataframe: data frame with a numeric `p.value` column (values in [0, 1],
#            since the bin breaks run from 0 to 1).
# pvalues:   not used by the body; kept so existing calls keep working.
# alpha:     significance threshold where the line and label are drawn
#            (default 0.05).
#
# Returns the ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # Approximate height of the tallest bar, taken from base R's binning.
  # plot = FALSE: only the counts are needed; without it hist() would also
  # draw a base-graphics histogram on the active device as a side effect.
  upper <- max(
    hist(dataframe$p.value, breaks = seq(0, 1, 0.01), plot = FALSE)$counts
  )

  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # annotate() draws the label exactly once; geom_text() with constant
    # x/y would draw one copy of the label per data row.
    annotate("text", x = alpha, y = upper, label = "Alpha",
             color = "red", hjust = -0.25) +
    coord_cartesian(xlim = c(0, 1)) +
    xlim(-0.01, 1) +
    # Leave 10% headroom above the tallest bar so the label fits.
    ylim(0, upper * 1.1) +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    theme(axis.text = element_text(color = "black"),
          axis.line = element_line(colour = "black"),
          legend.position = "none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank())
}

How to change the font size of the tick labels in contour plot colorbar in R?

I am using plot_ly to plot a contour plot. The code is as follows:
# Axis positions and a 6 x 17 matrix of random values to contour.
X <- c(1, 2, 3, 4, 5, 6)
Y <- seq(from = 1, to = 17, by = 1)
Z <- matrix(runif(17 * 6), ncol = 17)
# Colorbar settings; not referenced by the plot_ly() call below.
m <- list(colorbar = list(title = "hi"))

# NOTE(review): `fs` (used for every font size below) is not defined in
# this snippet -- it has to exist in the calling environment.
plot_ly(
  x = X,
  y = Y,
  z = t(Z),
  type = "contour",
  opacity = 100,
  fillcolor = "FALSE",
  line = list(width = 1.5),
  contours = list(
    showlabels = TRUE,
    labelfont = list(size = fs, color = "white")
  ),
  color = I("black")
) %>%
  layout(
    # Both axes use explicit tick positions with custom tick labels.
    xaxis = list(
      title = "X",
      titlefont = list(size = fs),
      ticktext = c("9:00", "9:30", "10:00", "10:30", "11:00", "11:30"),
      tickvals = c(1, 2, 3, 4, 5, 6),
      tickmode = "array",
      tickfont = list(size = fs)
    ),
    yaxis = list(
      title = "Y",
      titlefont = list(size = fs),
      ticktext = c(
        "0", "30", "60", "90", "120", "150", "180", "210", "240",
        "270", "300", "330", "360", "390", "420", "450", "480"
      ),
      tickvals = 1:17,
      tickmode = "array",
      tickfont = list(size = fs)
    )
  )
The output I get is:
How do I change the font size of the colorbar tick labels (encircled in red)?
Just include colorbar=list(tickfont=list(size=25)) in your chart setup.
I had some problems with your sample code, so this will have to do:
Plot:
Code:
library(plotly)
library(tidyr)
library(dplyr)
# Contour plot whose colorbar tick labels are enlarged and coloured red.
p <- plot_ly(
  x = c(-9, -6, -5, -3, -1),
  y = c(0, 1, 4, 5, 7),
  # matrix() fills column by column, so each line of numbers below is one
  # column of the 5 x 5 z matrix.
  z = matrix(
    c(
      10, 10.625, 12.5, 15.625, 20,
      5.625, 6.25, 8.125, 11.25, 15.625,
      2.5, 3.125, 5, 8.125, 12.5,
      0.625, 1.25, 3.125, 6.25, 10.625,
      0, 0.625, 2.5, 5.625, 10
    ),
    nrow = 5,
    ncol = 5
  ),
  type = "contour",
  # tickfont sets the size and colour of the colorbar tick labels.
  colorbar = list(tickfont = list(size = 25, color = "red")),
  contours = list(start = 0, end = 8, size = 2)
)
# Print the plot object.
p

Why hover in plotly barchart does not work?

I've got data like this ...
# rok miesiac ile kwartal miesiac2 kwartal2 miesiac3 limit serwis typ ile2 ile_proc lp
# (dbl) (dbl) (dbl) (dbl) (chr) (fctr) (chr) (dbl) (chr) (chr) (dbl) (dbl) (dbl)
# 1 2017 1 31.5 1 1 Q1 2017 Styczeń 0 Sport wizyty 32.5 97 1
# 2 2017 2 1.0 1 2 Q1 2017 Luty 0 Sport wizyty 32.5 3 1
... and I try to draw this plot from plotly library ...
# Bar chart of `ile` against `lp`, coloured by `miesiac2`. hoverinfo = "text"
# restricts the hover label to the `text` values (here `miesiac3`).
# `tab` is the data frame shown above; it is not defined in this snippet.
plot_ly(tab,
x = ~lp,
y = ~ile,
color = ~miesiac2,
type = "bar",
text = ~miesiac3,
hoverinfo = "text")
... and everything is OK except the hover: it does not work, and I have no idea why. Curiously, when I use data in the same format but a bit 'longer', everything works.
I have no idea where the problem is. I hope you do!
Simple data:
structure(list(rok = c(2017, 2017), miesiac = c(1, 2), ile = c(31.5,
1), kwartal = c(1, 1), miesiac2 = c("1", "2"), kwartal2 = structure(c(1L,
1L), .Label = "Q1 2017", class = "factor"), miesiac3 = c("Styczeń",
"Luty"), limit = c(97, 97), serwis = c("Sport", "Sport"), typ = c("wizyty",
"wizyty"), ile2 = c(32.5, 32.5), ile_proc = c(97, 3), lp = c(1,
1)), class = "data.frame", .Names = c("rok", "miesiac", "ile",
"kwartal", "miesiac2", "kwartal2", "miesiac3", "limit", "serwis",
"typ", "ile2", "ile_proc", "lp"), row.names = c(NA, -2L))
'Longer' data:
structure(list(rok = c(2016, 2016, 2016, 2016, 2016, 2016, 2016,
2016, 2016, 2017, 2017), miesiac = c(4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2), ile = c(80.1, 87.5, 159, 104, 125.3, 74.2, 84.9, 74.4,
75.3, 81.8, 2.4), kwartal = c(2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1
), miesiac2 = c("1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2"), kwartal2 = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L), .Label = c("Q2 2016", "Q3 2016", "Q4 2016",
"Q1 2017"), class = "factor"), miesiac3 = c("Kwiecień", "Maj",
"Czerwiec", "Lipiec", "Sierpień", "Wrzesień", "Październik",
"Listopad", "Grudzień", "Styczeń", "Luty"), limit = c(308, 308,
308, 300, 300, 300, 245, 245, 245, 244, 244), serwis = c("Sport",
"Sport", "Sport", "Sport", "Sport", "Sport", "Sport", "Sport",
"Sport", "Sport", "Sport"), typ = c("odslony", "odslony", "odslony",
"odslony", "odslony", "odslony", "odslony", "odslony", "odslony",
"odslony", "odslony"), ile2 = c(326.6, 326.6, 326.6, 303.5, 303.5,
303.5, 234.6, 234.6, 234.6, 84.2, 84.2), ile_proc = c(25, 27,
49, 34, 41, 24, 36, 32, 32, 97, 3), lp = c(1, 1, 1, 2, 2, 2,
3, 3, 3, 4, 4)), class = "data.frame", .Names = c("rok", "miesiac",
"ile", "kwartal", "miesiac2", "kwartal2", "miesiac3", "limit",
"serwis", "typ", "ile2", "ile_proc", "lp"), row.names = c(NA,
-11L))
The plotting works for me, although I do get a warning
Warning message:
In RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
This is just a warning and can in this case be ignored. For the curious, it originates from RColorBrewer and can be avoided by manually specifying the colors.
library(RColorBrewer)
# display.brewer.all() # see all the palettes
# Generate the colours beforehand. brewer.pal() warns when n < 3 and returns
# the 3-colour palette anyway, so request n = 3 and keep only the first two:
# same colours, no warning.
cols <- brewer.pal(n = 3, name = "Set2")[1:2]
plot_ly(xyshort,
        x = ~lp,
        y = ~ile,
        color = ~miesiac2,
        colors = cols, # explicitly name colors
        type = "bar",
        text = ~miesiac3,
        hoverinfo = "text")
R 3.3.2 on Windows 7 and plotly_4.5.6.

Resources