How to add manually pvalue on geom_barplot with group? - r

I have this dataframe
bat=rep(c("A", "B", "C", "D"),3)
loc=c(rep("ab",4), rep("te",4), rep("po", 4))
value=c(17,8,14,20,2,9,11,18,5,17,7,14)
tot=rep(c(30,45,36,58),3)
say=rep()
dt=data_frame(bat, loc, value, tot) %>% mutate(pct=value/tot*100) %>% mutate(se=sqrt(((pct*(100-pct))/(tot)))) %>% mutate(value2=tot-value)
> dt
# A tibble: 12 x 7
bat loc value tot pct se value2
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A ab 17 30 56.7 9.05 13
2 B ab 8 45 17.8 5.70 37
3 C ab 14 36 38.9 8.12 22
4 D ab 20 58 34.5 6.24 38
5 A te 2 30 6.67 4.55 28
6 B te 9 45 20 5.96 36
7 C te 11 36 30.6 7.68 25
8 D te 18 58 31.0 6.07 40
9 A po 5 30 16.7 6.80 25
10 B po 17 45 37.8 7.23 28
11 C po 7 36 19.4 6.60 29
12 D po 14 58 24.1 5.62 44
I ran a Fisher's exact test between each pair of bat within each loc, using the following code.
# Run Fisher's exact test on every pair of rows of a single-location subset
# of `dt` (one row per bat).
#
# d: data frame with the rows of one loc; must contain the columns `bat`,
#    `loc`, `value` and `value2` (value2 = tot - value).
# Returns a data frame with one row per pair (group1, group2, loc,
# odds_ratio, p), or NULL when `d` has fewer than two rows to compare.
pairwise_fisher <- function(d) {
  if (nrow(d) < 2) {
    return(NULL)
  }
  # Each row of `idx` holds the row numbers of one pair to compare.
  idx <- t(combn(seq_len(nrow(d)), 2))
  res <- lapply(seq_len(nrow(idx)), function(i) {
    rows <- idx[i, ]
    # 2 x 2 table: the two selected rows by (value, value2).
    test <- fisher.test(d[rows, c("value", "value2")])
    data.frame(
      group1 = d$bat[rows[1]],
      group2 = d$bat[rows[2]],
      loc = d$loc[rows[1]],
      odds_ratio = as.numeric(test$estimate),
      p = as.numeric(test$p.value)
    )
  })
  do.call(rbind, res)
}

dt_ab <- dt %>% filter(loc == "ab")
dt_po <- dt %>% filter(loc == "po")
dt_te <- dt %>% filter(loc == "te")

# ab
res_ab <- pairwise_fisher(dt_ab)
res_ab
# po
res_po <- pairwise_fisher(dt_po)
res_po
# te
res_te <- pairwise_fisher(dt_te)
res_te

# Stack the per-location results in the order ab, po, te.
res <- rbind(res_ab, res_po, res_te)
res
group1 group2 loc odds_ratio p
1 A B ab 5.8817685 0.0009288055
2 A C ab 2.0321378 0.2157927844
3 A D ab 2.4578341 0.0678462682
4 B C ab 0.3445393 0.0451456088
5 B D ab 0.4143084 0.0750566107
6 C D ab 1.2066231 0.6665182345
7 A B po 0.3341536 0.0700086821
8 A C po 0.8309230 1.0000000000
9 A D po 0.6317547 0.5859688210
10 B C po 2.4870606 0.0896596469
11 B D po 1.8959538 0.1934540389
12 C D po 0.7608147 0.7994419705
13 A B te 0.2899040 0.1823689517
14 A C te 0.1664441 0.0270616991
15 A D te 0.1614577 0.0141330017
16 B C te 0.5722244 0.3084931936
17 B D te 0.5586957 0.2609751992
18 C D te 0.9780082 1.0000000000
Now I would like to manually add the p-value and the odds ratio for each pair to the following barplot.
I saw that the stat_pvalue_manual function can be used to do that, but I am having some difficulty using it. Could someone please have a look at my code and tell me what is wrong? Below are the code and the plot I obtained.
# Grouped bar plot of pct per loc, one dodged bar per bat, with standard-error
# bars, "pct % (value / tot)" labels above the bars and the p-values from `res`.
# The fill aesthetic is mapped to `bat`: `dt` has no `cat` column, and the
# colour names in scale_fill_manual() are the bat levels A-D.
bp_tab_pct <- ggplot(dt, aes(x = loc, y = pct, fill = bat)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("A" = "#5977FF", "B" = "#FF7F50", "C" = "#00CED1", "D" = "#FFFF33")) +
  labs(fill = "") +
  # ylim() drops everything that falls outside 0-100, including any p-value
  # bracket stacked above 100.
  ylim(c(0, 100)) +
  ggtitle(label = "") +
  geom_text(aes(y = pct + se + 2, label = paste(round(pct, 2), "%", "\n", "(", value, "/", tot, ")")),
            color = "black", size = 5, position = position_dodge(width = 0.9)) +
  theme(plot.title = element_text(color = "black", size = 15, face = "bold.italic")) +
  theme(axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 16), axis.title.x = element_blank()) +
  geom_errorbar(aes(ymin = pct - se, ymax = pct + se), width = .2,
                position = position_dodge(.9)) +
  # NOTE(review): group1/group2 in `res` are bat levels while the x axis here
  # is loc -- confirm the brackets end up where intended.
  stat_pvalue_manual(res, y.position = 35, step.increase = 0.1,
                     label = "p", inherit.aes = FALSE)
enter image description here
I also tried with facets instead of groups, but I still don't get the plot I want.
It seems that the stat_pvalue_manual function considers only the first 10 rows, and I don't know why.
# Faceted version: one panel per loc, one bar per bat, with standard-error
# bars, "pct % (value / tot)" labels and the pairwise p-values from `res`.
bp_tab_pct <- ggplot(dt, aes(x = bat, y = pct, fill = bat)) +
  facet_wrap(~loc) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("A" = "#5977FF", "B" = "#FF7F50", "C" = "#00CED1", "D" = "#FFFF33")) +
  labs(fill = "") +
  # ylim() drops everything that falls outside 0-100, so the brackets must
  # stay below 100 to be drawn.
  ylim(c(0, 100)) +
  ggtitle(label = "") +
  geom_text(aes(y = pct + se + 2, label = paste(round(pct, 2), "%", "\n", "(", value, "/", tot, ")")),
            color = "black", size = 5, position = position_dodge(width = 0.9)) +
  theme(plot.title = element_text(color = "black", size = 15, face = "bold.italic")) +
  theme(axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 16), axis.title.x = element_blank()) +
  geom_errorbar(aes(ymin = pct - se, ymax = pct + se), width = .2,
                position = position_dodge(.9)) +
  # step.group.by restarts the bracket stacking within each loc panel.
  # Without it all 18 comparisons are stacked in a single sequence and the
  # later ones rise past the upper y limit, where they are removed.
  stat_pvalue_manual(res, y.position = 35, step.increase = 0.1,
                     step.group.by = "loc",
                     label = "p", inherit.aes = FALSE)
enter image description here

I understood the problem. It was due to ylim masking the highest values on the y axis. Now I would like to be able to adjust the y position of each p-value, as I did for the pct labels shown above the bars, but I will post another question for that.
enter image description here

Related

ggplot in R define breaks for character column in x axis

graf4<-ggplot(DF,
aes(x=variable,y=value,color=paese)) +
geom_line(aes(linetype=paese,group=paese),size=2.5)+
theme_bw()+
labs(x="",y="")+
scale_colour_manual(values=c("darkorange1","darkorchid1","darkgreen","steelblue3","maroon2","grey2","yellow4",
"burlywood4", "chocolate"))+
scale_y_continuous(labels=fmt_dcimals(1))+
theme(panel.grid.major = element_line(colour = "grey",size=0.1)) +
theme(panel.grid.minor = element_line(colour = "white",linetype="dashed",size=0.1)) +
theme(strip.text.y=element_text(angle=0))+
theme(axis.text.x = element_text(angle=90,vjust=0.5,size=12),
axis.text.y = element_text(size=12),
legend.position = "bottom",
legend.box="horizontal",
legend.box.background=element_rect(),
legend.title = element_blank(),
legend.text=element_text(size=12))+
guides(col=guide_legend(nrow=1,byrow=TRUE))
print(graf4)
The code above works, but my problem is that there are too many ticks (labels) on the x axis and I would like to have fewer of them on the graph, for example a break every 3 observations.
Note that VARIABLE is not in date format but in character format. For other reasons, I don't want to use a date format.
I tried scale_x_discrete, scale_x_continuous and other functions, with and without parameters, but I can't get a tick on the x axis every 3 observations, for example.
I know that for a numeric type on the x axis I can set breaks, but I don't know how to set them for a character type.
Variable * value * paese
#1 1999 q1 12 UK
#2 1999 q2 15 UK
#3 1999 q3 55 UK
#4 1999 q4 67 UK
#5 1999 q1 12 DE
#6 1999 q2 15 DE
#7 1999 q3 55 DE
#8 1999 q4 67 DE
#9 2000 q1 33 UK
#10 2000 q2 23 UK
#11 2000 q3 65 UK
#12 2000 q4 34 UK
#13 2000 q1 33 DE
#14 2000 q2 23 DE
#15 2000 q3 65 DE
#16 2000 q4 34 DE
# .... . ..
#45 2023 q1 23 UK
#46 2023 q2 11 UK
#47 2023 q3 23 UK
#48 2023 q1 23 DE
#49 2023 q2 11 DE
#50 2023 q3 23 DE
Something like this perhaps?
library(tidyverse)
# Toy data: 21 x-axis categories, two series (UK / US), random values.
df <- data.frame(
  variable = rep(LETTERS[1:21], 2, each = 2),
  value = round(rnorm(84, 10, 3), 0),
  paese = c("UK", "US")
)

# Keep every third distinct x value as an axis break; the remaining
# categories are still plotted but get no tick or label.
x_values <- unique(df$variable)
every_third <- x_values[seq(1, length(x_values), 3)]

ggplot(df, aes(variable, value)) +
  geom_line(aes(linetype = paese, group = paese)) +
  scale_x_discrete(breaks = every_third)
Created on 2023-02-17 with reprex v2.0.2
You can pass a custom labelling function in scale_x_discrete via the labels = parameter. This allows you to make blank labels for any of the points on the x axis.
For example, suppose I wanted to only show every year instead of every quarter. I could look for the string "q1" inside each label, and if it is present, return just the year component. If "q1" is not present, we would return an empty string:
ggplot(DF, aes(Variable, value, color = paese)) +
geom_line(aes(linetype = paese, group = paese), size = 2.5) +
scale_x_discrete(labels = ~ifelse(grepl('q1', .x), sub(' q1', '', .x), ''),
name = NULL) +
scale_colour_manual(values = c("darkorange1", "darkorchid1")) +
scale_y_continuous(NULL, labels = fmt_dcimals(1)) +
guides(col = guide_legend(nrow = 1, byrow = TRUE)) +
theme_bw() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.1),
panel.grid.minor = element_blank(),
strip.text.y = element_text(angle = 0),
axis.text.x = element_text(size = 12, vjust = -1),
axis.text.y = element_text(size = 12),
legend.position = "bottom",
legend.box = "horizontal",
legend.box.background = element_rect(),
legend.title = element_blank(),
legend.text = element_text(size = 12))

Euclidean distance from a point (x,y) to the diagonal line not make sense when plotting the dots

I have this data frame:
data_new
X Y dis
1 123.99561 51.91563 51
2 37.88886 58.53972 15
3 141.40895 63.65503 55
4 119.14452 30.76315 62
5 274.76553 86.58737 133
6 47.52072 45.31681 2
7 95.85810 50.61795 32
8 67.15455 32.80465 24
9 154.51721 47.30627 76
10 48.44236 28.53955 14
11 31.81736 37.71708 4
12 25.88461 30.42648 3
13 236.74989 67.73648 120
14 99.48136 24.94934 53
15 23.36206 39.49535 11
16 24.44889 37.43396 9
17 113.75209 80.35577 24
18 95.45518 29.98268 46
19 28.81779 43.61353 10
20 199.56171 26.04967 123
21 111.38701 40.14422 50
22 284.14249 59.72049 159
23 63.29509 25.16359 27
24 227.48874 41.13283 132
25 125.92173 84.18931 30
26 51.42495 46.09882 4
27 215.53986 54.20816 114
28 81.68309 48.08828 24
29 144.99979 79.08405 47
30 125.25327 33.18417 65
31 107.41949 27.30456 57
32 68.18888 34.59822 24
33 24.66347 53.77048 21
34 142.25804 39.52901 73
35 45.39493 21.50758 17
36 410.58755 168.68774 171
37 37.72387 20.73628 12
38 88.80147 24.92792 45
39 104.97260 20.85106 59
40 180.27368 72.85551 76
41 181.45314 45.02298 96
42 191.04002 41.63631 106
43 39.53319 26.83474 9
44 102.12894 42.60350 42
45 215.98138 74.87252 100
46 110.32643 32.65773 55
47 111.40245 106.77119 3
48 444.09225 150.84842 207
49 104.60363 47.16242 41
50 207.58011 35.09406 122
The col "dis" is the calculation of each dot's distance to the diagonal line.
Calculated as abs(Y - X) / sqrt(2).
Then, I plot out:
# Scatter plot of data_new on asinh-transformed axes, each point labelled with
# its precomputed distance `dis`, plus the diagonal y = x (solid blue) and the
# parallels y = x + 1 and y = x - 1 (dashed orange).
ggplot(data_new, aes(x = X, y = Y)) +  # the data frame must be passed explicitly
  geom_point(aes(x = X, y = Y), alpha = 1, size = 2, colour = "red") +
  geom_text_repel(aes(x = X, y = Y, label = dis), max.overlaps = 20) +
  geom_abline(intercept = c(0, 1, -1), slope = 1,
              linetype = c("solid", "dashed", "dashed"),
              color = c("blue", "orange", "orange")) +
  # `limits` takes exactly two values (lower, upper); the stray third
  # element was removed.
  scale_x_continuous("X",
                     trans = "asinh",
                     limits = c(1, 10^5),
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = trans_format("log10", math_format(10^.x))) +
  scale_y_continuous("Y",
                     trans = "asinh",
                     limits = c(1, 10^5),
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = trans_format("log10", math_format(10^.x))) +
  # NOTE(review): annotation_logticks() is designed for log10 scales; with
  # asinh axes the tick positions may not line up exactly -- confirm.
  # It was added twice in the original; once is enough.
  annotation_logticks(sides = "bl", outside = TRUE) +
  theme(plot.margin = margin(10, 0, 10, 10),
        plot.title = element_text(color = "black", size = 16, hjust = 0.5),
        plot.subtitle = element_text(color = "black", size = 16, hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, size = 0.5),
        panel.background = element_rect(fill = "white", colour = "black"),
        axis.text.x = element_text(vjust = -1, size = 15, colour = "black"),
        axis.text.y = element_text(hjust = -1, size = 15, colour = "black"),
        axis.title.x = element_text(vjust = -0.5, size = 18),
        axis.title.y = element_text(vjust = 3, size = 18))
The plot looks very different from what I expected.
First, I drew two lines parallel to the diagonal, with intercepts 1 and -1. Theoretically, the dots on the line y = x + 1 should be at a distance of abs(1)/sqrt(2) from the line y = x. But all of the calculated dots have a "dis" value greater than 1. Did I do the calculation wrong?
Second, the dot with a value of 207 is even closer to the diagonal line than the dot with a value of 159. I'm so confused.
I applied an asinh transformation to the x and y axes; could that be what is affecting the plot?
Thanks.

Creating a bar graph with several variables in the x axis

I have the following dataset (graph_data):
# A tibble: 18 x 7
construction phase group mean se se_top se_bottom
<chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 hacer pre-test heritage 7.67 3.67 11.3 4
2 hacer treatment heritage 15.5 3.00 18.5 12.5
3 hacer post-test heritage 9.83 4.25 14.1 5.58
4 acc pre-test heritage 0.166 0.166 0.332 0
5 acc treatment heritage 4.33 2.67 7.00 1.67
6 acc post-test heritage 0.166 0.166 0.332 0
7 spe pre-test heritage 2.33 1.36 3.69 0.975
8 spe treatment heritage 6.67 2.69 9.36 3.98
9 spe post-test heritage 0.833 0.477 1.31 0.356
10 hacer pre-test monolingual 1 0.707 1.71 0.293
11 hacer treatment monolingual 1 0.577 1.58 0.423
12 hacer post-test monolingual 0.25 0.25 0.5 0
13 acc pre-test monolingual 0 0 0 0
14 acc treatment monolingual 1 0.577 1.58 0.423
15 acc post-test monolingual 0 0 0 0
16 spe pre-test monolingual 4 3.37 7.37 0.634
17 spe treatment monolingual 15.8 2.36 18.1 13.4
18 spe post-test monolingual 3.5 3.18 6.68 0.325
I want to create a bar graph using ggplot2 with the following conditions:
y axis: mean
x axis: phase + construction
facet: group
error bars: standard error (se_top, se_bottom)
I have used this code:
graph_data %>%
ggplot(aes(graph_data, x=phase, y=mean, fill =phase)) +
geom_bar(stat = "identity", color = "black", position = "dodge") +
scale_y_continuous(limits = c(0,20)) +
labs(x = "Phase", y = "Average number of targets produced") +
facet_wrap( ~ group) +
geom_errorbar(aes(ymin= se_bottom, ymax = se_top), width=.2,
position=position_dodge(.9)) +
theme(text = element_text(size=20)) +
theme_classic() + scale_fill_manual(values=c("#90EE90", "#3CB371", "#2E8B57")) +
theme(axis.text=element_text(size=16),
axis.title=element_text(size=15,face="bold"),
axis.text.x = element_text(angle=45, hjust = 1),
legend.position = "none")
However, in the graph that I get, columns are stacked on top of each other, even though I have used position = "dodge" in my code:
What should I change in order to get the graph I want?
Maybe this can be useful:
library(ggplot2)
#Code
# One bar per phase x construction combination, faceted by group.
# The data frame arrives through the pipe, so it must not be repeated
# inside aes(); aes() takes only aesthetic mappings.
graph_data %>%
  ggplot(aes(x = interaction(phase, construction), y = mean,
             fill = phase, group = group)) +
  geom_bar(stat = "identity", color = "black", position = position_dodge(0.9)) +
  scale_y_continuous(limits = c(0, 20)) +
  labs(x = "Phase", y = "Average number of targets produced") +
  facet_wrap(~ group) +
  geom_errorbar(aes(ymin = se_bottom, ymax = se_top), width = .2,
                position = position_dodge(.9)) +
  # NOTE(review): theme_classic() is a complete theme, so it overrides the
  # text size set just before it -- swap the two calls if size 20 is wanted.
  theme(text = element_text(size = 20)) +
  theme_classic() +
  scale_fill_manual(values = c("#90EE90", "#3CB371", "#2E8B57")) +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 15, face = "bold"),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")
Output:
Update: In order to get some order, try this:
#Code 2
# Same plot, with the x axis ordered by phase (pre-test, treatment, post-test)
# and, within each phase, by construction in order of appearance.
graph_data %>%
  mutate(phase = factor(phase, levels = c('pre-test', 'treatment', 'post-test'),
                        ordered = TRUE)) %>%
  arrange(phase) %>%
  # `conc` is the "phase construction" label; taking its levels in order of
  # appearance after arrange() keeps the phases grouped together on the axis.
  mutate(conc = paste(as.character(phase), construction),
         conc = factor(conc, levels = unique(conc), ordered = TRUE)) %>%
  # The data frame arrives through the pipe, so it must not be repeated
  # inside aes().
  ggplot(aes(x = conc, y = mean, fill = phase, group = group)) +
  geom_bar(stat = "identity", color = "black", position = position_dodge(0.9)) +
  scale_y_continuous(limits = c(0, 20)) +
  labs(x = "Phase", y = "Average number of targets produced") +
  facet_wrap(~ group) +
  geom_errorbar(aes(ymin = se_bottom, ymax = se_top), width = .2,
                position = position_dodge(.9)) +
  # NOTE(review): theme_classic() is a complete theme, so it overrides the
  # text size set just before it -- swap the two calls if size 20 is wanted.
  theme(text = element_text(size = 20)) +
  theme_classic() +
  scale_fill_manual(values = c("#90EE90", "#3CB371", "#2E8B57")) +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 15, face = "bold"),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")
Output:

Proper x axis scale with years only

I have a grid with two plots, each of which consists of two time series of mean values: one (df5) comes from a calculation I did in R, while the other (mmzep) does not (I received this dataset already calculated).
library(dplyr)
library(lubridate)
df5 <- data.frame(df$Date, df$Price)
colnames(df5)<- c("date","price")
df5$date <- as.Date(df5$date,"%Y/%m/%d")
df5$price<- as.numeric(gsub(",",".",df5$price))
colnames(mmzep)<- c("date","Mar","Apr")
Then I created two other data frames from df5. I tried to combine them into a single data frame, but I was not able to do it.
meanM <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 3 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 3 2010-03-23 1082. 5.48 685.
2 2012 3 2012-03-27 858. 2.74 333.
3 2015 3 2015-03-16 603. 8.86 411.
4 2017 3 2017-03-15 674. 9.65 512.
5 2018 3 2018-03-16 318. 9.09 202.
6 2019 3 2019-03-14 840. 9.42 329.
meanA <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 4 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 4 2010-04-18 361. 9.00 334.
2 2011 4 2011-04-14 527. 8.36 312.
3 2012 4 2012-04-15 726. 8.80 435.
4 2013 4 2013-04-16 872. 8.50 521.
5 2014 4 2014-04-09 668. 5.34 354.
6 2015 4 2015-04-15 689. 8.80 436.
7 2017 4 2017-04-15 806. 8.80 531.
8 2018 4 2018-04-15 727. 8.80 291.
9 2019 4 2019-04-15 600. 8.94 690.
#mmzep
date Mar Apr
<dbl> <dbl> <dbl>
1 2010 793. 540
2 2011 650 378.
3 2012 813. 612.
4 2013 755. 717
5 2014 432. 634
6 2015 474. 782.
7 2016 590 743.
8 2017 544. 628
9 2018 249. 781
10 2019 547. 393
I plot the dfs
g5 = ggplot() +
geom_point(data=meanM, aes(x = (Year), y = (price_mean)),size = 3, colour="gray40") +
geom_point(data=mmzep, aes(x= (date), y=(Mar)), size =3, colour = "red") +
geom_line(data=meanM, aes(group = 1, x = (Year), y = (price_mean)), colour="gray40") +
geom_line(data=mmzep, aes(x = (date), y = (Mar)), colour="red") +
stat_smooth(data=meanM,aes(group = 1, x = (Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
stat_smooth(data=mmzep, aes(x = (date), y = (Mar)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "red3") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
I plotted g5 and g6 in the same way, then arranged them in a grid to obtain this:
enter image description here
As you can see, the x axis is not correct. I tried scale_x_date(breaks="year", labels=date_format("%Y")), scale_x_discrete(labels=c("2010","2011","2012","2013","2014","2015","2016","2017","2018","2019")), and scale_x_continuous in different ways.
I also tried mmzep$date <- as.Date(mmzep$date,"%Y"), but I saw that R needs a day (in my case a day and a month?), so I tried mmzep$date <- as.Date(paste("01", mmzep$date, sep="/"), "%d/%m/%Y"), but R replaces the years with NA. I think the error is in the way R sees the date in mmzep, but I don't understand how I can make R recognize it as the correct object.
Does anyone have any suggestions? Thanks in advance!
There are a few ways to do this. In your data, your year values are stored as type double. This tells ggplot that you have a continuous variable. If you want to leave your data as is, then the solution is
+ scale_x_continuous(breaks = seq(2010, 2020, 2))
# or something else that expressly lists the years you want to see on the axis.
You cannot use scale_x_date without your year data being converted to a date. You can do that with, for example
# R is case-sensitive: the data frame is `meanM`, not `MeanM`.
meanM$Year <- as.Date(paste(meanM$Year, "01", "01", sep = "/"))
Then you can use
+ scale_x_date(date_labels = "%Y")
Or you can convert your years into discrete data with factor. You cannot use scale_x_discrete on a continuous variable.
# R is case-sensitive: the data frame is `meanM`, not `MeanM`.
meanM$Year <- factor(meanM$Year)
And then use
+ scale_x_discrete()
Try this approach, tested on meanM without mmzep, for which we do not have data. The issue is that, because you are using multiple geoms, the functions are adding strange labels to the axis. Changing all x-axis variables to factors can alleviate the issue. In the case of mmzep with aes(x= (date),..), also be careful to format the date as a year with code like aes(x= factor(format(date,'%Y'))) so that all labels fit well on the axis. Here is the code:
#Code
ggplot() +
geom_point(data=meanM, aes(x = factor(Year), y = (price_mean)),size = 3, colour="gray40") +
geom_line(data=meanM, aes(group = 1, x = factor(Year), y = (price_mean)), colour="gray40") +
stat_smooth(data=meanM,aes(group = 1, x = factor(Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
Output:
Some data used:
#Data
meanM <- structure(list(Year = c(2010L, 2012L, 2015L, 2017L, 2018L, 2019L
), Month = c(3L, 3L, 3L, 3L, 3L, 3L), date_mean = c("23/03/2010",
"27/03/2012", "16/03/2015", "15/03/2017", "16/03/2018", "14/03/2019"
), price_mean = c(1082L, 858L, 603L, 674L, 318L, 840L), date_sd = c(5.48,
2.74, 8.86, 9.65, 9.09, 9.42), price_sd = c(685L, 333L, 411L,
512L, 202L, 329L), Year2 = structure(1:6, .Label = c("2010",
"2012", "2015", "2017", "2018", "2019"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")

How to change the linetype of one of two lines

I would like to change the linetype of one of the two lines in my plot, making only "line1" dashed.
My plot:
My data looks like this:
Year Sex value Rate Group
<dbl> <chr> <dbl> <dbl> <chr>
1 1912 Female 18 1.14 A
2 1912 Male 52 0.893 L
3 1913 Female 25 1.02 A
4 1913 Male 42 1.05 L
5 1914 Female 14 1.26 A
6 1914 Male 67 1.29 L
7 1915 Female 25 1.32 A
8 1915 Male 61 1.45 L
9 1916 Female 32 1.52 A
10 1916 Male 71 1.64 L
11 1917 Female 42 2.01 A
12 1917 Male 92 1.87 L
My code:
data %>% ggplot() +
geom_bar(aes(x = Year, y = value, fill= Sex), stat = "identity",
width=0.8,
alpha=0.8) +
geom_line(aes(x = Year, y = Rate * 100, colour= Group),
size = 1.0) +
scale_colour_manual(labels = c("line1","line2"),
values = c("red","blue"))+
scale_fill_manual(values = c("Female"="green","Male"="black"))+
guides(fill=guide_legend(title = "Number"),
color=guide_legend(title= "Ratio"))
Could anyone help? I tried for quite a while but failed. Thanks in advance.
As suggested in comments, and using the "name" term in each scale_*_manual to specify the legend names:
data %>%
ggplot() +
geom_bar(aes(x = Year, y = value, fill= Sex), stat = "identity",
width=0.8,
alpha=0.8) +
geom_line(aes(x = Year, y = Rate * 100,
colour = Group, linetype = Group),
size = 1.0) +
scale_colour_manual(name = "Ratio",
labels = c("line1","line2"),
values = c("red","blue"))+
scale_linetype_manual(name = "Ratio",
labels = c("line1","line2"),
values = c("dashed","solid"))+
scale_fill_manual(name = "Number",
values = c("Female"="green","Male"="black"))

Resources