How to show legend values as percent - r

For the following data:
RW GA Freq percFreq
0 0 9 0.13043478
0 3 1 0.01449275
0 14 1 0.01449275
0 16 1 0.01449275
0 23 1 0.01449275
0 25 1 0.01449275
0 29 2 0.02898551
0 30 1 0.01449275
2 30 1 0.01449275
15 30 2 0.02898551
19 30 1 0.01449275
22 30 1 0.01449275
24 30 1 0.01449275
29 30 1 0.01449275
30 29 16 0.23188406
30 30 29 0.42028986
I would like to change the legend values in the following plot to be shown as percent:
The script to generate the plot is:
ggplot(counts, aes(x=RW, y=GA, size=Freq, color=as.factor(percFreq))) + geom_point(alpha=0.7) +
scale_size(range = c(1, 10), name="Freq", limits=c(1,30), breaks=lbreaks) +
scale_color_discrete(name="Freq", breaks=lbreaks)
Basically, instead of showing 0.42028986 in the legend, I want it to be shown as 42%.
How can I do that?

Use 'percent' from 'scales' library.
Load the scales library:
library(scales)
And add labels = percent to your discrete scale:
ggplot(counts, aes(x=RW, y=GA, size=Freq, color=as.factor(percFreq))) +
geom_point(alpha=0.7) +
scale_size(range = c(1, 10), name="Freq", limits=c(1,30), breaks=lbreaks) +
scale_color_discrete(name="Freq", breaks=lbreaks, labels = percent(lbreaks, accuracy = .01))
If you want to change how it rounds the number, use the accuracy argument:
scales::percent(percFreq, accuracy = .001)
(this has accuracy = .1)
Hope this helps.

You can either transform percFreq into percentages
df$percFreq <- df$percFreq*100
or you can scale it directly inside aes() with color=as.factor(percFreq*100)
---- Reproducible example
df <- data.frame(RW = round(runif(16,0,30)),
GA=round(runif(16,0,30)),
Freq=round(runif(16,1,30)),
percFreq = runif(16,0.1,0.9))
df$percFreq <- round(df$percFreq*100,digits = 2)
ggplot(df, aes(x=RW, y=GA, size=Freq, color=as.factor(percFreq))) +
geom_point(alpha=0.7) +
scale_size(range = c(1, 10), name="Freq", limits=c(1,30)) +
scale_color_discrete(name="%")
I would advise against it, but if you want the % sign shown with the numbers, simply use paste(df$percFreq,"%",sep=" ")

Related

Adding text in one of the four facets [duplicate]

This question already has an answer here:
Annotation on only the first facet of ggplot in R?
(1 answer)
Closed last month.
I want to add a few texts in one facet out of four facets in my ggplot.
I am using annotate function to add a text but it generates the text at a given location (x,y) in every facet. Because the data variables have different ranges of y in each facet, the texts are not coming at a desired location (x,y).
Please let me know what should be done. Thanks.
library(dplyr)
library(tidyr)
library(ggplot2)
df%>%
select(Date, Ca, Na, K, Mg)%>%
gather(var,value,-Date)%>%
ggplot(aes(as.Date(Date), value))+
geom_point()+
theme_bw()+
facet_wrap(~var,scales = 'free_y',ncol = 1)+
ylab(" (ppm) (ppm)
(ppm) (ppm)")+
facet_wrap(~var,scales = 'free_y',ncol = 1, strip.position = "right")+
geom_vline(aes(xintercept = as.Date("2021-04-28")), col = "red")+
geom_vline(aes(xintercept = as.Date("2021-04-28")), col = "red")+
geom_vline(aes(xintercept = as.Date("2021-04-29")), col = "red")+
theme(axis.title = element_text(face="bold"))+
theme(axis.text = element_text(face="bold"))+
xlab('Date')+
theme(axis.title.x = element_text(margin = margin(t = 10)))+
theme(axis.title.y = element_text(margin = margin(r = 10)))+
annotate("text", label = "E1", x = as.Date("2021-04-28"), y = 2.8)
This is the code I am using for the desired output. I want to label the xintercept lines E1, E2, E3 (from left to right) at the top of the plot, i.e. above the first facet (variable Ca in the data). Any suggestions?
Here is a part of my data:
df <- read.table(text = "
Date Ca K Mg Na
2/18/2021 1 25 21 19
2/22/2021 2 26 22 20
2/26/2021 3 27 23 21
3/4/2021 4 28 5 22
3/6/2021 5 29 6 8
3/10/2021 6 30 7 9
3/13/2021 7 31 8 10
3/17/2021 8 32 9 11
3/20/2021 9 33 10 12
3/23/2021 10 34 11 13
3/27/2021 11 35 12 14
3/31/2021 12 36 13 15
4/3/2021 13 37 14 16
4/7/2021 14 38 15 17
4/10/2021 15 39 16 18
4/13/2021 16 40 17 19
4/16/2021 17 41 18 20
4/19/2021 8 42 19 21
4/22/2021 9 43 20 22
4/26/2021 0 44 21 23
4/28/2021 1 45 22 24
4/28/2021 2 46 23 25
4/28/2021 3 47 24 26
4/28/2021 5 48 25 27
4/29/2021 6 49 26 28
5/4/2021 7 50 27 29
5/7/2021 8 51 28 30
5/8/2021 9 1 29 31
5/10/2021 1 2 30 32
5/29/2021 3 17 43 45
5/31/2021 6 18 44 46
6/1/2021 4 19 45 47
6/2/2021 8 20 46 48
6/3/2021 2 21 47 49
6/7/2021 3 22 48 50
6/10/2021 5 23 49 51
6/14/2021 3 5 50 1
6/18/2021 1 6 51 2
", header = TRUE)
Prepare the data before plotting, and make a separate data frame for the text annotations:
dfplot <- df %>%
select(Date, Ca, Na, K, Mg) %>%
#convert to date class before plotting
mutate(Date = as.Date(Date, "%m/%d/%Y")) %>%
#using pivot instead of gather. gather is superseded.
#gather(var, value, -Date)
pivot_longer(cols = 2:5, names_to = "grp", values_to = "ppm")
dftext <- data.frame(grp = "Ca", # we want text to show up only on "Ca" facet.
ppm = max(dfplot[ dfplot$grp == "Ca", "ppm" ]),
Date = as.Date(c("2021-04-27", "2021-04-28", "2021-04-29")),
label = c("E1", "E2", "E3"))
After cleaning up your code, we can use geom_text with dftext:
ggplot(dfplot, aes(Date, ppm)) +
geom_point() +
facet_wrap(~grp, scales = 'free_y',ncol = 1, strip.position = "right") +
geom_vline(xintercept = dftext$Date, col = "red") +
geom_text(aes(x = Date, y = ppm, label = label), data = dftext, nudge_y = -2)
Try using ggrepel library to avoid label overlap, replace geom_text with one of these:
#geom_text_repel(aes(x = Date, y = ppm, label = label), data = dftext)
#geom_label_repel(aes(x = Date, y = ppm, label = label), data = dftext)
After cleaning up the code and seeing the plot, I think this post is a duplicate of Annotation on only the first facet of ggplot in R? .

ggplot boxplots with 2 y axes

I have been looking everywhere to find out how to ggplot boxplots with 2 y axes.
This is what I want the plot to look like:
boxplot
Example data:
Sample Tumor Score_1 Score_2
1 A 100 -20
2 B 80 -10
3 C 5 -5
4 C 6 -7
5 C 80 -8
6 C 70 -30
7 C 80 -5
8 C 90 -6
9 A 150 -8
10 B 1 -10
11 B 2 -10
12 B 4 -9
13 B 5 -7
14 B 8 -6
15 B 10 -4
16 B 12 -8
17 B 7 -10
18 B 6 -11
19 C 70 -15
20 C 90 -4
21 C 95 -3
22 C 120 -6
23 C 130 -9
24 C 50 -5
25 C 113 -10
26 C 100 -2
27 C 90 -1
28 C 50 -11
29 C 80 -15
30 A 200 -7
31 A 200 -4
32 A 180 -3
33 A 160 -9
34 A 107 -15
35 A 115 -11
36 A 80 -12
37 A 90 -14
38 A 130 -13
39 A 140 -9
40 A 120 -10
myboxplot <- read.csv("Example.csv")
#Set up labels
ylim.prim <- c(0, 500)
ylim.sec <- c(-35, 0)
b <- diff(ylim.prim)/diff(ylim.sec)
a <- b*(ylim.prim[1] - ylim.sec[1])
myboxplot %>%
pivot_longer(cols = c(Score_1, Score_2)) %>%
mutate(name = factor(name, levels = c("Score_1", "Score_2"))) %>%
ggplot(aes(x = Tumor)) +
geom_boxplot(aes(y = value, fill = name)) +
scale_y_continuous(name ="Score 1", sec.axis = sec_axis(~ ((. - a)/b), name = expression("Score 2"))) +
scale_x_discrete(name = "Tumor") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
#axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11),
legend.position = "right") +
scale_fill_manual(values = wes_palette("GrandBudapest2"))
I do get the plot in the image (linked above), the problem is my second set of data (the purple boxplots "Score 2") is not being aligned with the second y axis, it is aligning with the first y axis. Since the data is much smaller with a range of -35 to 0, you can't see the difference between the tumor types. Does anyone have any ideas how to change this?
Thank you in advance!
I think the plot you are requesting might be misleading. Instead, how about a facet?
library(tidyverse)
data %>%
pivot_longer(-c("Sample","Tumor"), names_to = "Score") %>%
ggplot(aes( x= Tumor, y = value, fill = Score)) +
geom_boxplot() +
facet_wrap(.~Score, scales = "free")
Or as @NickCox suggests:
data %>%
pivot_longer(-c("Sample","Tumor"), names_to = "Score") %>%
group_by(Score,Tumor) %>%
arrange(value) %>%
mutate(xcoord = seq(-0.25,0.25,length.out = n()),
Tumor = factor(Tumor)) %>%
ggplot(aes( x= Tumor, y = value, fill = Score)) +
geom_boxplot(outlier.shape = NA, coef = 0) +
geom_point(aes(x = xcoord + as.integer(Tumor))) +
facet_wrap(.~Score, scales = "free")
[This was posted when the question was on Cross Validated]
Box plots I find oversold whenever, as is usual, there is scope to show more detail. Here is one of several possibilities, a quantile-box plot in Parzen's sense in which for each group a standard box showing median and quartiles is superimposed on a quantile plot, in which the implicit horizontal axis is rank order. The detail that apart from some small integers many values are just multiples of 10 is of interest and should help a little in interpretation.
This plot doesn't use R. People who use R should find doing something similar or better to be trivial -- and those whose favourite software is different should be able to say the same. If not, you need new favourite software.

Add a percent to y axis labels [duplicate]

This question already has answers here:
How can I change the Y-axis figures into percentages in a barplot?
(4 answers)
Closed 4 years ago.
I'm sure I missed an obvious solution to this problem but I can't figure out how to add a percent sign to the y axis labels.
Data Sample:
Provider Month Total_Count Total_Visits Procedures RX State
Roberts 2 19 19 0 0 IL
Allen 2 85 81 4 4 IL
Dawson 2 34 34 0 0 CA
Engle 2 104 100 4 4 CA
Goldbloom 2 7 6 1 1 NM
Nathan 2 221 192 29 20 NM
Castro 2 6 6 0 0 AK
Sherwin 2 24 24 0 0 AK
Brown 2 282 270 12 12 UT
Jackson 2 114 96 18 16 UT
Corwin 2 22 22 0 0 CO
Dorris 2 124 102 22 22 CO
Ferris 2 427 318 109 108 OH
Jeffries 2 319 237 82 67 OH
The following code gives graphs with inaccurate values because R seems to be multiplying by 100.
procs <- read.csv(paste0(dirdata, "Procedure percents Feb.csv"))
procs$Percentage <- round(procs$Procedures/procs$Total.Visits*100, 2)
procs$Percentage[is.na(procs$Percentage)] <- 0
procsplit <- split(procs, procs$State)
plots <- function(procs) {
ggplot(data = procs, aes(x= Provider, y= Percentage, fill= Percentage)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(x = Provider, y = Percentage, label = sprintf("%.1f%%", Percentage)), position = position_dodge(width = 0.9), hjust = .5, vjust = 0 , angle = 0) +
theme(axis.text.x = element_text(angle = 45, vjust = .5)) +
ggtitle("Procedure Percentages- February 2018", procs$State) +
theme(plot.title = element_text(size = 22, hjust = .5, family = "serif")) +
theme(plot.subtitle = element_text(size = 18, hjust = .5, family = "serif")) +
scale_y_continuous(name = "Percentage", labels = percent)
}
lapply(procsplit, plots)
I'm not sure if there's a way to use sprintf to add it or if there's a way to paste it onto the labels.
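Adding + scale_y_continuous(labels = function(x) paste0(x, "%")) to the ggplot statement fixes this issue. Percentage has already been multiplied by 100, so the labels only need a "%" suffix rather than scales::percent, which multiplies by 100 again.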

R hist vs geom_hist break points

I am using both geom_histogram and hist in R with the same breakpoints, but I get different graphs. I did a quick search; does anyone know how the breaks are defined and why there would be a difference?
These produce two different plots.
set.seed(25)
data <- data.frame(Mos=rnorm(500, mean = 25, sd = 8))
data$Mos<-round(data$Mos)
pAge <- ggplot(data, aes(x=Mos))
pAge + geom_histogram(breaks=seq(0, 50, by = 2))
hist(data$Mos,breaks=seq(0, 50, by = 2))
Thanks
To get the same histogram in ggplot2 you specify the breaks inside scale_x_continuous and binwidth inside geom_histogram.
Additionally, hist and histograms in ggplot2 use different defaults to create the intervals:
hist: right-closed (left open) intervals. Default: right = TRUE
stat_bin (ggplot2): left-closed (right open) intervals. Default: right = FALSE
**hist** **ggplot2**
freq1 Freq freq2 Freq
1 (0,2] 0 [0,2) 0
2 (2,4] 2 [2,4) 2
3 (4,6] 2 [4,6) 1
4 (6,8] 1 [6,8) 2
5 (8,10] 6 [8,10) 2
6 (10,12] 9 [10,12) 7
7 (12,14] 24 [12,14) 17
8 (14,16] 27 [14,16) 26
9 (16,18] 39 [16,18) 31
10 (18,20] 48 [18,20) 46
11 (20,22] 52 [20,22) 43
12 (22,24] 38 [22,24) 57
13 (24,26] 44 [24,26) 36
14 (26,28] 46 [26,28) 52
15 (28,30] 39 [28,30) 39
16 (30,32] 31 [30,32) 33
17 (32,34] 30 [32,34) 26
18 (34,36] 24 [34,36) 29
19 (36,38] 18 [36,38) 27
20 (38,40] 9 [38,40) 12
21 (40,42] 5 [40,42) 6
22 (42,44] 4 [42,44) 0
23 (44,46] 1 [44,46) 5
24 (46,48] 1 [46,48) 0
25 (48,50] 0 [48,50) 1
I included the argument right = FALSE in hist() so the histogram intervals are left-closed (right open) as they are in ggplot2. I added the labels in both plots, so it is easier to check the intervals are the same.
ggplot(data, aes(x = Mos))+
geom_histogram(binwidth = 2, colour = "black", fill = "white")+
scale_x_continuous(breaks = seq(0, 50, by = 2))+
stat_bin(binwidth = 2, aes(label=..count..), vjust=-0.5, geom = "text")
hist(data$Mos,breaks=seq(0, 50, by = 2), labels =TRUE, right =FALSE)
To check the frequencies in each bin:
# Bin Mos into left-closed, right-open intervals (right = FALSE): [0,2), [2,4), ...
freq <- cut(data$Mos, breaks = seq(0, 50, by = 2), dig.lab = 4, right = FALSE)
# Tabulate the counts per bin; the factor created above is named `freq`.
as.data.frame(table(freq))

ggplot2 merge color and fill legends

I want to merge two legends in ggplot2. I use the following code:
ggplot(dat_ribbon, aes(x = x)) +
geom_ribbon(aes(ymin = ymin, ymax = ymax,
group = group, fill = "test4 test5"), alpha = 0.2) +
geom_line(aes(y = y, color = "Test2"), data = dat_m) +
scale_colour_manual(values=c("Test2" = "white", "test"="black", "Test3"="red")) +
scale_fill_manual(values = c("test4 test5"= "dodgerblue4")) +
theme(legend.title=element_blank(),
legend.position = c(0.8, 0.85),
legend.background = element_rect(fill="transparent"),
legend.key = element_rect(colour = 'purple', size = 0.5))
The output is shown below. There are two problems:
When I use two or more words in the fill legend, the alignment becomes wrong
I want to merge the two legends into one, such that the fill legend is just part of a block of 4.
Does anyone know how I can achieve this?
Edit: reproducible data:
dat_m <- read.table(text="x quantile y group
1 1 50 0.4967335 0
2 2 50 0.4978249 0
3 3 50 0.5113562 0
4 4 50 0.4977866 0
5 5 50 0.5013287 0
6 6 50 0.4997994 0
7 7 50 0.4961121 0
8 8 50 0.4991302 0
9 9 50 0.4976087 0
10 10 50 0.5011666 0")
dat_ribbon <- read.table(text="
x ymin group ymax
1 1 0.09779713 40 0.8992385
2 2 0.09979283 40 0.8996875
3 3 0.10309222 40 0.9004759
4 4 0.10058433 40 0.8985366
5 5 0.10259125 40 0.9043807
6 6 0.09643109 40 0.9031940
7 7 0.10199870 40 0.9022920
8 8 0.10018253 40 0.8965690
9 9 0.10292754 40 0.9010934
10 10 0.09399359 40 0.9053067
11 1 0.20164694 30 0.7974174
12 2 0.20082056 30 0.7980642
13 3 0.20837821 30 0.8056074
14 4 0.19903399 30 0.7973723
15 5 0.19903322 30 0.8050146
16 6 0.19965049 30 0.8051922
17 7 0.20592719 30 0.8042850
18 8 0.19810139 30 0.7956606
19 9 0.20537392 30 0.8007527
20 10 0.19325158 30 0.8023044
21 1 0.30016463 20 0.6953927
22 2 0.29803646 20 0.6976961
23 3 0.30803808 20 0.7048137
24 4 0.30045448 20 0.6991248
25 5 0.29562249 20 0.7031225
26 6 0.29647060 20 0.7043499
27 7 0.30159103 20 0.6991356
28 8 0.30369025 20 0.6949053
29 9 0.30196483 20 0.6998127
30 10 0.29578036 20 0.7015861
31 1 0.40045725 10 0.5981147
32 2 0.39796299 10 0.5974115
33 3 0.41056038 10 0.6057062
34 4 0.40046287 10 0.5943157
35 5 0.39708008 10 0.6014512
36 6 0.39594129 10 0.6011162
37 7 0.40052411 10 0.5996186
38 8 0.40128517 10 0.5959748
39 9 0.39917658 10 0.6004600
40 10 0.39791453 10 0.5999168")
You are not using ggplot2 according to its philosophy. That makes things difficult.
ggplot(dat_ribbon, aes(x = x)) +
geom_ribbon(aes(ymin = ymin, ymax = ymax, group = group, fill = "test4 test5"),
alpha = 0.2) +
geom_line(aes(y = y, color = "Test2"), data = dat_m) +
geom_blank(data = data.frame(x = rep(5, 4), y = 0.5,
group = c("test4 test5", "Test2", "test", "Test3")),
aes(y = y, color = group, fill = group)) +
scale_color_manual(name = "combined legend",
values=c("test4 test5"= NA, "Test2" = "white",
"test"="black", "Test3"="red")) +
scale_fill_manual(name = "combined legend",
values = c("test4 test5"= "dodgerblue4",
"Test2" = NA, "test"=NA, "Test3"=NA))

Resources