Related
I want to plot customized Horizontal dots using my data and the code given here
data:
# Example data: one row per origin (A to J) with its Percentage, rate and change.
df <- data.frame(
  origin     = LETTERS[1:10],
  Percentage = c(23, 16, 32, 71, 3, 60, 15, 21, 44, 60),
  rate       = c(10, 12, 20, 200, -25, 12, 13, 90, -105, 23),
  change     = c(10, 12, -5, 12, 6, 8, 0.5, -2, 5, -2)
)
.
origin Percentage rate change
1 A 23 10 10.0
2 B 16 12 12.0
3 C 32 20 -5.0
4 D 71 200 12.0
5 E 3 -25 6.0
6 F 60 12 8.0
7 G 15 13 0.5
8 H 21 90 -2.0
9 I 44 -105 5.0
10 J 60 23 -2.0
Observations from the 'origin' column need to be put on the y-axis. The corresponding values in the 'change' and 'rate' columns must be shown in boxes instead of circles and be differentiated by colour, for example values from the 'change' column in light blue and values from the 'rate' column in blue. In addition, I want to add a second vertical axis on the right and put circles on it whose size is defined by the corresponding value in the 'Percentage' column.
Output of code from the link:
Expected outcome (something like this):
Try this.
First, reshaping so that both rate and change are in one column better supports ggplot's general preference towards "long" data.
# Stack the rate and change columns into long form (one row per origin and
# measure), keeping origin and Percentage as identifier columns.
df2 <- reshape2::melt(df, id.vars = c("origin", "Percentage"))
(That can also be done using tidyr::pivot_longer.)
The plot:
# Boxed value labels for rate/change per origin, plus a column of circles on
# the right whose size encodes Percentage.
ggplot(data = df2, mapping = aes(x = value, y = origin)) +
  # one filled label box per value; box fill and text colour follow the measure
  geom_label(aes(label = value, fill = variable, colour = variable)) +
  # hollow circles (shape 21) at a fixed x, 20 units right of the largest value
  geom_point(aes(size = Percentage), shape = 21, x = 20 + max(df2$value)) +
  scale_fill_manual(values = c(change = "lightblue", rate = "blue")) +
  scale_colour_manual(values = c(change = "black", rate = "white")) +
  # extra room left (15) and right (25) so labels and circles are not clipped
  scale_x_continuous(expand = expansion(add = c(15, 25))) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.border = element_blank()
  )
The legend and labels can be adjusted in the usual ggplot methods. Overlapping of labels is an issue with which you will need to contend.
Update on OP request: See comments:
# Add "%"-suffixed value labels on the dots and a coloured heading per series.
gg_dot +
  # black text on the rate dots, white text on the change dots
  geom_text(aes(x = rate, y = origin,
                label = paste0(round(rate, 1), "%")),
            col = "black") +
  geom_text(aes(x = change, y = origin,
                label = paste0(round(change, 1), "%")),
            col = "white") +
  # series headings, drawn one row above the ten origins (y = 11)
  geom_text(aes(x = x, y = y, label = label, col = label),
            data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
                       label = c("change", "rate")), size = 6) +
  # Named values: unnamed ones are assigned alphabetically, which would give
  # the "change" heading the rate dots' colour and vice versa.
  scale_color_manual(values = c(change = "#468189", rate = "#9DBEBB"),
                     guide = "none") +
  # pad the discrete y axis so the heading row fits
  scale_y_discrete(expand = c(0.2, 0))
First answer:
Something like this?
library(tidyverse)
library(dslabs)
# Base dot plot: one row per origin (ordered by rate) with a dot for rate and
# a dot for change.
gg_dot <- df %>%
  # order rows by increasing rate and freeze that order in the factor levels
  arrange(rate) %>%
  mutate(origin = fct_inorder(origin)) %>%
  ggplot() +
  # remove axes and superfluous grids
  theme_classic() +
  theme(axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line = element_blank()) +
  # add a dummy point for scaling purposes
  geom_point(aes(x = 12, y = origin),
             size = 0, col = "white") +
  # add one horizontal guide line per origin (not a hard-coded 1:10)
  geom_hline(yintercept = seq_along(unique(df$origin)), col = "grey80") +
  # add a point for each rate value
  geom_point(aes(x = rate, y = origin),
             size = 11, col = "#9DBEBB") +
  # add a point for each change value
  geom_point(aes(x = change, y = origin),
             size = 11, col = "#468189")
# Add value labels on the dots and a coloured heading per series.
gg_dot +
  # black text on the rate dots, white text on the change dots
  geom_text(aes(x = rate, y = origin,
                label = paste0(round(rate, 1))),
            col = "black") +
  geom_text(aes(x = change, y = origin,
                label = paste0(round(change, 1))),
            col = "white") +
  # series headings, drawn one row above the ten origins (y = 11)
  geom_text(aes(x = x, y = y, label = label, col = label),
            data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
                       label = c("change", "rate")), size = 6) +
  # Named values: unnamed ones are assigned alphabetically, which would give
  # the "change" heading the rate dots' colour and vice versa.
  scale_color_manual(values = c(change = "#468189", rate = "#9DBEBB"),
                     guide = "none") +
  # pad the discrete y axis so the heading row fits
  scale_y_discrete(expand = c(0.2, 0))
I am making a plot for 17 symptoms by age group. So far I get what I want when I use my code for just one symptom (code and plot below), but when I run the code over all the variables I get a wrong plot, and I still have no idea where I went wrong.
This is my data:
x <- data.frame(symptoms=c("symptom1: 0 to 9","symptom1: 10 to 19","symptom1: 20 to 49","symptom1: 50+","symptom2: 0 to 9","symptom2: : 10 to 19",
"symptom2: : 20 to 49","symptom2: 50+","symptom3: 0 to 9","symptom3: 10 to 19","symptom3: 20 to 49","symptom3: 50+",
"symptom4: 0 to 9",
"symptom4: 10 to 19","symptom4: 20 to 49","symptom4:50+","symptom5: 0 to 9","symptom5: 10 to 19","symptom5: 20 to 49",
"symptom5: 50+",
"symptom6: 0 to 9","symptom6: 10 to 19","symptom6: 20 to 49","symptom6: 50+","symptom7: 0 to 9","symptom7: 10 to 19","symptom7: 20 to 49",
"symptom7: 50+", "symptom8: 0 to 9","symptom8: 10 to 19","symptom8: 20 to 49","symptom8: 50+",
"symptom9: 0 to 9","symptom9: 10 to 19","symptom9: 20 to 49","symptom9: 50+","symptom10: 0 to 9","symptom10: 10 to 19",
"symptom10: 20 to 49","symptom10: 50+","symptom11: 0 to 9","symptom11: 10 to 19","symptom11: 20 to 49",
"symptom11: 50+","symptom12: 0 to 9","symptom12: 10 to 19","symptom12: 20 to 49","symptom12: 50+","symptom13: 0 to 9",
"symptom13: 10 to 19","symptom13: 20 to 49","symptom13: 50+","symptom14: 0 to 9","symptom14: 10 to 19",
"symptom14: 20 to 49","symptom14: 50+","symptom15: 0 to 9","symptom15: 10 to 19","symptom15: 20 to 49","symptom15: 50+",
"symptom16:0 to 9","symptom16:10 to 19","symptom16:20 to 49","symptom16:50+","symptom17: 0 to 9","symptom17: 10 to 19",
"symptom17: 20 to 49","symptom17: 50+"),
OR=c(3.1,3,0.6,0.2,2,2.5,5,1.8,7.4,4.2,6.9,2.3,3.7,2.7,3.7,5.1,6.8,3.4,4.4,8.3,14540102.8,1036435.3,8070307.6,565044.8,2.9,1.7,2.6,4.2,3.4,1.3,2.5,2.9,1,1.6,48.4,2.6,1.3,1.9,2.6,4.5,0.8,0.7,3.6,0,7.5,14.8,2.7,3.8,1.5,3.2,3.1,0.8,2.4,12,4.5,1.7,2.8,1.8,3.1,1.9,3.3,25,5,1.4,430072.7,5.8,2.8,1.5),
Lower=c(1.3,1.6,0.2,0,1.6,1.7,1.6,0.7,2.2,1.3,2.6,0.3,1.9,1.8,1.4,2,3.3,2.2,2.2,3.2,0,0,0,0,1.5,1.2,1.3,1.5,1.8,0.9,1.3,1.2,0.3,0.6,1.3,0.4,0.9,1.2,1.3,1.7,0.2,0.3,0.4,NA,3.8,8,1.4,1.5,0.7,1.6,1.3,0.3,1.2,9.1,2.2,0.7,0.7,0.6,1.1,0.3,1.3,9,1.5,0.4,0,2.5,0.9,0.1),
Upper=c(8.7,6.3,2.2,4.2,6.1,3.8,8,4.7,26,7.9,19,14.7,7.6,4,6,15.1,14.1,5.3,8.8,22.8,NA,5.463E+98,NA,NA,5.5,4.6,5.2,15.5,6.6,2,5,7.5,3.2,4.2,165.4,22.4,3.5,2.8,5,12.3,2.6,1.6,76.8,2.0619295829016E+205,15.1,30.1,5.4,10.2,3,6.7,9.4,2.1,4.6,28,9.7,7.3,9.9,4.8,8,4.7,11,46.4,23.1,5.6,NA,16,9.1,38.8),
group=rep(c("0-9 years", "10-19 years", "20-49 years", "50+ years"), 17))
This is the code for just the first symptom:
# Odds-ratio plot for the first symptom only (rows 1-4 of x), one row per age
# group, with horizontal bars from Lower to Upper.
ggplot(x[1:4, ], aes(x = OR, y = 4:1, group = group)) +
  # dashed reference line at OR = 1
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmax = Upper, xmin = Lower), size = 1, height = .1,
                 color = "blue") +
  geom_point(aes(shape = group, color = group), size = 5) +
  # same square marker (shape 15) for all four age groups; colour tells them apart
  scale_shape_manual(values = rep(15, 4)) +
  scale_color_manual(values = c('red', 'green', 'orange', "grey")) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  # y positions 4..1 are labelled with the first four symptom strings
  scale_y_continuous(breaks = 4:1, labels = x$symptoms[1:4]) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  ylab("") +
  xlab("Odds ratio") +
  # title typos fixed ("Odd" -> "Odds", "COnfidence" -> "Confidence")
  ggtitle("Odds ratios (OR) with 95% Confidence Interval")
and this is the plot that I got with just the first symptom by age group:
When I repeat this for all symptoms so I can have everything in one plot, the plot is a mess. See below code:
# Same odds-ratio plot, attempted for all 68 symptom/age-group rows at once.
# NOTE: a few OR/Upper values in x are many orders of magnitude larger than the
# rest, while the x breaks below only cover 0-20, so this linear-scale version
# cannot resolve the bulk of the data.
ggplot(x, aes(x = OR, y = 68:1, group = group)) +
  # dashed reference line at OR = 1
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmax = Upper, xmin = Lower), size = 1, height = .1,
                 color = "blue") +
  geom_point(aes(shape = group, color = group), size = 5) +
  # same square marker (shape 15) for all four age groups; colour tells them apart
  scale_shape_manual(values = rep(15, 4)) +
  scale_color_manual(values = c('red', 'green', 'orange', "grey")) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  # y positions 68..1 are labelled with the symptom strings in row order
  scale_y_continuous(breaks = 68:1, labels = x$symptoms) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  ylab("") +
  xlab("Odds ratio") +
  # title typo fixed ("Odd" -> "Odds")
  ggtitle("Odds ratios (OR) with 95% Confidence Interval")
This is the ugly plot:
In the end, I should have something like the figure below, with the frequency tables on the left and the values for OR with 95% CI. I haven't tried that part yet (adding all the numbers, etc.), but suggestions are more than welcome.
Thanks a lot for helping me to debug my code
This is a forest plot. Your main problem is that a couple of your values are several orders of magnitude greater than the rest. Typically with a forest plot, you want a log scale for the odds ratio to make it symmetrical around one. However, even that won't be enough here to resolve the details on your plot, so I have simply filtered out the outliers (which appear nonsensical)
Since you effectively have nested factor levels, I have "silently" faceted the plot.
library(dplyr)
# Forest plot of OR with Lower/Upper bars on a log10 x axis, one facet row per
# symptom and one line per age group inside each facet.
x %>%
# blank out any bound or estimate whose absolute value exceeds 100 (outliers)
mutate(Upper = replace(Upper, abs(Upper) > 100, NA),
Lower = replace(Lower, abs(Lower) > 100, NA),
OR = replace(OR, abs(OR) > 100, NA),
# drop everything from the first ":" so only "symptomN" remains, and order the
# levels numerically (symptom1 ... symptom17)
symptoms = factor(gsub(":.*$", "", symptoms),
levels = paste0("symptom", 1:17))) %>%
ggplot(aes(x = OR, y = group)) +
# full-height background rectangle per facet, filled by symptom (striping)
geom_rect(aes(xmin = 0.001, xmax = 1000,
ymin = -Inf, ymax = Inf, fill = symptoms)) +
geom_errorbarh(aes(xmin = Lower, xmax = Upper)) +
geom_point(aes(colour = group, shape = group), size = 5 ) +
# dashed reference line at OR = 1
geom_vline(aes(xintercept = 1), linetype = 2) +
# square marker (shape 15) for every group
scale_shape_manual(values = rep(15, 5)) +
# 18 alternating colours minus the first leaves 17 fills, one per symptom,
# starting with the grey one
scale_fill_manual(values = rep(c("#ffffff00", "#f0f0f090"), 9)[-1],
guide = "none") +
scale_x_log10() +
# restrict the visible x range
coord_cartesian(xlim = c(0.01, 100)) +
# one facet row per symptom, strip labels moved to the left
facet_grid(symptoms~., switch = "y") +
theme_bw() +
# remove gaps, borders and per-row axis text so the facets read as one table
theme(panel.spacing.y = unit(0, "points"),
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.length.y = unit(0, "points"),
strip.text.y.left = element_text(angle = 0),
strip.background.y = element_blank(),
strip.placement = "outside",
axis.line = element_line()
)
You may also wish to check out the ggforest package.
I am having this strange error regarding displaying the actual bars in a geom_col() plot.
Suppose I have a data set (called user_data) that contains a count of the total number of changes ('adjustments') done for a particular user (and a plethora of other columns). Let's say it looks like this:
User_ID total_adjustments additional column_1 additional column_2 ...
1 'Blah_17' 21 random_data random_data
2 'Blah_1' 47 random_data random_data
3 'foobar' 2 random_data random_data
4 'acbd1' 17 random_data random_data
5 'user27' 9 random_data random_data
I am using the following code to reduce it into a dataframe with only the two columns I care about:
# Reduce user_data to the id and its adjustment count, largest count first,
# and freeze that row order in the factor levels of User_ID.
total_adj_count <- user_data %>%
  arrange(desc(total_adjustments)) %>%
  select(User_ID, total_adjustments) %>%
  mutate(User_ID = factor(User_ID, levels = User_ID))
This results in my dataframe (total_adj_count) looking like so:
User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2
Moving along, here is the code I used to attempt to create a geom_col() plot of that data:
# Bar chart of the adjustment count per user, each bar labelled with its count,
# then converted to an interactive plotly figure.
g = ggplot(data=total_adj_count, aes(x = User_ID, y = total_adjustments)) +
geom_bar(width=.5, alpha=1, show.legend = FALSE, fill="#000066", stat="identity") +
labs(x="", y="Adjustment Count", caption="(based on sample data)") +
theme_few(base_size = 10) + scale_color_few() +
theme(axis.text.x=element_text(angle = 45, hjust = 1)) +
# NOTE(review): nudge_y = 2000 moves every label 2000 units up the y axis, far
# beyond the sample counts (2 to 47).
geom_text(aes(label=round(total_adjustments, digits = 2)), size=3, nudge_y = 2000) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# convert the ggplot object to a plotly figure
p = ggplotly(g)
# NOTE(review): `m` (the margin specification) is not defined in this snippet;
# it has to exist in the calling environment.
p = p %>%
layout(margin = m,
showlegend = FALSE,
title = "Number of Adjustments per User"
)
p
And for some strange reason when I try to view plot p it displays all parts of the plot as intended, but does not show the actual bars (or columns).
In fact I get this strange plot and am sort of stuck where to fix it:
Change nudge_y argument to a smaller number. Right now you have it set to 2000 which offsets the labels by 2000 on the y-axis. Below I've changed it to nudge_y = 2 and it looks like so:
# Bar chart of the adjustment count per user; labels sit 2 units above each bar.
g <- ggplot(
  data = total_adj_count,
  mapping = aes(x = User_ID, y = total_adjustments)
) +
  geom_col(fill = "#000066", width = 0.5, alpha = 1, show.legend = FALSE) +
  geom_text(
    aes(label = round(total_adjustments, digits = 2)),
    nudge_y = 2,
    size = 3
  ) +
  labs(x = "", y = "Adjustment Count", caption = "(based on sample data)") +
  scale_color_few() +
  theme_few(base_size = 10) +
  # tilt the user ids; hide the y-axis text and ticks (the labels carry the values)
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
Full copy/paste:
library(ggplot2)
library(ggthemes)
library(plotly)
library(dplyr)
# Self-contained example: sample data (already sorted by descending count),
# the bar chart, and its plotly conversion.
text <- " User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2"
total_adj_count <- read.table(text = text, header = TRUE, stringsAsFactors = FALSE)
# read.table returns User_ID as character, which ggplot would sort
# alphabetically; fix the levels to the table order so the bars stay in
# descending-count order.
total_adj_count$User_ID <- factor(total_adj_count$User_ID,
                                  levels = total_adj_count$User_ID)
g <-
  ggplot(total_adj_count, aes(User_ID, total_adjustments)) +
  geom_col(width = .5, alpha = 1, show.legend = FALSE, fill = "#000066") +
  labs(x = NULL, y = "Adjustment Count", caption = "(based on sample data)",
       title = "Number of Adjustments per User") +
  theme_few(base_size = 10) +
  scale_color_few() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # nudge_y is in y-axis units: 2 puts each label just above its bar
  geom_text(aes(label = round(total_adjustments, digits = 2)), size = 3,
            nudge_y = 2) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# convert to an interactive plotly figure and hide its legend
p <- ggplotly(g)
p <- layout(p, showlegend = FALSE)
p
So I am using this data frame:
# Long-format scores: five questions (Var1) rated by four groups (Var2).
# Rows are ordered group by group, with the five questions cycling within each.
xym <- data.frame(
  Var1 = rep(
    c("vloga", "odločitve", "dolgoročno", "krizno", "uživa v"),
    times = 4
  ),
  Var2 = rep(c("Nad", "Pod", "Enak", "Sam."), each = 5),
  value = c(
    4, 3, 4, 4, 3,        # Nad
    3, 3, 2, 3, 3,        # Pod
    3, 2.5, 2.5, 2, 3.5,  # Enak
    5, 6, 6, 5, 6         # Sam.
  )
)
And with this code:
# Horizontal dodged bar chart: one cluster of bars per question (Var1), one bar
# per group (Var2), with each value printed at the right-hand edge.
p <- ggplot(xym, aes(x = Var1, y = value, fill = Var2)) +
  coord_flip() +
  theme_bw() +
  scale_fill_manual(values = c("yellow", "deepskyblue1", "yellowgreen", "orchid4")) +
  # Reverse the question order on the flipped axis. factor() is needed because
  # data.frame() keeps strings as character since R 4.0, and levels() of a
  # character vector is NULL.
  xlim(rev(levels(factor(xym$Var1)))) +
  theme(axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "bottom",
        axis.text.x = element_text(angle = 0, vjust = 0.4)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(width = 0.7)) +
  # all labels share y = max(value) so they line up in one column
  geom_text(aes(x = Var1, y = max(value), label = round(value, 2), fill = Var2),
            angle = 0, position = position_dodge(width = 0.7), size = 4.2)
# print without a legend title
p + labs(fill = "")
# red line through the per-question mean across groups
# (fun.y is deprecated in favour of fun in ggplot2 >= 3.3.0)
p + stat_summary(fun.y = mean, colour = "red", geom = "line", aes(group = 1))
I produce output:
But besides the red line, which marks the overall average per question (i.e. "dolgoročno", "krizno", etc.), I would like to add points next to the bars, as well as labels showing the mean for each question.
My output should look something like the picture below, (I did it in paint), where the black dots represent my desired points and the value 3.6 of the first dot is the average of (6,2,4,2.5) and represents my desired value labels.
I've also looked at:
Plot average line in a facet_wrap
ggplot2: line connecting the means of grouped data
How to label graph with the mean of the values using ggplot2
One option would be the following. I followed your code and added a few lines.
# Your code
# Horizontal dodged bar chart: one cluster of bars per question (Var1), one bar
# per group (Var2), with each value printed at the right-hand edge.
p <- ggplot(xym, aes(x = Var1, y = value, fill = Var2)) +
  coord_flip() +
  theme_bw() +
  scale_fill_manual(values = c("yellow", "deepskyblue1", "yellowgreen", "orchid4")) +
  # Reverse the question order on the flipped axis. factor() is needed because
  # data.frame() keeps strings as character since R 4.0, and levels() of a
  # character vector is NULL.
  xlim(rev(levels(factor(xym$Var1)))) +
  theme(axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "bottom",
        axis.text.x = element_text(angle = 0, vjust = 0.4)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(width = 0.7)) +
  # all labels share y = max(value) so they line up in one column
  geom_text(aes(x = Var1, y = max(value), label = round(value, 2), fill = Var2),
            angle = 0, position = position_dodge(width = 0.7), size = 4.2)
# print without a legend title
p + labs(fill = "")
Then, I added the following code. You can add dots by changing geom to "point" in stat_summary. For the labels, I chose to get the data from ggplot_build() and created a data frame called foo. (I think there are other ways to do the same job.) Using foo, I added the annotation at the end.
# Add the per-question mean to p twice: as a red line and as black points.
# aes(group = 1) puts all rows in one group so the line connects the questions.
p2 <- p +
stat_summary(fun.y = mean, color = "red", geom = "line", aes(group = 1)) +
stat_summary(fun.y = mean, color = "black", geom ="point", aes(group = 1), size = 5,
show.legend = FALSE)
# This is the data for your dots in the graph
# ([[4]] is the fourth layer of p2: bars, value text, mean line, mean points)
foo <- as.data.frame(ggplot_build(p2)$data[[4]])
# print each mean 0.5 units beyond its point along the value axis
p2 +
annotate("text", x = foo$x, y = foo$y + 0.5, color = "black", label = foo$y)
I'm trying to create a plot in R using ggplot2 that shows horizontal lines for groundwater protection standards as well as a vertical line that shows the start of construction project. I have legends created already for units of measure and whether the sample was below the detection limit. All of these legends show up as I want them, but the legends from the horizontal and vertical line are overlain on the other legends. I've tried multiple ways using show_guide, using different data frames for the data, and override.aes = list() but nothing seems to get the desired results.
Here is what the data look like:
head(dmr_data)
location_id sample_date analysis_result lt_measure default_unit param_name
154 MWH1 2004-06-02 0.0150 mg/L Arsenic, dissolved
155 MWH1 2004-06-02 0.0850 mg/L Barium, dissolved
156 MWH1 2004-06-02 0.0002 < mg/L Beryllium, dissolved
158 MWH1 2004-06-02 0.0005 < mg/L Cadmium, dissolved
162 MWH1 2004-06-02 0.0020 < mg/L Lead, dissolved
164 MWH1 2004-06-02 0.0010 < mg/L Thallium, dissolved
DMR_limit GWPS non_detect
154 0.01000 0.010 0
155 0.17340 2.000 0
156 0.00005 0.004 1
158 0.00100 0.005 1
162 0.00500 0.015 1
164 0.00060 0.002 1
And here is the code for the plot:
#' Time-series panels for one monitoring well
#'
#' Draws `analysis_result` against `sample_date`, one facet per `param_name`,
#' with shaded background/compliance date ranges, horizontal lines for the
#' GWPS and DMR limits, and a dotted vertical line at the construction start.
#'
#' @param df Data frame for a single well. Columns read: `sample_date`,
#'   `analysis_result`, `default_unit`, `non_detect`, `param_name`,
#'   `location_id`, `GWPS`, `DMR_limit`.
#'   NOTE(review): `sample_date` is assumed to be POSIXct (it is combined with
#'   POSIXct values and plotted on `scale_x_datetime()`) -- confirm.
#' @return A ggplot object.
combo_plot <- function(df) {
  limits <- df

  # Two shaded periods: background (2004-06-01 to 2013-10-01) and compliance
  # (2013-10-01 to the latest sample in df), each spanning the full panel height.
  compliance_start <- as.POSIXct("2013-10-01", format = "%Y-%m-%d")
  shaded_dates <- data.frame(
    xmin = c(as.POSIXct("2004-06-01", format = "%Y-%m-%d"), compliance_start),
    xmax = c(compliance_start, max(df$sample_date)),
    ymin = c(-Inf, -Inf),
    ymax = c(Inf, Inf),
    years = c("background", "compliance")
  )

  ggplot(data = df, aes(x = sample_date, y = analysis_result)) +
    geom_point(data = df,
               aes(colour = default_unit, shape = factor(non_detect)),
               size = 4) +
    geom_line(data = df, aes(colour = default_unit), size = 1) +
    # `scales` spelled out; the original `scale =` relied on partial matching
    facet_wrap(~ param_name, scales = "free") +
    # Plot legends, labels, and titles
    ggtitle(paste("Time Series Plots for Monitoring Well",
                  df$location_id[1], "\n", sep = " ")) +
    ylab("Analysis Result") +
    xlab("Sample Date") +
    scale_x_datetime(labels = date_format("%Y")) +
    # theme_bw() is a complete theme and must come BEFORE the theme() tweaks;
    # placed after them it resets the axis-text angle and plot margin.
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90)) +
    theme(plot.margin = unit(c(0.75, 0.75, 0.75, 0.75), "in")) +
    scale_colour_discrete(name = "Units", guide = "legend") +
    # add rectangles for date ranges
    geom_rect(data = shaded_dates,
              aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax,
                  fill = years),
              alpha = 0.2, inherit.aes = FALSE) +
    scale_fill_manual(values = c("blue", "green")) +
    # add horizontal lines for EPA MCL and Upper Prediction Limit
    geom_hline(data = limits, aes(yintercept = GWPS, linetype = "GWPS"),
               show.legend = TRUE, size = 0.75) +
    geom_hline(data = limits,
               aes(yintercept = DMR_limit, linetype = "DMR Limit"),
               show.legend = TRUE, size = 0.75) +
    # create custom legend using guide
    theme(axis.title.x = element_text(size = 15, vjust = -.2)) +
    theme(axis.title.y = element_text(size = 15, vjust = 0.3)) +
    guides(colour = guide_legend("Units"), fill = guide_legend("Dates"),
           linetype = guide_legend("Limits")) +
    # Explicit breaks pair each label with the right value: non_detect == 1 is
    # "Non-Detect". Without breaks the labels are matched to the sorted levels
    # ("0", "1") and end up swapped.
    scale_shape_manual(name = "Measure",
                       breaks = c("1", "0"),
                       labels = c("Non-Detect", "Detected"),
                       values = c("1" = 21, "0" = 4)) +
    # add vertical line to show start of "East Pushout" construction
    geom_vline(
      xintercept = as.numeric(as.POSIXct("2008-08-01", format = "%Y-%m-%d")),
      linetype = "dotted", show.legend = TRUE
    )
}
I then use plyr to create faceted plots for all the wells
# Split dmr_data by location_id and call combo_plot on each well's rows,
# showing a text progress bar.
# NOTE(review): failwith(NA, combo_plot) presumably yields NA instead of
# stopping when one well fails, and .print = TRUE presumably prints each
# returned plot -- confirm against the plyr documentation.
d_ply(dmr_data, .(location_id), .progress = "text", failwith(NA, combo_plot), .print = TRUE)
Here is what the output looks like.
Any help would be appreciated!
You can get the desired effect by using override.aes = list(linetype = 0) in guides(), and by adding a new scale for linetype (so as to exclude the vertical construction line from showing up in the legend).
Replace your hline() section with:
## add horizontal lines for EPA MCL and Upper Prediction Limit
geom_hline(data = limits, aes(yintercept = GWPS, linetype = "GWPS"),
           colour = "black", size = 0.75, show.legend = TRUE) +
geom_hline(data = limits, aes(yintercept = DMR_limit, linetype = "DMR Limit"),
           size = 0.75, show.legend = TRUE) +
## explicit breaks keep each label on its own line type; without them the
## labels are paired with the alphabetically sorted keys and get swapped
scale_linetype_manual(name = "Limits",
                      breaks = c("GWPS", "DMR Limit"),
                      labels = c("GWPS", "DMR Limit"),
                      values = c("GWPS" = 1, "DMR Limit" = 2)) +
Replace your guides() line with:
# Set linetype = 0 in the colour, fill and shape legend keys so the limit
# lines are not drawn over them; the linetype legend keeps its default keys.
guides(colour = guide_legend(override.aes = list(linetype = 0 )),
fill = guide_legend(override.aes = list(linetype = 0 )),
shape = guide_legend(override.aes = list(linetype = 0 )),
linetype = guide_legend()) +
If you do want the dotted vertical line to show up in the legend, add the appropriate arguments to geom_vline(aes()) and to scale_linetype_manual().