Related
I am using ggarrange to produce a figure with three plots. One of these plots includes fill and a pattern created using ggpattern. I am able to create a combined figure when I combine one standard ggplot and the ggpattern plot, but I get an error (Error in seq.default(from, to, by) : invalid '(to - from)/by') when I try to combine all 3. I've included a simplified example below.
# Fake data: two rows (Condition "no" / "yes") for each of the five groups.
data <- data.frame(
  Level = c(0.2, 0.3, 0.25, 0.35, 0.4, 0.5, 0.5, 0.6, 0.15, 0.35),
  Group = rep(c("A", "B", "C", "D", "E"), each = 2),
  Condition = rep(c("no", "yes"), times = 5),
  Hx = c(0, 1, 1, 1, 0, 1, 0, 0, 0, 0),
  Type = rep(c("T", "F"), each = 5),
  stringsAsFactors = FALSE
)
# Carry the tibble class vector so the object matches the dput() form.
class(data) <- c("tbl_df", "tbl", "data.frame")
# Create plots
# Patterned bar chart: fill encodes Hx, the pattern encodes Condition, and
# there is one facet column per Type.
pattern_plot <- ggplot(data, mapping = aes(x = Group, y = Level)) +
  geom_bar_pattern(
    mapping = aes(pattern = Condition, fill = as.factor(Hx)),
    stat = "identity",
    position = "dodge",
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  facet_grid(cols = vars(Type)) +
  # Show the y axis as whole-number percentages.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_pattern_manual(values = c("none", "stripe")) +
  guides(
    # Keep each legend to one encoding: the pattern key is drawn on a white
    # fill, and the fill key is drawn without any pattern.
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  ) +
  labs(
    x = "", y = "", pattern = "Condition", fill = "History",
    subtitle = "Percentage of people with condition"
  ) +
  theme_bw() +
  theme(legend.position = "left")
# Plain dodged bar chart of Level by Group, filled by Condition.
no_pattern_plot <- ggplot(data, mapping = aes(x = Group, y = Level, fill = Condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Show the y axis as whole-number percentages.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")
# Plain dodged bar chart of Level by Group, filled by Hx (treated as a factor).
no_pattern_plot2 <- ggplot(
  data,
  mapping = aes(x = Group, y = Level, fill = as.factor(Hx))
) +
  geom_bar(stat = "identity", position = "dodge") +
  # Show the y axis as whole-number percentages.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")
This works:
# Combine the patterned plot and the second plain plot in a two-row layout.
# NOTE(review): the package providing ggarrange() is not shown in this snippet.
ggarrange(pattern_plot, no_pattern_plot2, nrow = 2)
That call creates this plot.
However ggarrange(pattern_plot, no_pattern_plot, no_pattern_plot2, nrow = 3)
produces Error in seq.default(from, to, by) : invalid '(to - from)/by'
Any ideas?
In the correlation plot below, the significance symbols overlap the numbers. Does anyone know how to place the significance symbol below the number?
# rcorr() result for mtcars; the plot below reads its $r and $P components.
cor <- Hmisc::rcorr(as.matrix(mtcars))
# Lower-triangle colour map of cor$r with the coefficients printed in black
# and significance labels (levels 0.001 / 0.01 / 0.05) drawn in white.
# NOTE(review): fn_left, fn_up and cl.lim are carried over unchanged from the
# original call; confirm the installed corrplot version accepts them.
corrplot::corrplot(
  cor$r,
  method = "color",
  type = "lower",
  addCoef.col = "black",
  number.cex = 0.8,
  p.mat = cor$P,
  sig.level = c(.001, .01, .05),
  insig = 'label_sig',
  pch = 10,
  pch.cex = 1,
  pch.col = "white",
  tl.cex = 1,
  tl.col = "black",
  tl.srt = 45,
  addgrid.col = "white",
  cl.pos = "n",
  fn_left = 135,
  fn_up = 20,
  cl.lim = c(-1, 1)
)
ggplot2 may provide more flexibility
library(ggplot2)
# One row per unordered pair of variables, with its correlation (R) and
# p-value (P) looked up in the rcorr() matrices by name.
nm <- rownames(cor$r)
m <- t(combn(nm, 2))
d <- cbind(data.frame(m), R = cor$r[m], P = cor$P[m])
# Label text: the rounded coefficient, plus "**" on a second line when
# P < 0.001.
d$label <- ifelse(
  d$P < 0.001,
  paste0(round(d$R, 2), "\n**"),
  as.character(round(d$R, 2))
)
# Fix the axis order to the matrix order; the y axis uses the reverse order.
d$X1 <- factor(d$X1, nm)
d$X2 <- factor(d$X2, rev(nm))
# Close every open graphics device before drawing.
graphics.off()
ggplot(d, aes(X1, X2, fill = R, label = label)) +
  geom_tile(color = "white") +
  # Black text where R > 0.35, white text otherwise.
  geom_text(color = ifelse(d$R > 0.35, "black", "white")) +
  scale_fill_viridis_c() +
  coord_equal() +
  theme_bw()
Suppose I have data with both an ordinal variable and a categorical variable:
set.seed(35)
# Example data with 160 rows.
#   Class     - alternates Math / Science.
#   StudyTime - only 16 values are drawn and sorted; data.frame() recycles
#               that block ten times to fill the 160 rows.
#   Nerd      - drawn once per row, with P("Nerd") of 0.1, 0.3, 0.5 and 0.8
#               over runs of 30, 50, 50 and 30 rows.
df <- data.frame(
  Class = factor(rep(c(1, 2), times = 80), labels = c("Math", "Science")),
  StudyTime = factor(
    sort(sample(1:4, 16, prob = c(0.25, 0.3, 0.3, 0.15), replace = TRUE)),
    # Explicit levels keep the four labels valid even if one category is
    # never drawn (otherwise factor() fails on the label count).
    levels = 1:4,
    labels = c("<5", "5-10", "10-20", ">20")
  ),
  Nerd = factor(
    # vapply() guarantees exactly one character value per row.
    vapply(
      rep(c(0.1, 0.3, 0.5, 0.8), c(30, 50, 50, 30)),
      function(x) sample(c("Nerd", "NotNerd"), size = 1, prob = c(x, 1 - x)),
      character(1)
    ),
    levels = c("NotNerd", "Nerd")
  )
)
One could use ggplot and geom_bar with x, fill and alpha (or color) aesthetic mappings to visualize the relationship between these variables.
# Dodged bar counts per Class: fill encodes StudyTime, transparency encodes
# Nerd.
ggplot(df, mapping = aes(x = Class, fill = StudyTime, alpha = Nerd)) +
  geom_bar(color = "black", position = "dodge") +
  labs(x = "Class", y = "Number of Students", alpha = "Nerd?") +
  scale_alpha_manual(values = c(Nerd = 0.5, NotNerd = 1)) +
  # Four fill colours interpolated from blue through white to orange.
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  theme(legend.key.height = unit(1, "cm"))
However, alpha and color are not ideal. A better alternative might be to apply a pattern such as stripes or a crosshatch.
The accepted answer to this question from over 10 years ago says to use colors, and the most upvoted answer (while clever) uses over 100 lines of code.
This question received some upvotes but no new answers.
Is there any better alternative to adding a pattern such as can be seen here?
One approach is to use the ggpattern package written by Mike FC (no affiliation):
library(ggplot2)
#remotes::install_github("coolbutuseless/ggpattern")
library(ggpattern)
# Dodged bar counts per Class: fill encodes StudyTime, and a stripe pattern
# marks the "Nerd" level.
ggplot(df, mapping = aes(x = Class, fill = StudyTime, pattern = Nerd)) +
  geom_bar_pattern(
    position = position_dodge(preserve = "single"),
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  labs(x = "Class", y = "Number of Students", pattern = "Nerd?") +
  # Four fill colours interpolated from blue through white to orange.
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  scale_pattern_manual(values = c(Nerd = "stripe", NotNerd = "none")) +
  guides(
    # Keep each legend to one encoding: the pattern key is drawn on a white
    # fill, and the fill key is drawn without any pattern.
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  )
The package appears to support a number of common geometries. Here is an example of using geom_tile to combine a continuous variable with a categorical variable:
set.seed(40)
# 9 x 9 grid (81 cells): a uniform Evaporation value in [50, 100] and a
# Yes/No TreeCover flag (30% / 70%) for every cell.
df2 <- data.frame(
  Row = rep(1:9, times = 9),
  Column = rep(1:9, each = 9),
  Evaporation = runif(n = 81, min = 50, max = 100),
  TreeCover = sample(
    x = c("Yes", "No"),
    size = 81,
    replace = TRUE,
    prob = c(0.3, 0.7)
  )
)
# Tile map of the grid: fill shows Evaporation on a blue-to-orange gradient,
# and a circle pattern marks cells where TreeCover is "Yes".
ggplot(
  df2,
  mapping = aes(
    x = as.factor(Row),
    y = as.factor(Column),
    pattern = TreeCover,
    fill = Evaporation
  )
) +
  geom_tile_pattern(
    pattern_color = NA,
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.5,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 1
  ) +
  labs(x = "Row", y = "Column") +
  scale_pattern_manual(values = c(Yes = "circle", No = "none")) +
  scale_fill_gradient(low = "#0066CC", high = "#FF8C00") +
  # Draw the pattern legend keys on a white fill.
  guides(pattern = guide_legend(override.aes = list(fill = "white"))) +
  coord_equal()
I apologize; I am a beginner in R. I am trying to make a graph like the picture below.
This is the code I wrote, but it does not work:
# Load the unemployment series from the CSV file (a sample of its rows is
# shown after this snippet).
unemp <- read.csv("unemployment.csv", stringsAsFactors = FALSE)
# adding background colors for different presidents
name <- c("Truman", "Eisenhower", "Kennedy", "Johnson", "Nixon",
"Ford", "Carter", "Reagan", "Bush I", "Clinton", "Bush II",
"Obama")
start <- as.Date(c("1948-01-01", "1953-01-20", "1961-01-20", "1963-11-22",
"1969-01-20", "1974-08-09", "1977-01-20", "1981-01-20",
"1989-01-20", "1993-01-20", "2001-01-20", "2009-01-20"))
# Each term ends where the next one starts; the last one is closed at 2016-10-01.
end <- c(start[-1], as.Date("2016-10-01"))
party <- c("D", "R", "D", "D", "R", "R", "D", "R", "R", "D", "R", "D")
# One row per president: name, term start, term end and party.
pres <- data.frame(name, start, end, party, stringsAsFactors = FALSE)
head(unemp)
# NOTE(review): the plot call below does not run as written:
#   * geom_vline(): `data = pres` sits inside aes() instead of being passed
#     to the layer itself;
#   * geom_line(): a comma is missing between `data = pres` and `aes(...)`,
#     and it maps `date` / `unemp` while using `pres`, which only holds name,
#     start, end and party (the CSV sample lists `date` and `uempmed`);
#   * the result is stored in `p` and is not printed in this snippet.
p <- ggplot(unemp) +
geom_rect(data = pres,
aes(xmin = start, xmax = end, fill = party),
ymin = -Inf, ymax = Inf, alpha = 0.2) +
geom_vline(aes(data = pres, xintercept = as.numeric(start)), colour = "grey50", alpha = 0.5) +
geom_text(data = pres, aes(x = start, y = 2500, label = name), size = 3, vjust = 0, hjust = 0, nudge_x = 50, check_overlap = TRUE) +
geom_line(data = pres aes(date, unemp)) + geom_rect(data = pres, aes(xmin = start, xmax = end),
ymin = 10000, ymax = Inf, alpha = 0.4, fill = "chartreuse")
Also, the CSV file I used ("unemployment.csv") looks like this:
date uempmed
<date> <dbl>
1 1948-01-01 4.5
2 1948-02-01 4.7
3 1948-03-01 4.6
4 1948-04-01 4.9
5 1948-05-01 4.7
6 1948-06-01 4.8
What do I need to do to produce the picture above?
Okay, here's a shot.
I slightly rewrote your pres data to fit a tidyverse style, and I created some random unemp data, since you didn't give us any (please do, in the future, as noted in the comments). I got HEX codes from here, which appear to match the ones you show.
Also, note that I'm using scales::label_percent(), which was introduced in the scales 1.1.0 release, so you may have to update your scales package. Likewise, I don't know what scale your percentage data is on, so you may have to change the scale argument of label_percent().
With that said, here goes:
library(glue)
library(lubridate)
library(tidyverse)
# Presidential terms: name, start date, end date and party.
name <- c(
  "Truman", "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford",
  "Carter", "Reagan", "Bush I", "Clinton", "Bush II", "Obama"
)
start <- as_date(c(
  "1948-01-01", "1953-01-20", "1961-01-20", "1963-11-22",
  "1969-01-20", "1974-08-09", "1977-01-20", "1981-01-20",
  "1989-01-20", "1993-01-20", "2001-01-20", "2009-01-20"
))
# Each term ends where the next one starts; the last one is closed at
# 2016-10-01.
end <- c(start[-1], as_date("2016-10-01"))
party <- c("D", "R", "D", "D", "R", "R", "D", "R", "R", "D", "R", "D")
pres <- tibble(name = name, start = start, end = end, party = party)

# Simulated monthly series for 1948-2016: normal noise around 5 plus a step
# that cycles through 1, 2, 3 in runs of 100 months.
unemp <- expand_grid(year = 1948:2016, month = 1:12) %>%
  transmute(
    date = as_date(glue("{year}-{month}-01")),
    unemployment = rnorm(n(), 5, 0.1) +
      rep(1:3, each = 100, length.out = n())
  )

# Extremes of the simulated series, used to position the plot annotations.
min_unemp <- min(unemp$unemployment)
max_unemp <- max(unemp$unemployment)
# Unemployment line with one dashed vertical line, one rotated name label
# and one party-coloured band per presidential term.
ggplot(unemp,
       aes(x = date,
           y = unemployment)) +
  geom_line() +
  # Dashed line at the start of each term.
  geom_vline(data = pres,
             mapping = aes(xintercept = start),
             colour = "grey50",
             linetype = "dashed") +
  # President names, rotated and placed just above the highest data value.
  geom_text(data = pres,
            mapping = aes(x = start,
                          y = max_unemp + 0.25,
                          label = name),
            angle = 90,
            vjust = 1) +
  # Party-coloured band per term; inherit.aes = FALSE because `pres` has no
  # `date` / `unemployment` columns.
  geom_rect(data = pres,
            mapping = aes(xmin = start,
                          xmax = end,
                          ymin = min_unemp,
                          ymax = max_unemp + 0.75,
                          fill = party),
            inherit.aes = FALSE,
            alpha = 0.25) +
  coord_cartesian(expand = FALSE) +
  # scale = 1: the values are already expressed in percent.
  scale_y_continuous(labels = scales::label_percent(scale = 1)) +
  scale_fill_manual(name = "Party of President",
                    labels = c("Democratic", "Republican"),
                    values = c("#0015bc", "#ff0000")) +
  labs(x = "Date",
       y = "Unemployment Rate") +
  theme_minimal() +
  theme(legend.position = "bottom")
Created on 2019-11-30 by the reprex package (v0.3.0)
I am trying to align three text labels (mean, median and current value) outside the crossbars. I appreciate any help.
My Data
# dput() of the example data, assigned to `df3` because the plotting code
# below reads it under that name.
df3 <- structure(list(variable = structure(1:10, .Label = c("GrossNetEquity",
"GrossTotalEquityPerfAttr", "LongNetEquity", "LongTotalEquity",
"NetEquity", "NetEquityPerfAttr", "NetTotalEquity", "ShortNetEquity",
"ShortTotalEquity", "TotalNetEquity"), class = "factor"), mx = c(134.5,
8.1, 95.6, 106.4, 61, 6.8, 71.6, -21.4, -24.9, 148.7), mn = c(71.1,
-4.6, 49.7, 66.2, 27, -4.1, 36.4, -46.3, -47.4, 96), avg = c(112.173148148148,
1.14814814814815, 77.7388888888889, 84.5111111111111, 43.262037037037,
1.05092592592593, 48.0694444444444, -34.4194444444444, -36.4416666666667,
120.952777777778), sd = c(14.5968093202928, 2.39877232936504,
9.87368667081958, 8.7204382695887, 7.29159953981859, 2.24405738054356,
7.05196278547511, 6.04899711056417, 5.77265751334298, 13.0003483658092
), md = c(114.15, 1.4, 77.35, 82.65, 41.45, 1.25, 46.35, -34.1,
-35.55, 119.75), firstldiff = c(82.9795295075625, -3.64939651058193,
57.9915155472497, 67.0702345719337, 28.6788379573998, -3.4371888351612,
33.9655188734942, -46.5174386655728, -47.9869816933526, 94.9520810461593
), firstlsum = c(141.366766788734, 5.94569280687823, 97.4862622305281,
101.951987650289, 57.8452361166742, 5.53904068701305, 62.1733700153947,
-22.3214502233161, -24.8963516399807, 146.953474509396), secldiff = c(68.3827201872697,
-6.04816883994697, 48.1178288764302, 58.349796302345, 21.3872384175813,
-5.68124621570476, 26.9135560880191, -52.566435776137, -53.7596392066956,
81.9517326803501), seclsum = c(155.963576109027, 8.34446513624327,
107.359948901348, 110.672425919877, 65.1368356564928, 7.78309806755661,
69.2253328008698, -16.2724531127519, -19.1236941266377, 159.953822875205
), value = c(116.1, -1.2, 88, 92.3, 58.8, -1.2, 63, -28.1, -29.3,
121.6), Criteria = c(NA, NA, "", "", "orange", "", "orange",
"orange", "", "orange")), .Names = c("variable", "mx", "mn",
"avg", "sd", "md", "firstldiff", "firstlsum", "secldiff", "seclsum",
"value", "Criteria"), row.names = c(NA, -10L), class = "data.frame")
My Code
I am trying to show the mean, median and current value as bars on a geom_crossbar, but I am finding it hard to align them.
# Flipped crossbar chart: one bar per variable spanning mn..mx, with "|"
# markers and text labels at the median (md), mean (avg) and current value.
ggplot(df3,aes(variable,mn))+
geom_crossbar(aes(ymin = mn, ymax = mx,fill = Criteria),
width = 0.5,alpha = 0.50,position =position_dodge())+
# Median: marker and label are both positioned at md.
geom_point(data=df3, aes(x=variable,y=md,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3, aes(x=variable, y=md, label = paste("Median",md)),
size = 3, vjust = 2,hjust = -1.0,color = "brown1",
position = position_dodge(width=0.9))+
# Mean: marker and label are both positioned at avg.
# NOTE(review): the label text pastes `mn`, not `avg` - confirm which column
# was intended.
geom_point(data=df3, aes(x=variable,y=avg,group=1),
shape = "|", size = 10,color = "coral4")+
geom_text(data=df3, aes(x=variable, y=avg, label = paste("Mean",mn)),
size = 3, vjust = 2.5, hjust = -1.0,color ="coral4")+
# Current value: marker and label are both positioned at value.
geom_point(data=df3, aes(x=variable,y=value,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3,aes(x=variable, y=value,label = paste("Current Value",value)),
size = 2, vjust = 3, hjust = -1.0,color = "brown1")+
# Each label sits at its own y value (md / avg / value), so the three text
# layers start at different positions along the bar.
coord_flip()
If you wish to align your geom_text layers, you can assign them the same y value. I've included an example below. I also removed some repetitive parts from your code, where the different layers can inherit the data / aesthetic mappings from the top ggplot() level.
# Crossbars span mn..mx for each variable; "|" glyphs mark the median, mean
# and current value. The three text layers inherit y = mx from the top-level
# mapping, so they all start at the same position next to the crossbar.
ggplot(df3, aes(variable, mx)) +
  geom_crossbar(aes(ymin = mn, ymax = mx, fill = Criteria),
                width = 0.5, alpha = 0.50, position = position_dodge()) +
  # vertical bars
  geom_point(aes(y = md), shape = "|", size = 10, color = "brown1") +
  geom_point(aes(y = avg), shape = "|", size = 10, color = "coral4") +
  geom_point(aes(y = value), shape = "|", size = 10, color = "brown1") +
  # labels (vjust used to move the three layers vertically away from one another;
  # nudge_y used to shift them uniformly rightwards)
  # note that the original label for "Mean" used paste("Mean", mn), but that didn't
  # look right to me, since the vertical bar above used avg instead of mn, & mn appears
  # to correspond to "min", not "mean".
  # Values are rounded to 2 decimal places so the mean does not print with
  # its full precision (e.g. 112.173148148148).
  geom_text(aes(label = paste("Median", round(md, 2))),
            size = 3, vjust = -1, nudge_y = 5, hjust = 0, color = "brown1") +
  geom_text(aes(label = paste("Mean", round(avg, 2))),
            size = 3, vjust = 0, nudge_y = 5, hjust = 0, color = "coral4") +
  geom_text(aes(label = paste("Current Value", round(value, 2))),
            size = 2, vjust = 1, nudge_y = 5, hjust = 0, color = "brown1") +
  coord_flip() +
  expand_limits(y = 200) # expand rightwards to give more space for labels
Note: The above follows the approach in your code, which repeats the same geom layers for different columns in the wide format data. In general, ggplot prefers to deal with data in long format. It looks cleaner, and would be easier to maintain as you only need to make changes (e.g. increase font size, change number of decimal places in the label) once, rather than repeat the change for every affected layer. A long format approach to this problem could look like this:
# create long format data frame for labels: one row per variable and
# statistic (md / avg / value), keeping mx as the shared label position
df3.labels <- df3 %>%
  select(variable, mx, md, avg, value) %>%
  # pivot_longer() replaces the superseded gather(); the three statistic
  # columns are stacked into `type` (column name) and `value` (number)
  tidyr::pivot_longer(cols = c(md, avg, value),
                      names_to = "type",
                      values_to = "value") %>%
  mutate(label = paste0(case_when(type == "md" ~ "Median",
                                  type == "avg" ~ "Mean",
                                  TRUE ~ "Current Value"),
                        ": ",
                        round(value, 2)),
         # per-statistic vertical offset so the three labels do not overlap
         vjust = case_when(type == "md" ~ -1,
                           type == "avg" ~ 0,
                           TRUE ~ 1))
# place df3.labels in the top level call, since there are two geom layers that
# use it as the data source, & only one that uses df3.
ggplot(df3.labels,
       aes(x = variable, y = value, color = type, label = label)) +
  geom_crossbar(data = df3,
                aes(x = variable, y = mn, ymin = mn, ymax = mx, fill = Criteria),
                inherit.aes = FALSE,
                width = 0.5, alpha = 0.50) +
  geom_point(shape = "|", size = 10) +
  geom_text(aes(y = mx, vjust = vjust), size = 3, nudge_y = 5, hjust = 0) +
  # change colour mappings here
  # guide = "none" hides the colour legend (the logical form guide = FALSE
  # is deprecated in current ggplot2)
  scale_color_manual(values = c("md" = "brown1", "avg" = "coral4", "value" = "brown1"),
                     guide = "none") +
  coord_flip() +
  expand_limits(y = 200)