I am trying to align three text labels i.e. mean, median and current value outside the crossbars.I appreciate any help.
My Data
structure(list(variable = structure(1:10, .Label = c("GrossNetEquity",
"GrossTotalEquityPerfAttr", "LongNetEquity", "LongTotalEquity",
"NetEquity", "NetEquityPerfAttr", "NetTotalEquity", "ShortNetEquity",
"ShortTotalEquity", "TotalNetEquity"), class = "factor"), mx = c(134.5,
8.1, 95.6, 106.4, 61, 6.8, 71.6, -21.4, -24.9, 148.7), mn = c(71.1,
-4.6, 49.7, 66.2, 27, -4.1, 36.4, -46.3, -47.4, 96), avg = c(112.173148148148,
1.14814814814815, 77.7388888888889, 84.5111111111111, 43.262037037037,
1.05092592592593, 48.0694444444444, -34.4194444444444, -36.4416666666667,
120.952777777778), sd = c(14.5968093202928, 2.39877232936504,
9.87368667081958, 8.7204382695887, 7.29159953981859, 2.24405738054356,
7.05196278547511, 6.04899711056417, 5.77265751334298, 13.0003483658092
), md = c(114.15, 1.4, 77.35, 82.65, 41.45, 1.25, 46.35, -34.1,
-35.55, 119.75), firstldiff = c(82.9795295075625, -3.64939651058193,
57.9915155472497, 67.0702345719337, 28.6788379573998, -3.4371888351612,
33.9655188734942, -46.5174386655728, -47.9869816933526, 94.9520810461593
), firstlsum = c(141.366766788734, 5.94569280687823, 97.4862622305281,
101.951987650289, 57.8452361166742, 5.53904068701305, 62.1733700153947,
-22.3214502233161, -24.8963516399807, 146.953474509396), secldiff = c(68.3827201872697,
-6.04816883994697, 48.1178288764302, 58.349796302345, 21.3872384175813,
-5.68124621570476, 26.9135560880191, -52.566435776137, -53.7596392066956,
81.9517326803501), seclsum = c(155.963576109027, 8.34446513624327,
107.359948901348, 110.672425919877, 65.1368356564928, 7.78309806755661,
69.2253328008698, -16.2724531127519, -19.1236941266377, 159.953822875205
), value = c(116.1, -1.2, 88, 92.3, 58.8, -1.2, 63, -28.1, -29.3,
121.6), Criteria = c(NA, NA, "", "", "orange", "", "orange",
"orange", "", "orange")), .Names = c("variable", "mx", "mn",
"avg", "sd", "md", "firstldiff", "firstlsum", "secldiff", "seclsum",
"value", "Criteria"), row.names = c(NA, -10L), class = "data.frame")
My Code
I am trying to show Mean, Median and Current Value in the form of bars on geom_crossbar.But finding it hard to align it.
ggplot(df3,aes(variable,mn))+
geom_crossbar(aes(ymin = mn, ymax = mx,fill = Criteria),
width = 0.5,alpha = 0.50,position =position_dodge())+
geom_point(data=df3, aes(x=variable,y=md,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3, aes(x=variable, y=md, label = paste("Median",md)),
size = 3, vjust = 2,hjust = -1.0,color = "brown1",
position = position_dodge(width=0.9))+
geom_point(data=df3, aes(x=variable,y=avg,group=1),
shape = "|", size = 10,color = "coral4")+
geom_text(data=df3, aes(x=variable, y=avg, label = paste("Mean",mn)),
size = 3, vjust = 2.5, hjust = -1.0,color ="coral4")+
geom_point(data=df3, aes(x=variable,y=value,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3,aes(x=variable, y=value,label = paste("Current Value",value)),
size = 2, vjust = 3, hjust = -1.0,color = "brown1")+
coord_flip()
If you wish to align your geom_text layers, you can assign them the same y value. I've included an example below. I also removed some repetitive parts from your code, where the different layers can inherit the data / aesthetic mappings from the top ggplot() level.
ggplot(df3, aes(variable, mx))+
geom_crossbar(aes(ymin = mn, ymax = mx, fill = Criteria),
width = 0.5, alpha = 0.50, position = position_dodge()) +
# vertical bars
geom_point(aes(y = md), shape = "|", size = 10, color ="brown1") +
geom_point(aes(y = avg), shape = "|", size = 10, color = "coral4") +
geom_point(aes(y = value), shape = "|", size = 10, color ="brown1") +
# labels (vjust used to move the three layers vertically away from one another;
# nudge_y used to shift them uniformly rightwards)
# note that the original label for "Mean" used paste("Mean", mn), but that didn't
# look right to me, since the vertical bar above used avg instead of mn, & mn appears
# to correspond to "min", not "mean".
geom_text(aes(label = paste("Median", md)),
size = 3, vjust = -1, nudge_y = 5, hjust = 0, color = "brown1") +
geom_text(aes(label = paste("Mean", avg)),
size = 3, vjust = 0, nudge_y = 5, hjust = 0, color ="coral4") +
geom_text(aes(label = paste("Current Value", value)),
size = 2, vjust = 1, nudge_y = 5, hjust = 0, color = "brown1") +
coord_flip() +
expand_limits(y = 200) # expand rightwards to give more space for labels
Note: The above follows the approach in your code, which repeats the same geom layers for different columns in the wide format data. In general, ggplot prefers to deal with data in long format. It looks cleaner, and would be easier to maintain as you only need to make changes (e.g. increase font size, change number of decimal places in the label) once, rather than repeat the change for every affected layer. A long format approach to this problem could look like this:
# create long format data frame for labels
df3.labels <- df3 %>%
select(variable, mx, md, avg, value) %>%
tidyr::gather(type, value, -variable, -mx) %>%
mutate(label = paste0(case_when(type == "md" ~ "Median",
type == "avg" ~ "Mean",
TRUE ~ "Current Value"),
": ",
round(value, 2)),
vjust = case_when(type == "md" ~ -1,
type == "avg" ~ 0,
TRUE ~ 1))
# place df3.labels in the top level call, since there are two geom layers that
# use it as the data source, & only one that uses df3.
ggplot(df3.labels,
aes(x = variable, y = value, color = type, label = label)) +
geom_crossbar(data = df3,
aes(x = variable, y = mn, ymin = mn, ymax = mx, fill = Criteria),
inherit.aes = FALSE,
width = 0.5, alpha = 0.50) +
geom_point(shape = "|", size = 10) +
geom_text(aes(y = mx, vjust = vjust), size = 3, nudge_y = 5, hjust = 0) +
# change colour mappings here
scale_color_manual(values = c("md" = "brown1", "avg" = "coral4", "value" = "brown1"),
guide = FALSE) +
coord_flip() +
expand_limits(y = 200)
Related
My goal is to use a heat map to plot the data below. I have obtained the layout of the heat map I wanted, but I am struggling to find a way to color the tiles as intended. My guess is that the code below colors the tiles proportional to the number inside the tile itself. However, this is not what I need. I would like to:
have the same color within each of the rectangles (yellow for the rectangle on the left and blue for the rectangle on the right);
a gradient from yellow to blue for the other tiles proportional to how far away the tiles are from the blue area.
Thanks to anyone who will help!
library(tidyverse)
# First, I create the simulated dataset with 200 individuals
set.seed(1243) # set seed for reproducibility
#I simulate test 1 scores
test1_score <- sample(c(3.5, 3.8, 4), 200, replace = TRUE)
#I simulate test 2 scores
test2_score <- round(runif(200, 0, 38))
#I create diagnostic classes based on test 2 scores
test2_class <- ifelse(test2_score < 20, "non meet",
ifelse(test2_score < 30, "below 40th",
ifelse(test2_score < 35, "above 40th",
ifelse(test2_score < 37, "meet", "master"))))
#I create exit_program variable based on test 1 scorea and test 2 classes
exit_program <- ifelse(test1_score == 4 & test2_class %in% c("above 40th", "meet", "master"), 1, 0)
#I create data frame with simulated data
df <- data.frame(ID = 1:200, test1_score, test2_class, exit_program)
#I calculate the group size for each combination of test 1 scores and test 2 classes
df2 <- df |>
group_by(test1_score, test2_class, exit_program) |>
summarize(n = n()) |>
mutate(test2_class = factor(test2_class, levels = c("non meet", "below 40th", "above 40th", "meet", "master"))) |> arrange(test2_class)
#plot data
df2 |>
ggplot(aes(x = test2_class, y = as.factor(test1_score))) +
theme(legend.position = "none", panel.background = element_rect(fill = "white"),
axis.text = element_text(size = 12), axis.title = element_text(size = 14)) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n), size = 7, family = "sans") +
labs(x = "classification", y = "scores", size = 10) +
scale_fill_gradient(low = "#56B4E9", high = "#F0E442", guide = "none") +
geom_rect(aes(xmin = 2.5, xmax = 5.5, ymin = 2.5, ymax = 3.5), linewidth = 2, color = "#0072B2", fill = NA) +
geom_rect(aes(xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 3.5), linewidth = 2, color = "#E69F00", fill = NA)
Maybe this is what you are looking for. You are right. Mapping n on fill means to color the tiles by the value of n.
As I understand the question you want each "column" to have the same color and colored with a gradient from yellow on the left to blue on the right. To this end you could add a column with a measure of the distance from the left or the right end to your data which could then be mapped on the fill aes. One option would be to convert your test2_class to a numeric then compute the absolute distance from the maximum value (which corresponds to the "master" level).
library(tidyverse)
df2 <- df2 |>
ungroup() |>
mutate(
fill = as.numeric(test2_class),
fill = abs(fill - max(fill))
)
#plot data
df2 |>
ggplot(aes(x = test2_class, y = as.factor(test1_score))) +
theme(legend.position = "none", panel.background = element_rect(fill = "white"),
axis.text = element_text(size = 12), axis.title = element_text(size = 14)) +
geom_tile(aes(fill = fill)) +
geom_text(aes(label = n), size = 7, family = "sans") +
labs(x = "classification", y = "scores", size = 10) +
scale_fill_gradient(low = "#56B4E9", high = "#F0E442", guide = "none") +
geom_rect(aes(xmin = 2.5, xmax = 5.5, ymin = 2.5, ymax = 3.5), linewidth = 2, color = "#0072B2", fill = NA) +
geom_rect(aes(xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 3.5), linewidth = 2, color = "#E69F00", fill = NA)
I want to show covered ranges (including overlaps) and (after some failures with stacked bar plots) I chose geom_rect. The following code works well for one type.
library(tidyverse)
# create dummy data
foo <- tibble(start = c(1, 150, 140, 75, 300),
end = c(150, 180, 170, 160, 400))
ggplot() +
geom_rect(data = foo, aes(xmin = start, xmax = end, ymin = 0, ymax = 1), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 0, ymax = 1), fill = NA, colour = "black") +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 1)) +
scale_x_continuous(name = "", breaks = NULL) +
theme_minimal() +
theme(panel.grid = element_blank())
If I add more data (only one more type, but in the original data I do have some more) like below, I can add the data "by hand", i.e. add two lines of code for each type, but I'm looking for a way to do this by grouping, but didn't succeed.
foo <- foo %>%
mutate(type = "A", .before = 1)
bar <- tibble(type = "B",
start = c(1, 30, 40, 100, 150, 200, 310),
end = c(20, 50, 100, 120, 200, 300, 380))
foo <- bind_rows(foo, bar)
ggplot() +
geom_rect(data = foo %>% filter(type == "A"), aes(xmin = start, xmax = end, ymin = 0, ymax = 1), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 0, ymax = 1), fill = NA, colour = "black") +
geom_rect(data = foo %>% filter(type == "B"), aes(xmin = start, xmax = end, ymin = 2, ymax = 3), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 2, ymax = 3), fill = NA, colour = "black") +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 3)) +
scale_x_continuous(name = "", breaks = NULL) +
geom_text(aes(x = c(0, 0), y = c(0.5, 2.5), label = c("A", "B")), size = 4, hjust = 2) +
theme_minimal() +
theme(panel.grid = element_blank())
So, the graph already looks the way I want, but I'd prefer to get here by using grouping (or any other non-manual way).
Maybe there's also a different geom or method to get this kind of graph?
You can write a small helper function that positions a categorical value in continuous space. Example below.
helper <- function(x) {(match(x, sort(unique(x))) - 1) * 2}
ggplot(foo) +
geom_rect(
aes(xmin = start, xmax = end,
ymin = helper(type),
ymax = helper(type) + 1),
fill = "green", linetype = "blank", alpha = 0.3
) +
geom_rect(
aes(xmin = min(start), xmax = max(end),
ymin = helper(type),
ymax = helper(type) + 1),
fill = NA, colour = "black"
) +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 3)) +
scale_x_continuous(name = "", breaks = NULL) +
annotate(
"text", x = c(0, 0), y = c(0.5, 2.5), label = c("A", "B"),
size = 4, hjust = 2
) +
theme_minimal() +
theme(panel.grid = element_blank())
how do i correct the span of the y-axis, since the first diagramme has bigger range but the last diagramme has smaller range.
I tried using expand_limit() , but i have to define the limit. what i want is to specify my limit based on the mean value +- 10%
Data
structure(list(height = structure(1:21, .Label = c("_150_5_",
"_150_4_", "01_150_3_", "01_150_2_", "_150_1_",
"01_130_5_", "01_130_4_", "01_130_3_", "01_130_2_",
"L01_130_1_", "01_100_5_", "01_100_4_", "01_100_3_",
"01_100_2_", "01_100_1_", "01_60_5_", "01_60_4_",
"01_60_3_", "01_60_2_", "01_60_1_", "01_30_5_"
), class = "factor"), max = c(153.502609757564, 153.803890640307,
154.030628562627, 153.502609757564, 153.577819267489, 133.497584806195,
133.440753139611, 133.896765965376, 134.068575331457, 133.725396384362,
102.872441458794, 103.347289523556, 103.279185873129, 101.048462000305,
102.035263387027, 60.852713866229, 60.8645299271739, 60.9236791302129,
60.8763505777715, 61.0542129187662, 30.8972231764362), mean = c(152.038047221229,
151.858031107105, 152.211206935181, 151.759867764584, 150.344389742043,
131.874101333396, 131.706179220053, 131.043612919162, 132.264362261993,
130.599623937693, 101.774080628225, 102.110144624754, 102.239940146821,
100.053415273797, 100.577556727676, 60.299452319695, 60.3004949199648,
60.3066081777292, 60.3048844335163, 60.3267015589117, 30.347932670538
), min = c(150.120847282062, 148.344689600069, 148.767123457497,
148.20441093378, 146.06352708525, 129.15217516479, 129.258692422658,
127.367870428665, 129.418798152331, 127.006616339119, 99.7938010585627,
100.401130405172, 101.081047766832, 98.2917306757434, 99.1623945349401,
59.7507299132569, 59.7507299132569, 59.8077330900488, 59.7507299132569,
59.8191467795698, 29.7732075536612), sd = c(0.384120348675233,
0.996143559832467, 0.892389162104352, 0.668245088780541, 1.26871400480022,
0.717796939735463, 0.841062860547558, 1.09283360068465, 0.801961749792679,
1.40866403449516, 0.370811042540416, 0.387499052903713, 0.273143219592094,
0.372612511324188, 0.448178158096896, 0.141781338201885, 0.143328065432486,
0.140326202644008, 0.141854728955873, 0.139981570704421, 0.155319872754675
)), class = "data.frame", row.names = c(NA, -21L))
the code i have tried
ii=1
k=0
plot_list_stat=list()
par(mfcol = c(5, 1))
for (i in 1:4 ){
k=k+1
plot_list_stat[[ii]]=ggplot(stat.std_w[k:(k+4),],aes(x=height,y=mean,group=1))+
geom_ribbon(aes(x=height,ymax=max,ymin=min,color="min-max"
),alpha=0.6,fill= "skyblue",show.legend=TRUE)+
scale_fill_manual("",values ="skyblue", guide = FALSE)+
geom_line()+ylab("")+
geom_point()+
theme(axis.text.x = element_text(angle = 90))+
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd,color="mean±sd"
), width=.2,position=position_dodge(0.05))+theme(legend.position = "none")
print(ii); #plot_list_stat[[ii]]
ii=ii+1;k=k+4;)
library("cowplot")
pgrid=plot_grid(plotlist=plot_list_stat,nrow=1)
library(ggpubr)
pgrid=ggarrange(pgrid,common.legend = T)
annotate_figure(pgrid,
top = text_grob("statistic ", size = 14),
left = text_grob(" mean",rot = 90))
By default the plot area will be expanded by a small amount from the extreme values in the data. There are a number of mechanisms for manually overriding this behavior depending on your needs. In generaly they are much harder to implement in faceted plots, but the visual you're after really calls for faceting so below I'm showing a solution with geom_blank() which will work with this example.
library(tidyverse)
# parse text in 'height' variable to be sortable
df2 <- df %>%
separate(col = height, into = c("sub_grp", "grp", "order"), remove = F, fill = "left") %>%
mutate(grp = fct_rev(factor(as.numeric(grp))),
order = fct_rev(factor(as.numeric(order))))
#> Warning: Expected 3 pieces. Additional pieces discarded in 21 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# set fraction to expand plot by
# these are based on the requested +/- 10% in the question
fract_up <- 1.1
fract_down <- 0.9
# extract table of limits to expand plot by
df3 <- df2 %>%
group_by(grp) %>%
summarise(grp_mean = mean(mean), grp_order = mean(as.numeric(order))) %>%
mutate(grp_top = fract_up * grp_mean, grp_bot = fract_down * grp_mean, .keep = "unused") %>%
pivot_longer(-c(grp, grp_order), names_to = "type", names_prefix = "grp_")
# plot it all together
df2 %>%
ggplot(aes(x = order, y = mean, group = grp)) +
geom_ribbon(
aes(
ymax = max,
ymin = min,
color = "min-max",
group = grp
),
alpha = 0.6,
fill = "skyblue"
) +
geom_line() +
geom_point() +
geom_errorbar(
aes(
ymin = mean - sd,
ymax = mean + sd,
color = "mean±sd"
),
width = .2,
position = position_dodge(0.05)
) +
geom_blank(data = df3, aes(x = grp_order, y = value)) +
facet_wrap(facets = vars(grp),
scales = "free",
nrow = 1) +
scale_x_discrete(NULL, labels = df2$height) +
ggtitle("Standard statistic") +
ylab("Mean") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
plot.title = element_text(hjust = 0.5))
#> geom_path: Each group consists of only one observation. Do you need to adjust
#> the group aesthetic?
Created on 2021-03-15 by the reprex package (v1.0.0)
I'd use facets instead. With this for looping you're kind of making your own life really complicated.
library(tidyverse)
df %>%
# makes new variables
separate(height, into = c("x", "height", "index")) %>%
ggplot(aes(x = index, y = mean, group = 1)) +
geom_ribbon(aes(x = index, ymax = max, ymin = min, color = "min-max"), alpha = 0.6, fill = "skyblue", show.legend = TRUE) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = "mean±sd"), width = .2, position = position_dodge(0.05)) +
scale_fill_manual(values = "skyblue", guide = FALSE) +
labs(y = NULL) + # use NULL, not "" !!
facet_grid(~height) +
theme(axis.text.x = element_text(angle = 90), legend.position = "none")
#> Warning: Expected 3 pieces. Additional pieces discarded in 21 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
#> geom_path: Each group consists of only one observation. Do you need to adjust
#> the group aesthetic?
Created on 2021-03-15 by the reprex package (v1.0.0)
I personally would prefer the same scale in this case, but if you want flexible y scales, use facet_wrap instead
df %>%
# makes new variables
separate(height, into = c("x", "height", "index")) %>%
ggplot(aes(x = index, y = mean, group = 1)) +
geom_ribbon(aes(x = index, ymax = max, ymin = min, color = "min-max"), alpha = 0.6, fill = "skyblue", show.legend = TRUE) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = "mean±sd"), width = .2, position = position_dodge(0.05)) +
scale_fill_manual(values = "skyblue", guide = FALSE) +
labs(y = NULL) +
facet_wrap(~height, scales = "free_y", nrow = 1) +
theme(axis.text.x = element_text(angle = 90), legend.position = "none")
I am trying to add different colors to a text in ggplot using the function annotate().
The results are quite good, but I have to define manually which are the right y values to correctly overlap the text.
Plot with annotations for
I would love to know if there is a better way to overlap text in annotations in R.
Thank you,
Btw, here is the code I am using:
ex_3_1 %>%
ggplot(aes(x = DATE)) +
# geometries
geom_line(aes(y = if_else(ORIGIN == "ACTUAL" |
(YEAR == 2019 & MONTH == "JUN"),
SALES, NULL)), size = 1) +
geom_line(aes(y = if_else(ORIGIN == "FORECAST", SALES, NULL)),
linetype = "dashed", size = 1) +
geom_point(aes(y = REL_SALES), size = 3) +
geom_point(aes(y = if_else(MONTH == "JUL" & YEAR == 2018, SALES, NULL)),
shape = 21, fill = "darkorange", size = 3) +
geom_point(aes(y = if_else(MONTH == "FEB" & YEAR == 2019, SALES, NULL)),
shape = 21, fill = col, size = 3) +
geom_text(aes(y = SALES, label = dollar(round(REL_SALES,1),
suffix = "B", accuracy = 0.1)),
vjust = -1.5, hjust = 0.2, size = 3) +
# annotations
# square text
annotate(geom = "rect", xmin = as_date("2019-05-20"),
xmax = as_date("2020-01-10"), ymin = 0, ymax = 2.6,
alpha = 0.1) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = -1,
label = expression(bold("2019 FORECAST")),
col = "gray60", size = 3.25) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = 1,
label = paste0("This is provided by ABC\n",
"consultants and based on\n",
"market data through June.\n",
"The forecast assumes no\n",
"major market changes.\n"),
col = "gray60", size = 3.5) +
# 2018 notes
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2018: Jan-Jun was a period of stability, with\n",
"fairly steady growth (averaging +3% per\n",
"month). There was a nearly 20% decrease\n",
"in July, when Product X was recalled and\n",
"pulled from the market. Total sales remained\n",
"at reduced volume for the rest of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2018:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.19, hjust = 0, vjust = 1,
label = expression(phantom("month). There was a ")*
"nearly 20% decrease"),
size = 3.5, col = "darkorange") +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.03, hjust = 0, vjust = 1,
label = "in July",
size = 3.5, col = "darkorange") +
# 2019 notes
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2019: The year started at less than $1.6B, but\n",
"Increased markedly in February, when a new\n",
"study was released. Total sales have increased\n",
"steadly since then and this projected to continue.\n",
"The latest forecast is for $2.4B in monthly sales by\n",
"the end of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2019:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.35, hjust = 0, vjust = 1,
label = "Increased markedly in February",
size = 3.5, col = col) +
# scales
scale_x_date(date_labels = "%b'%y", date_breaks = "3 month") +
scale_y_continuous(labels = dollar, breaks = c(seq(0,3.5,0.5)),
limits = c(0, 3.5)) +
# titles
labs("Market size over time") +
ylab("SALES ($USD BILLIONS)") +
# themes
theme_void() +
theme(
axis.line.x = element_line(color = "gray58"),
axis.text.y = element_text(size = 11, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
I have got this data set and want to generate a sunburst plot. The data is of 4 columns which are unit, weight, year16 and year17. The sunburst is based on the values in the weight column. The code is there and when adding the coding for the third layer it is giving me an error. I think the error is coming when I am adding the third layer.
library("ggnewscale")
library(ggplot2)
#read file
weight.eg = read.csv("Dummy Data.csv", header = FALSE, sep =
";",encoding = "UTF-8")
#change column names
colnames(weight.eg) <- c
("unit","weight","year16","year17")
#check the class
sapply(weight.eg, class)
#View(weight.eg)
#as weight column is factor change into integer
weight.eg$weight = as.numeric(levels(weight.eg$weight))
[as.integer(weight.eg$weight)]
weight.eg$year16 = as.numeric(levels(weight.eg$year16))
[as.integer(weight.eg$year16)]
weight.eg$year17 = as.numeric(levels(weight.eg$year17))
[as.integer(weight.eg$year17)]
#Nas are introduced, remove
weight.eg <- na.omit(weight.eg)
#Sum of the total weight
sum_total_weight = sum(weight.eg$weight)
#First layer
firstLevel = weight.eg %>% summarize(total_weight=sum(weight))
cs_fun <- function(x){(cumsum(x) + c(0, cumsum(head(x , -1))))/ 2}
ggplot(weight.eg) +
geom_col(data = firstLevel,
aes(x = 1, y = total_weight)) +
geom_text(data = firstLevel,
aes(x = 1, y = total_weight / 2,
label = paste("Total Weight:", total_weight)),
colour = "black") +
geom_col(aes(x = 2,
y = weight, fill = weight),
colour = "black", size = 0.6) +
scale_fill_gradient(name = "Weight",
low = "white", high = "lightblue") +
# Open up new fill scale for next ring
new_scale_fill() +
geom_text(aes(x = 2, y = cs_fun(weight),
label = paste(unit, weight))) +
geom_col(aes(x = 3, y = weight, fill = year16),
size = 0.6, colour = "black") +
scale_fill_gradient(name = "Year16",
low = "red", high = "green") +
geom_text(aes(label = paste0(unit,year16), x = 3,
y = cs_fun(weight))) +
#next ring
new_scale_fill() +
geom_text(aes(x = 2, y = cs_fun(weight),
label = paste(unit, weight))) +
geom_col(aes(x = 4, y = weight, fill = year17),
size = 0.6, colour = "black") +
scale_fill_gradient(name = "Year17",
low = "red", high = "green") +
geom_text(aes(label = paste0(unit,year17), x = 4,
y = cs_fun(weight))) +
coord_polar(theta = "y")
The output for dput(weight.eg) is
structure(list(unit = structure(1:6, .Label = c("A", "B", "C",
"D", "E", "F", "Unit"), class = "factor"), weight = c(30, 25,
10, 17, 5, 13), year16 = c(70, 80, 50, 30, 60, 40), year17 = c(50,
100, 20, 30, 70, 60)), .Names = c("unit", "weight", "year16",
"year17"), row.names = 2:7, class = "data.frame", na.action =
structure(1L, .Names = "1", class = "omit"))
I want to include year17 as well and in the future there will be
columns, so that has to be added as well. Because of the error I
am not able to figure out what is wrong.