I have the following data frame. It consists of two columns and ninety-four rows.
library(tidyverse)
ndat <- structure(list(sample_name = c("scFOOBAR_96_S98", "scFOOBAR_20_S22",
"scFOOBAR_83_S85", "scFOOBAR_24_S26", "scFOOBAR_76_S78", "scFOOBAR_72_S74",
"scFOOBAR_19_S21", "scFOOBAR_60_S62", "scFOOBAR_18_S20", "scFOOBAR_23_S25",
"scFOOBAR_92_S94", "scFOOBAR_67_S69", "scFOOBAR_08_S10", "scFOOBAR_77_S79",
"scFOOBAR_27_S29", "scFOOBAR_71_S73", "scFOOBAR_63_S65", "scFOOBAR_80_S82",
"scFOOBAR_36_S38", "scFOOBAR_31_S33", "scFOOBAR_86_S88", "scFOOBAR_82_S84",
"scFOOBAR_22_S24", "scFOOBAR_14_S16", "scFOOBAR_04_S6", "scFOOBAR_30_S32",
"scFOOBAR_10_S12", "scFOOBAR_88_S90", "scFOOBAR_91_S93", "scFOOBAR_46_S48",
"scFOOBAR_25_S27", "scFOOBAR_29_S31", "scFOOBAR_38_S40", "scFOOBAR_34_S36",
"scFOOBAR_51_S53", "scFOOBAR_85_S87", "scFOOBAR_35_S37", "scFOOBAR_79_S81",
"scFOOBAR_95_S97", "scFOOBAR_56_S58", "scFOOBAR_48_S50", "scFOOBAR_52_S54",
"scFOOBAR_03_S5", "scFOOBAR_47_S49", "scFOOBAR_73_S75", "scFOOBAR_87_S89",
"scFOOBAR_40_S42", "scFOOBAR_55_S57", "scFOOBAR_65_S67", "scFOOBAR_43_S45",
"scFOOBAR_41_S43", "scFOOBAR_09_S11", "scFOOBAR_05_S7", "scFOOBAR_33_S35",
"scFOOBAR_90_S92", "scFOOBAR_57_S59", "scFOOBAR_01_S3", "scFOOBAR_94_S96",
"scFOOBAR_70_S72", "scFOOBAR_49_S51", "scFOOBAR_81_S83", "scFOOBAR_75_S77",
"scFOOBAR_68_S70", "scFOOBAR_21_S23", "scFOOBAR_74_S76", "scFOOBAR_64_S66",
"scFOOBAR_17_S19", "scFOOBAR_53_S55", "scFOOBAR_26_S28", "scFOOBAR_78_S80",
"scFOOBAR_06_S8", "scFOOBAR_84_S86", "scFOOBAR_15_S17", "scFOOBAR_66_S68",
"scFOOBAR_28_S30", "scFOOBAR_44_S46", "scFOOBAR_32_S34", "scFOOBAR_50_S52",
"scFOOBAR_54_S56", "scFOOBAR_02_S4", "scFOOBAR_62_S64", "scFOOBAR_69_S71",
"scFOOBAR_07_S9", "scFOOBAR_59_S61", "scFOOBAR_13_S15", "scFOOBAR_45_S47",
"scFOOBAR_37_S39", "scFOOBAR_61_S63", "scFOOBAR_42_S44", "scFOOBAR_11_S13",
"scFOOBAR_58_S60", "scFOOBAR_16_S18", "scFOOBAR_12_S14", "scFOOBAR_39_S41"
), readcount = c(7.5e-05, 0.208259, 0.317617, 0.217022, 0.24163,
0.178144, 0.203187, 0.326574, 0.46154, 0.241296, 3.8e-05, 0.180657,
0.296669, 0.2436, 0.372329, 0.154357, 0.332183, 0.100498, 0.110694,
0.304405, 0.150185, 0.20115, 0.28345, 0.411268, 0.249103, 0.389757,
0.348236, 0.071293, 5.3e-05, 0.383666, 0.221019, 0.368074, 0.164428,
0.121094, 0.056566, 0.12801, 0.045516, 0.054762, 2.3e-05, 0.037221,
0.053614, 0.0308, 0.060173, 0.061752, 0.019005, 0.011073, 0.004948,
0.00827, 0.011163, 0.010636, 0.017856, 0.019902, 0.021611, 0.010224,
2.9e-05, 0.015984, 0.011805, 3.1e-05, 0.017305, 0.00265, 0.018211,
0.010304, 0.011447, 0.033347, 0.011484, 0.015949, 0.042047, 0.005027,
0.033604, 0.019413, 0.032072, 0.010956, 0.012573, 0.014042, 0.021858,
0.01491, 0.017772, 0.008882, 0.016791, 0.022836, 0.023896, 0.012391,
0.026814, 0.011281, 0.015943, 0.01875, 0.010579, 0.017783, 0.019474,
0.016439, 0.015619, 0.009522, 0.009722, 0.011995)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -94L))
ndat
#> # A tibble: 94 x 2
#> sample_name readcount
#> <chr> <dbl>
#> 1 scFOOBAR_96_S98 0.000075
#> 2 scFOOBAR_20_S22 0.208
#> 3 scFOOBAR_83_S85 0.318
#> 4 scFOOBAR_24_S26 0.217
#> 5 scFOOBAR_76_S78 0.242
#> 6 scFOOBAR_72_S74 0.178
#> 7 scFOOBAR_19_S21 0.203
#> 8 scFOOBAR_60_S62 0.327
#> 9 scFOOBAR_18_S20 0.462
#> 10 scFOOBAR_23_S25 0.241
#> # ... with 84 more rows
What I want to do is to make a cumulative plot.
This is what I use:
ggplot(data = ndat, aes(x = 1:dim(ndat)[1], y = cumsum(readcount))) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = ndat$sample_name) +
ylab("Cumulative read counts (million)") +
xlab("barcode")
This is the result I get:
Notice that the x-axis tick labels are gone, despite of I have this line in my code: scale_x_discrete(labels = ndat$sample_name).
The text like scFOOBAR_96_S98 should appear as the tick label in x-axis.
What's the right way to make the plot?
Here's an approach where I made sample_name into an ordered factor so that it plots in the order of the table row instead of alphabetically.
ndat %>%
mutate(cuml_read = cumsum(readcount),
sample_name = fct_reorder(sample_name, row_number())) %>%
ggplot(aes(x = sample_name, y = cuml_read, group = 1)) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1, size = 6)) +
ylab("Cumulative read counts (million)") +
xlab("barcode")
Edit: OP noted a problem with running the plot in plotly::ggplotly. Here's an alternative to try, which switches from using a factor for the x axis to a continuous numeric scale with labels taken from the sample_name column.
sample_names <- ndat$sample_name
ndat %>%
mutate(cuml_read = cumsum(readcount),
row = row_number(),
sample_name = fct_reorder(sample_name, row_number())) %>%
ggplot(aes(x = row, y = cuml_read, group = 1)) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1, size = 6)) +
scale_x_continuous(breaks = 1:nrow(ndat),
labels = sample_names) +
ylab("Cumulative read counts (million)") +
xlab("barcode")
Based on Small ggplot2 plots placed on coordinates on a ggmap
I would like to have the same solution, but with ggplot function outside the pipeline, applied with purrr::map().
The data for small bar subplots indicating 2 values, may contain
lon, lat, id, valueA, valueB,
After tidyr::gather operation it may look like:
Town, Potential_Sum, lon, lat, component , sales
Aaa, 9.00, 20.80, 54.25, A, 5.000
Aaa, 9.00, 20.80, 54.25, B, 4.000
Bbb, 5.00, 19.60, 50.50, A, 3.000
Bbb, 5.00, 19.60, 50.50, B, 2.000
Current working solution is to use do() to generate sublopts and then ggplotGrob to generate a column with objects "grobs" to be placed at lon,lat locations on a ggmap.
maxSales <- max(df$sales)
df.grobs <- df %>%
do(subplots = ggplot(., aes(1, sales, fill = component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, maxSales)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(sales>0,round(sales), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
df.grobs %>%
{p + geom_label(aes(x = 15, y = 49.8, label = "A"), colour = c("black"),fill = "green", size=3)+
geom_label(aes(x = 15, y = 5.01, label = "B"), colour = c("black"),fill = "red", size=3)+
.$subgrobs +
geom_text(data=df, aes(label = Miasto), vjust = 3.5,nudge_x = 0.05, size=2.5) +
geom_col(data = df,
aes(0,0, fill = component),
colour = "white")}
p is a ggmap object, map of Poland, on which I would like to place small plots:
# p <-
# get_googlemap(
# "Poland",
# maptype = "roadmap",
# zoom = 6,
# color = "bw",
# crop = T,
# style = "feature:all|element:labels|visibility:off" # 'feature:administrative.country|element:labels|visibility:off'
# ) %>% # or 'feature:all|element:labels|visibility:off'
# ggmap() + coord_cartesian() +
# scale_x_continuous(limits = c(14, 24.3), expand = c(0, 0)) +
# scale_y_continuous(limits = c(48.8, 55.5), expand = c(0, 0))
#
How to translate this solution to the syntax nest - apply -unnest so that the ggplot part should be outside of the piped expression as a function.
In other words. How to replace do() with map(parameters, GGPlot_function) and then plot grobs on a ggmap .
What I did so far was I tried to write a ggplot function
#----barplots----
maxSales <- max(df$sales)
fn_ggplot <- function (df, x, component, maxX) {
x <- enquo(x)
component <-enquo(component)
maxX <-enquo(maxX)
p <- ggplot(df, aes(1, !!x, fill = !!component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, !!maxX)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(x>0,round(!!x), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)
return(p)
}
And got totaly confused trying to apply it like this (I am a constant beginner unfortunately)... this is not working, showing
df.grobs <- df %>%
mutate(subplots = pmap(list(.,sales,component,Potential_Sum),fn_ggplot)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
I get errors indicating I do not know what I am doing, ie lengths of arguments are incorrect and something else is expected.
message: Element 2 of `.l` must have length 1 or 7, not 2
class: `purrr_error_bad_element_length`
backtrace:
1. dplyr::mutate(...)
12. purrr:::stop_bad_length(...)
13. dplyr::mutate(...)
Call `rlang::last_trace()` to see the full backtrace
> rlang::last_trace()
x
1. +-`%>%`(...)
2. | +-base::withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
3. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
4. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
5. | \-global::`_fseq`(`_lhs`)
6. | \-magrittr::freduce(value, `_function_list`)
7. | \-function_list[[i]](value)
8. | +-dplyr::mutate(...)
9. | \-dplyr:::mutate.tbl_df(...)
10. | \-dplyr:::mutate_impl(.data, dots, caller_env())
11. +-purrr::pmap(list(., sales, component, Potential_Sum), fn_ggplot)
12. \-purrr:::stop_bad_element_length(...)
13. \-purrr:::stop_bad_length(...)
data
First let's build some sample data close to yours but reproducible without the need for an api key.
As a starting point we have a plot of a country map stored in p, and some data in long form to build the charts stored in plot_data.
library(maps)
library(tidyverse)
p <- ggplot(map_data("france"), aes(long,lat,group=group)) +
geom_polygon(fill = "lightgrey") +
theme_void()
set.seed(1)
plot_data <- tibble(lon = c(0,2,5), lat = c(44,48,46)) %>%
group_by(lon, lat) %>%
do(tibble(component = LETTERS[1:3], value = runif(3,min=1,max=5))) %>%
mutate(total = sum(value)) %>%
ungroup()
plot_data
# # A tibble: 9 x 5
# lon lat component value total
# <dbl> <dbl> <chr> <dbl> <dbl>
# 1 0 44 A 2.06 7.84
# 2 0 44 B 2.49 7.84
# 3 0 44 C 3.29 7.84
# 4 2 48 A 4.63 11.0
# 5 2 48 B 1.81 11.0
# 6 2 48 C 4.59 11.0
# 7 5 46 A 4.78 11.9
# 8 5 46 B 3.64 11.9
# 9 5 46 C 3.52 11.9
define a plotting function
we isolate the plotting code in a separate function
my_plot_fun <- function(data){
ggplot(data, aes(1, value, fill = component)) +
geom_col(position = position_dodge(width = 1),
alpha = 0.75, colour = "white") +
geom_text(aes(label = round(value, 1), group = component),
position = position_dodge(width = 1),
size = 3) +
theme_void()+ guides(fill = F)
}
build a wrapper
This function takes a data set, some coordinates and the plotting function as parameters, to annotate at the right spot.
annotation_fun <- function(data, lat,lon, plot_fun) {
subplot = plot_fun(data)
sub_grob <- annotation_custom(ggplotGrob(subplot),
x = lon-0.5, y = lat-0.5,
xmax = lon+0.5, ymax = lat+0.5)
}
The final code
The the code becomes simple, using nest and pmap
subgrobs <- plot_data %>%
nest(-lon,-lat) %>%
pmap(annotation_fun,plot_fun = my_plot_fun)
p + subgrobs
I'm a novice with R and ggplot. I recognize the power of R and elegance of ggplot and am trying to learn. Normally, I can find a solution online but have had no luck this time.
I am trying to generate a chart in ggplot comparing Economic Freedom scores with Life Expectancy and Infant mortality using World Bank data (the csv data is included at the bottom of the post). I have had some success using this code (using the example at https://rpubs.com/MarkusLoew/226759):
p <- ggplot(mydata, aes(x = Score))
p <- p + geom_point(aes(y = Longevity, colour = "Life Expectancy"))
p <- p + geom_point(aes(y = Infant/1, colour = "Infant mortality (per
capita)"))
p <- p + scale_y_continuous(sec.axis = sec_axis(~.*1, name = "Infant
mortality (per capita)"))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "Life Expectancy (years)",
x = "Score",
colour = " ")
p
This has produced the following:
my messed up chart
I can't manage to properly scale the primary y-axis. Scaling the graphs as in the example (link above) doesn't work: I just expand out or squash the Longevity data. I tried loading the Longevity data on the secondary y but it still didn't work.
The other issue is that I would like to add LOESS smooth trendlines to each set of data. I have tried following various examples but nothing works.
If anyone has a solution it will be much appreciated!
Thanks
Data:
Country Name,Score,GDP,Infant,Longevity,,,,,,,,,
Afghanistan,48.9,585.850064,53.2,63.673,,,,,,,,,
Albania,64.4,4537.86249,8.1,78.345,,,,,,,,,
Algeria,46.5,4.12E+03,21,76.078,,,,,,,,,
Angola,48.5,4.17E+03,55.8,61.547,,,,,,,,,
Argentina,50.4,1.44E+04,9.7,76.577,,,,,,,,,
Armenia,70.3,3936.79832,11.9,74.618,,,,,,,,,
Australia,81,5.38E+04,3.1,82.5,,,,,,,,,
Austria,72.3,4.73E+04,3,80.8902439,,,,,,,,,
Azerbaijan,63.6,4131.61831,21.9,72.026,,,,,,,,,
Bahrain,68.5,23655.0356,6.4,76.9,,,,,,,,,
Bangladesh,55,1.52E+03,28.3,72.489,,,,,,,,,
Barbados,54.5,16788.6839,11.9,75.906,,,,,,,,,
Belarus,58.6,5726.02967,2.9,73.82682927,,,,,,,,,
Belgium,67.8,4.33E+04,3.1,80.99268293,,,,,,,,,
Belize,58.6,4905.50628,12.8,70.384,,,,,,,,,
Benin,59.2,829.797231,65.1,60.907,,,,,,,,,
Bhutan,58.4,3110.23011,26.5,70.197,,,,,,,,,
Bolivia,47.7,3393.95582,29,69.125,,,,,,,,,
Bosnia and Herzegovina,60.2,5180.6363,5.1,76.911,,,,,,,,,
Botswana,70.1,7595.59585,32.3,66.797,,,,,,,,,
Brazil,52.9,9.82E+03,14.6,75.509,,,,,,,,,
Brunei Darussalam,69.8,28290.5852,9,77.203,,,,,,,,,
Bulgaria,67.9,8031.59844,6.7,74.61463415,,,,,,,,,
Burkina Faso,59.6,670.705913,52.6,60.361,,,,,,,,,
Burundi,53.2,320.08687,44.1,57.481,,,,,,,,,
Cabo Verde,56.9,3209.69112,15.9,72.798,,,,,,,,,
Cambodia,59.5,1384.42319,26.3,68.981,,,,,,,,,
Cameroon,51.8,1446.70289,56.6,58.073,,,,,,,,,
Canada,78.5,4.50E+04,4.6,82.3005122,,,,,,,,,
Central African Republic,51.8,418.411287,89.2,52.171,,,,,,,,,
Chad,49,669.886426,75,52.903,,,,,,,,,
Chile,76.5,1.53E+04,6.6,79.522,,,,,,,,,
China,57.4,8.83E+03,8.6,76.252,,,,,,,,,
Colombia,69.7,6.30E+03,13.1,74.381,,,,,,,,,
Comoros,55.8,797.286368,53.6,63.701,,,,,,,,,
Costa Rica,65,11630.6684,8,79.831,,,,,,,,,
Cote d'Ivoire,63,1662.44247,66,53.582,,,,,,,,,
Croatia,59.4,13294.5149,4,78.02195122,,,,,,,,,
Cyprus,67.9,25233.571,2.2,80.508,,,,,,,,,
Czech Republic,73.3,2.04E+04,2.6,78.33170732,,,,,,,,,
Denmark,75.1,5.63E+04,3.7,80.70487805,,,,,,,,,
Djibouti,46.7,1927.58971,53,62.465,,,,,,,,,
Dominica,63.7,7609.61435,30.4,,,,,,,,,,
Dominican Republic,62.9,7052.25884,25.6,73.861,,,,,,,,,
Ecuador,49.3,6.20E+03,12.7,76.327,,,,,,,,,
"Egypt, Arab Rep.",52.6,2.41E+03,19.4,71.484,,,,,,,,,
El Salvador,64.1,3889.30877,12.9,73.512,,,,,,,,,
Equatorial Guinea,45,9850.01358,67.4,57.681,,,,,,,,,
Estonia,79.1,19704.655,2.3,77.73658537,,,,,,,,,
Ethiopia,52.7,767.563478,42.5,65.475,,,,,,,,,
Fiji,63.4,5589.38883,21.1,70.269,,,,,,,,,
Finland,74,4.57E+04,1.9,81.7804878,,,,,,,,,
France,63.3,3.85E+04,3.5,82.27317073,,,,,,,,,
Gabon,58.6,7220.68724,36.1,66.105,,,,,,,,,
Georgia,76,4078.25488,10.2,73.261,,,,,,,,,
Germany,73.8,4.45E+04,3.2,80.64146341,,,,,,,,,
Ghana,56.2,1641.48662,37.2,62.742,,,,,,,,,
Greece,55,1.86E+04,4.2,81.03658537,,,,,,,,,
Guatemala,63,4470.98957,23.9,73.409,,,,,,,,,
Guinea,47.6,825.34493,58.1,60.015,,,,,,,,,
Guinea-Bissau,56.1,723.658622,57.4,57.403,,,,,,,,,
Guyana,58.5,4725.31906,26.7,66.65,,,,,,,,,
Haiti,49.6,765.683925,55,63.33,,,,,,,,,
Honduras,58.8,2480.12593,16.2,73.575,,,,,,,,,
"Hong Kong SAR, China",88.6,4.62E+04,,84.22682927,,,,,,,,,
Hungary,65.8,1.42E+04,4.1,75.56829268,,,,,,,,,
Iceland,74.4,70056.8734,1.7,82.46829268,,,,,,,,,
This should give you a good start. You can play around with scale_ratio & dif if you want to
library(tidyverse)
mydata <- read_csv(text, col_types = paste0(c("c", rep("d", 4), rep("_", 9)), collapse = ""))
mydata
#> # A tibble: 67 x 5
#> `Country Name` Score GDP Infant Longevity
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Afghanistan 48.9 586. 53.2 63.7
#> 2 Albania 64.4 4538. 8.1 78.3
#> 3 Algeria 46.5 4120 21 76.1
#> 4 Angola 48.5 4170 55.8 61.5
#> 5 Argentina 50.4 14400 9.7 76.6
#> 6 Armenia 70.3 3937. 11.9 74.6
#> 7 Australia 81 53800 3.1 82.5
#> 8 Austria 72.3 47300 3 80.9
#> 9 Azerbaijan 63.6 4132. 21.9 72.0
#> 10 Bahrain 68.5 23655. 6.4 76.9
#> # ... with 57 more rows
Calculate ratios needed to scale the two y-axes
scale_ratio <- (max(mydata$Infant, na.rm = TRUE) - min(mydata$Infant, na.rm = TRUE)) /
(max(mydata$Longevity, na.rm = TRUE) - min(mydata$Longevity, na.rm = TRUE))
dif <- min(mydata$Longevity, na.rm = TRUE) - min(mydata$Infant, na.rm = TRUE)
myColor <- c("#d95f02", "#1b9e77")
p <- ggplot(mydata, aes(x = Score, y = Longevity)) +
geom_point(aes(colour = "Life Expectancy"),
shape = "triangle",
alpha = 0.7, size = 2) +
geom_point(aes(y = Infant/scale_ratio + dif,
colour = "Infant mortality (per capita)"),
alpha = 0.7, size = 2) +
scale_y_continuous(sec.axis = sec_axis(~ (. - dif) * scale_ratio,
name = "Infant mortality (per capita)")) +
scale_colour_manual(values = myColor) +
theme_bw(base_size = 14) +
labs(y = "Life Expectancy (years)",
x = "Score",
colour = " ") +
guides(colour = guide_legend(title = "",
override.aes = list(shape = c("circle", "triangle")))) +
theme(legend.position = 'bottom') +
NULL
p
Add fitted lines and their corresponding equations/R2
### https://docs.r4photobiology.info/ggpmisc/articles/user-guide.html
library(ggpmisc)
formula <- y ~ poly(x, 2, raw = TRUE)
p +
stat_smooth(aes(y = Longevity),
method = "lm", formula = formula, se = FALSE, size = 1, color = myColor[2]) +
stat_smooth(aes(y = Infant/scale_ratio + dif),
method = "lm", formula = formula, se = FALSE, size = 1, color = myColor[1]) +
stat_poly_eq(aes(y = Longevity,
label = paste(..eq.label.., ..adj.rr.label..,
sep = "~~italic(\"with\")~~")),
geom = "text", alpha = 0.7,
formula = formula, parse = TRUE,
color = myColor[2],
label.x.npc = 0.5,
label.y.npc = 0.95) +
stat_poly_eq(aes(y = Infant/scale_ratio + dif,
label = paste(..eq.label.., ..adj.rr.label..,
sep = "~~italic(\"with\")~~")),
geom = "text", alpha = 0.7,
color = myColor[1],
formula = formula, parse = TRUE,
label.x.npc = 0.75,
label.y.npc = 0.15) +
NULL
Created on 2018-10-07 by the reprex package (v0.2.1.9000)