Adding a point to box plots - r

I made a box plot of the 3 replicates of each 40s and 80s (first 6 columns), with 40s in red and 80s in blue. I would now like to add the 7th column which is labelled "control" as a point on each pair of box plots to the corresponding "gene" in a different such that the resulting plot would have 2 box plots for 40s and 80s and a control point for each entry in the "gene" column
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~rep1_40s, ~rep2_40s, ~rep3_40s, ~rep1_80s, ~rep2_80s, ~rep3_80s, ~control,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925, 0.024144802,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 , 0.013676519,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 , 0.016027898,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 , 0.027512454,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 , 0.01232813,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 , 0.014466955,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 , 0.014706078,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 , 0.002198605,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 , 0.002630355,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 , 0.017110594,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 , 0.037754899,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 , 0.010269014,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 , 0.015795417,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 , 0.019063434,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 , 0.011570907,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 , 0.020245766,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 , 0.015895051,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 , 0.002889406,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 , 0.010773829,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 , 0.016114248,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 , 0.018087014,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 , 0.018425772,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 , 0.018731318,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 , 0.016067752,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 , 0.02738625,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 , 0.011776818,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 , 0.021819993,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 , 0.013530389,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 , 0.020591166,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 , 0.018425772,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 , 0.01798738,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 , 0.002789771,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 , 0.017369645,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 , 0.002504151,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 , 0.019262703,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 , 0.017827964,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 , 0.025539688,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 , 0.011697111,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 , 0.02247094,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 , 0.013457323,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 , 0.016745267,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 , 0.013955497,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 , 0.013556958,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 , 0.00212554,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 , 0.012441049,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 , 0.01454002,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 , 0.019667884,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 , 0.021766855,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 , 0.017827964,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 , 0.016917967,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 , 0.016898041,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 , 0.016446363,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 , 0.02465626,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 , 0.012965792,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 , 0.002949186,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 , 0.014792428,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 , 0.018113584,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 , 0.0193092,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 , 0.016094321,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 , 0.010355364,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 , 0.013317835,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 , 0.003659914,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 , 0.017230156,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177, 0.022743275
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80", "control" )
marte %>% pivot_longer(-gene, names_to = "name") %>%
mutate(group = if_else(str_detect(name, "40"), "40s", "80s"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
ggplot(aes(x = group, y = value, color = group)) +
geom_boxplot() +
geom_jitter(alpha = .5) +
coord_flip() +
facet_wrap(~gene, ncol = 4)

If I understand you, it just needs a third category, and to split up the geom's aesthetics' information.
marte_long <- marte %>%
pivot_longer(-gene, names_to = "name") %>%
mutate(group = case_when(str_detect(name, "40") ~ "40s",
name == "control" ~ "Control",
TRUE ~ "80s"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
)
marte_long_control <- filter(marte_long, group == "Control")
marte_long <- filter(marte_long, group != "Control")
ggplot() +
geom_boxplot(data = marte_long, aes(x = group, y = value, color = group)) +
geom_jitter(data = marte_long, aes(x = group, y = value, color = group), alpha = .5) +
geom_point(data = marte_long_control, aes(x = group, y = value)) +
coord_flip() +
facet_wrap(~gene, ncol = 4)

Related

How to get this ggplot legend to be same order as bars?

The data and code I am trying to use:
library(ggplot2)
case_counts <- c(1800, 2064, 7118, 8697, 8737, 10738, 13602, 15007,15781, 17221, 17735, 18535, 19827, 20042, 22632, 25542,28920)
phenotpyes <- c("Known or suspected fetal abnormality affecting management of mother" ,
"Cardiac and circulatory congenital anomalies","Viral infection" ,
"Disorder of skin and subcutaneous tissue NOS" ,"Allergies other" ,"Cataract",
"Other anemias" ,"Cancer suspected or other" ,"Other symptoms/disorders or the urinary system" ,
"Pain","Abdominal pain" ,"Mood disorders","Other symptoms of respiratory system" ,
"Diseases of esophagus" ,"Disorders of lipoid metabolism" ,"Pain in joint" ,"Hypertension")
categories <- c("pregnancy complications" ,"congenital anomalies" ,"infectious diseases" ,"dermatologic"
,"injuries & poisonings" ,"sense organs" ,"hematopoietic" ,
"neoplasms" ,"genitourinary" ,"neurological" ,"symptoms" ,"mental disorders" ,
"respiratory" ,"digestive" ,"endocrine/metabolic" ,"musculoskeletal" ,"circulatory system")
data <- data.frame(case_count=case_counts,
phenotype=phenotpyes,
category=categories)
data$phenotype <- factor(data$phenotype , levels = data$phenotype)
p <-ggplot(data=data, aes(x = phenotype, y = case_count,fill = category)) + geom_bar(stat="identity")
p <- p + coord_flip()
p <- p + labs(x='phenotype', y='number of cases', fill='category')
p <- p + scale_y_continuous(breaks = seq(0,30000,by=5000),limits = c(0,30000))
p
The plot it produces:
How do I get the legend to follow the same order as the bars from top to bottom: red, blue, green etc?
OP here, figured it out:
library(ggplot2)
case_counts <- c(1800, 2064, 7118, 8697, 8737, 10738, 13602, 15007,15781, 17221, 17735, 18535, 19827, 20042, 22632, 25542,28920)
phenotpyes <- c("Known or suspected fetal abnormality affecting management of mother" ,
"Cardiac and circulatory congenital anomalies","Viral infection" ,
"Disorder of skin and subcutaneous tissue NOS" ,"Allergies other" ,"Cataract",
"Other anemias" ,"Cancer suspected or other" ,"Other symptoms/disorders or the urinary system" ,
"Pain","Abdominal pain" ,"Mood disorders","Other symptoms of respiratory system" ,
"Diseases of esophagus" ,"Disorders of lipoid metabolism" ,"Pain in joint" ,"Hypertension")
categories <- c("pregnancy complications" ,"congenital anomalies" ,"infectious diseases" ,"dermatologic"
,"injuries & poisonings" ,"sense organs" ,"hematopoietic" ,
"neoplasms" ,"genitourinary" ,"neurological" ,"symptoms" ,"mental disorders" ,
"respiratory" ,"digestive" ,"endocrine/metabolic" ,"musculoskeletal" ,"circulatory system")
data <- data.frame(case_count=case_counts,
phenotype=phenotpyes,
category=categories)
data$phenotype <- factor(data$phenotype , levels = data$phenotype)
p <-ggplot(data=data, aes(x = phenotype, y = case_count,fill = category)) + geom_bar(stat="identity")
p <- p + coord_flip()
p <- p +scale_fill_discrete(breaks=rev(data$category))
p <- p + labs(x='phenotype', y='number of cases', fill='category')
p <- p + scale_y_continuous(breaks = seq(0,30000,by=5000),limits = c(0,30000))
p
You can i.e. specify the factor levels for categories in the order you want.
Update the lines in which you create the graph, leave the rest unchanged.
p <-ggplot(data=data, aes(x = phenotype, y = case_count,fill = factor(category, levels = rev(categories)))) + geom_bar(stat="identity")

Plotting a box plot from a table with 7 columns

I am trying to plot the following data (paste-bin link) https:[enter image description here][1]//pastebin.com/w1WaEcPd as a box plot with the trinucleotide identity as the x column and the Frequency as the y column. I have attached a picture of the graph I am envisioning and the code I have so far. I am getting the error:
"Error in FUN(X[[i]], ...) : object 'gene' not found".
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6, ~control
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925, 0.024144802,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 , 0.013676519,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 , 0.016027898,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 , 0.027512454,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 , 0.01232813,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 , 0.014466955,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 , 0.014706078,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 , 0.002198605,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 , 0.002630355,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 , 0.017110594,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 , 0.037754899,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 , 0.010269014,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 , 0.015795417,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 , 0.019063434,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 , 0.011570907,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 , 0.020245766,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 , 0.015895051,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 , 0.002889406,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 , 0.010773829,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 , 0.016114248,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 , 0.018087014,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 , 0.018425772,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 , 0.018731318,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 , 0.016067752,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 , 0.02738625,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 , 0.011776818,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 , 0.021819993,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 , 0.013530389,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 , 0.020591166,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 , 0.018425772,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 , 0.01798738,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 , 0.002789771,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 , 0.017369645,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 , 0.002504151,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 , 0.019262703,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 , 0.017827964,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 , 0.025539688,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 , 0.011697111,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 , 0.02247094,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 , 0.013457323,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 , 0.016745267,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 , 0.013955497,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 , 0.013556958,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 , 0.00212554,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 , 0.012441049,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 , 0.01454002,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 , 0.019667884,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 , 0.021766855,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 , 0.017827964,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 , 0.016917967,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 , 0.016898041,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 , 0.016446363,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 , 0.02465626,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 , 0.012965792,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 , 0.002949186,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 , 0.014792428,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 , 0.018113584,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 , 0.0193092,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 , 0.016094321,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 , 0.010355364,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 , 0.013317835,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 , 0.003659914,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 , 0.017230156,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177, 0.022743275
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80", "control" )
marte %>% pivot_longer(-gene, names_to = "name") %>%
mutate(group = if_else(str_detect(name, "40"), "40s", "80s"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
ggplot(aes(x = group, y = value, color = group)) +
geom_boxplot() +
geom_jitter(alpha = .5) +
coord_flip() +
facet_wrap(~gene, ncol = 4)
That would be a possible solution using the tidyverse packages. Here I recreated the data table, you would need just to rename the columns and then run the parte with the pivot_longer and mutate to prepare the data for plotting and then plot with ggplot2
I am making a few assumptions here, if it is not exactly what you were thinking, please write a comment.
library(dplyr)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.0
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513,
"AAA", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828,
"TAG", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884,
"AGC", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108,
"TGT", 0.00230219 , 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438,
"AAA", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094,
"TAG", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358,
"AGC", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073,
"TGT", 0.01259198 , 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295,
"CGG", 0.00377216 , 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")
marte %>% pivot_longer(-gene, names_to = "name") %>%
mutate(group = if_else(str_detect(name, "40"), "group 1", "group 2"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
filter(gene %in% c("AAA", "TAG", "AGC", "TGT")) %>%
ggplot(aes(x = rep, y = value, color = group)) +
geom_boxplot() +
coord_flip() +
facet_wrap(~gene, ncol = 1)
Created on 2021-07-01 by the reprex package (v2.0.0)
Edit
marte %>% pivot_longer(-gene, names_to = "name") %>%
mutate(group = if_else(str_detect(name, "40"), "40s", "80s"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
ggplot(aes(x = group, y = value, color = group)) +
geom_boxplot() +
geom_jitter(alpha = .5) +
coord_flip() +
facet_wrap(~gene, ncol = 1)
Edit 2
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 ,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 ,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 ,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 ,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 ,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 ,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 ,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 ,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 ,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 ,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 ,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 ,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 ,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 ,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 ,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 ,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 ,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 ,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 ,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 ,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 ,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 ,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 ,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 ,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 ,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 ,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 ,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 ,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 ,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 ,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 ,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 ,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 ,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 ,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 ,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 ,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 ,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 ,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 ,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 ,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 ,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 ,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 ,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 ,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 ,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 ,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 ,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 ,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 ,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 ,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 ,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 ,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 ,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 ,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 ,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 ,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 ,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 ,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 ,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 ,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 ,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 ,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177,
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")
marte %>% pivot_longer(-gene, names_to = "name") %>%
mutate(group = if_else(str_detect(name, "40"), "40s", "80s"),
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
ggplot(aes(x = group, y = value, color = group)) +
geom_boxplot() +
geom_jitter(alpha = .5) +
coord_flip() +
facet_wrap(~gene, ncol = 4)
Created on 2021-07-09 by the reprex package (v2.0.0)

Problem in R: NAs introduced by coercion in a scatterplot

I am trying to create this
but in doing so R says the following:
In plot.xy(xy, type, ...) : NAs introduced by coercion`
I will attach my entire code below:
lnmass <- MoleRat$lnMass
lnenergy <- MoleRat$lnEnergy
caste <- MoleRat$caste
infrequent <- MoleRat[caste == "lazy", ]
frequent <- MoleRat[caste == "worker", ]
lm.infrequent <- lm(lnEnergy ~ lnMass, data = infrequent) #, subset=caste=="lazy")
lm.frequent <- lm(lnEnergy ~ lnMass, data = frequent)
plot(lnmass, lnenergy, pch = as.numeric(caste), col = as.numeric(caste))
abline(lm.infrequent)
abline(lm.frequent)
Here is my data:
dput(MoleRat)
structure(list(caste = c("worker", "worker", "worker", "worker",
"worker", "worker", "worker", "worker", "worker", "worker", "worker",
"worker", "worker", "worker", "worker", "worker", "worker", "worker",
"worker", "worker", "worker", "lazy", "lazy", "lazy", "lazy",
"lazy", "lazy", "lazy", "lazy", "lazy", "lazy", "lazy", "lazy",
"lazy", "lazy"), lnMass = c(3.850147602, 3.988984047, 4.110873864,
4.17438727, 4.248495242, 4.262679877, 4.343805422, 4.48863637,
4.510859507, 3.951243719, 3.988984047, 4.158883083, 4.234106505,
4.276666119, 4.248495242, 4.465908119, 4.532599493, 4.510859507,
4.828313737, 4.753590191, 4.875197323, 4.382026635, 4.543294782,
4.912654886, 4.700480366, 4.700480366, 4.762173935, 4.859812404,
5.056245805, 5.262690189, 5.147494477, 5.087596335, 4.997212274,
4.875197323, 4.812184355), lnEnergy = c(3.688879454, 3.688879454,
3.688879454, 3.663561646, 3.871201011, 3.850147602, 3.931825633,
3.688879454, 3.951243719, 4.110873864, 4.189654742, 4.143134726,
4.262679877, 4.248495242, 4.510859507, 4.394449155, 4.219507705,
4.48863637, 4.644390899, 5.017279837, 5.043425117, 3.828641396,
4.143134726, 3.555348061, 4.060443011, 4.094344562, 4.304065093,
4.094344562, 4.418840608, 4.234106505, 4.49980967, 4.574710979,
4.532599493, 4.615120517, 4.48863637)), class = "data.frame", row.names = c(NA,
-35L))
Why not use ggplot ?
ggplot(MoleRat, aes(lnmass,lnenergy, color=caste))+geom_point()+
geom_smooth(method='lm',se=FALSE)+ theme_minimal()
You can do it with plot:
plot(lnEnergy ~ lnMass, MoleRat)
points(lnEnergy ~ lnMass, infrequent, col = "red", pch = 19)
points(lnEnergy ~ lnMass, frequent , col = "red")
abline(lm.infrequent)
abline(lm.frequent)
or (easier) with ggplot:
library(ggplot2)
ggplot(MoleRat, aes(x = lnMass, y = lnEnergy, colour = caste)) +
geom_point(size = 2) +
geom_smooth(formula = y~x, method = "lm", se = FALSE) +
theme_classic() +
labs(x = "ln(body mass)",
y = "ln(daily energy expenditure)")
However, the image you posted is created by this other model:
lm(lnEnergy ~ lnMass + caste, data = MoleRat)
And based on that, that's the image you will get:
lm.graph <- lm(lnEnergy ~ lnMass + caste, data = MoleRat)
plot(lnEnergy ~ lnMass, MoleRat)
points(lnEnergy ~ lnMass, infrequent, col = "red", pch = 19)
points(lnEnergy ~ lnMass, frequent , col = "red")
lmcoef <- coef(lm.graph)
abline(a = lmcoef[1], b = lmcoef[2])
abline(a = lmcoef[1] + lmcoef[3], b = lmcoef[2])
And with ggplot:
MoleRat$prd <- predict(lm.graph, MoleRat)
ggplot(MoleRat, aes(x = lnMass, colour = caste)) +
geom_point(aes(y = lnEnergy), size = 2) +
geom_line(aes(y = prd), size = 1) +
theme_classic() +
labs(x = "ln(body mass)",
y = "ln(daily energy expenditure)")

Error: number of rows of matrices must match (ggplot facet)

I'm trying to assign labels to my ggplot2 facets. As I'm thinking this is a character problem, I'm using the labels exactly as they are in my dataset, so it's a little long, I apologize.
set.seed(123)
names <- c("acquisitionsmergers", "analystratings", "assets", "balanceofpayments",
"bankruptcy", "civilunrest", "corporateresponsibility", "credit",
"creditratings", "crime", "dividends", "earnings", "equityactions",
"exploration", "government", "indexes", "industrialaccidents",
"insidertrading", "investorrelations", "laborissues", "legal",
"marketing", "orderimbalances", "partnerships", "pricetargets",
"productsservices", "publicopinion", "regulatory", "revenues",
"security", "stockprices", "taxes", "technicalanalysis", "transportation",
"warconflict")
mylabels <- c("acquisitionsmergers" = "Acquisitions/Mergers",
"analystratings" = "Analyst Ratings",
"assets" = "Assets",
"balanceofpayments" = "Balance of Payments",
"bankruptcy" = "Bankruptcy",
"civilunrest" = "Civil Unrest",
"corporateresponsibility" = "Corporate Responsibility",
"credit" = "Credit",
"creditratings" = "Credit Ratings",
"crime" = "Crime",
"dividends" = "Dividends",
"earnings" = "Earnings",
"equityactions" = "Equity Actions",
"exploration" = "Exploration",
"government" = "Government",
"indexes" = "Indexes",
"industrialaccidents" = "Industrial Accidents",
"insidertrading" = "Insider Trading",
"investorrelations" = "Investor Relations",
"laborissues" = "Labor Issues",
"legal" = "Legal",
"marketing" = "Marketing",
"orderimbalances" = "Order Imbalances",
"partnerships" = "Partnerships",
"pricetargets" = "Price Targets",
"productsservices" = "Product Services",
"publicopinion" = "Public Opinion",
"regulatory" = "Regulatory",
"revenues" = "Revenues",
"security" = "Security",
"stockprices" = "Stockprices",
"taxes" = "Taxes",
"technicalanalysis" = "Technical Analysis",
"transportation" = "Transportation",
"warconflict" = "War Conflict")
df <- data.frame(item = rep(names, each=5), value=rnorm(5*35,5,2), date = rep(seq(as.Date("2000/1/1"), by = "month", length.out = 5),35))
Then,
library(ggplot2)
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y", labeller = mylabels)
Produces
Error in cbind(labels = list(), list(`{`, if (!is.null(.rows) || !is.null(.cols)) { :
number of rows of matrices must match (see arg 2)
I've used labeller before without problems, so I'm not sure why it's throwing this error. I checked a few things, such as making sure there is a match:
all(names(mylabels) %in% names)
length(mylabels) == length(names)
Thanks for any help!
What about this?
df$item <- factor(df$item,
labels = c("Acquisitions/Mergers","Analyst Ratings","Assets", "Balance of Payments","Bankruptcy", "Civil Unrest",
"Corporate Responsibility", "Credit", "Credit Ratings", "Crime", "Dividends", "Earnings", "Equity Actions",
"Exploration", "Government", "Indexes", "Industrial Accidents", "Insider Trading", "Investor Relations",
"Labor Issues", "Legal", "Marketing", "Order Imbalances","Partnerships", "Price Targets",
"Product Services", "Public Opinion","Regulatory", "Revenues","Security", "Stockprices",
"Taxes", "Technical Analysis", "Transportation", "War Conflict"))
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y")
UPDATE
to address the questions in the comment
First, are the label factors taken as the unique order? In other words, the original "item" vector in the dataframe should be sorted so it is in the same order as labels?
Answer
The order of levels in the labels vector must be the same as the order of levels in item vector.
Below are the levels of item
levels(df$item)
[1] "acquisitionsmergers" "analystratings" "assets" "balanceofpayments" "bankruptcy"
[6] "civilunrest" "corporateresponsibility" "credit" "creditratings" "crime"
[11] "dividends" "earnings" "equityactions" "exploration" "government"
[16] "indexes" "industrialaccidents" "insidertrading" "investorrelations" "laborissues"
[21] "legal" "marketing" "orderimbalances" "partnerships" "pricetargets"
[26] "productsservices" "publicopinion" "regulatory" "revenues" "security"
[31] "stockprices" "taxes" "technicalanalysis" "transportation" "warconflict"
I usually copy paste them inside labels add commas, remove the numbers and change the names as I like.
There is another way, below, to change the names of the levels of item using dplyr and forcats
library(dplyr)
library(forcats)
df <- df %>%
mutate(item_update = item) %>% # create new column called item_update to change the names of item levels
mutate(item_update = fct_recode(item_update,
"Acquisitions/Mergers" = "acquisitionsmergers" ,
"Analyst Ratings" = "analystratings" ,
"Assets" = "assets",
"Balance of Payments" = "balanceofpayments",
"Bankruptcy" = "bankruptcy",
"Civil Unrest" = "civilunrest",
"Corporate Responsibility" = "corporateresponsibility",
"Credit" = "credit",
"Credit Ratings" = "creditratings",
"Crime" = "crime",
"Dividends" = "dividends",
"Earnings" = "earnings",
"Equity Actions" = "equityactions",
"Exploration" = "exploration",
"Government" = "government",
"Indexes" = "indexes",
"Industrial Accidents" = "industrialaccidents",
"Insider Trading" = "insidertrading",
"Investor Relations" = "investorrelations",
"Labor Issues" = "laborissues",
"Legal" = "legal" ,
"Marketing" = "marketing",
"Order Imbalances" = "orderimbalances",
"Partnerships" = "partnerships",
"Price Targets" = "pricetargets",
"Product Services" = "productsservices",
"Public Opinion" = "publicopinion" ,
"Regulatory" = "regulatory",
"Revenues" = "revenues",
"Security" = "security",
"Stockprices" = "stockprices",
"Taxes" = "taxes",
"Technical Analysis" = "technicalanalysis",
"Transportation" = "transportation" ,
"War Conflict" = "warconflict"
))
and we can plot it as below
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item_update, ncol=4, scales="free_y")
Second, does this appear to be a bug, which I should file with the ggplot2 page?
Answer
It is not a bug.
Your approach will work fine if you edit mylabels to be
mylabels <- c(acquisitionsmergers = "Acquisitions/Mergers",
analystratings = "Analyst Ratings",
assets = "Assets",
balanceofpayments = "Balance of Payments",
bankruptcy = "Bankruptcy",
civilunrest = "Civil Unrest",
corporateresponsibility = "Corporate Responsibility",
credit = "Credit",
creditratings = "Credit Ratings",
crime = "Crime",
dividends = "Dividends",
earnings = "Earnings",
equityactions = "Equity Actions",
exploration = "Exploration",
government = "Government",
indexes = "Indexes",
industrialaccidents = "Industrial Accidents",
insidertrading = "Insider Trading",
investorrelations = "Investor Relations",
laborissues = "Labor Issues",
legal = "Legal",
marketing = "Marketing",
orderimbalances = "Order Imbalances",
partnerships = "Partnerships",
pricetargets = "Price Targets",
productsservices = "Product Services",
publicopinion = "Public Opinion",
regulatory = "Regulatory",
revenues = "Revenues",
security = "Security",
stockprices = "Stockprices",
taxes = "Taxes",
technicalanalysis = "Technical Analysis",
transportation = "Transportation",
warconflict = "War Conflict")
and the plot to be
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y", labeller = labeller(item = mylabels))

Add new variable (column) in the fly to a reactive dataframe in Shiny

I am trying to add in a reactive dataframe the outpust of both a non-linear regression model and a multivariate analysis. I managed to create the reactive dataframe which is updated anytime I filter my data. I now want to update the model outputs whenever I filter the dataframe and add the prediction values of the model to the reactive dataframe. Below is a subset of the dataset I am using as well as the ui and server files I use to create the shiny App.
Load package
library (shiny)
library(ggvis)
library(dplyr)
library(rbokeh)
library (minpack.lm)
library (hydroGOF)
library(caret)
The dataframe I use:
Flux_Data_df<- structure(list(Site_ID = structure(c(1L, 3L, 5L, 7L, 8L), .Label = c("AR-Slu",
"AR-Vir", "AU-Tum", "AU-Wac", "BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax",
"BR-Ma2", "BR-Sa1", "BR-Sa3", "BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3",
"CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5",
"CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo",
"CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1",
"CA-TP2", "CA-TP3", "CA-TP4", "CA-Wp1", "CN-Bed", "CN-Cha", "CN-Din",
"CN-Ku1", "CN-Qia", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Lkb",
"DE-Meh", "DE-Obe", "DE-Tha", "DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy",
"FI-Sod", "FR-Fon", "FR-Hes", "FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag",
"IL-Yat", "IS-Gun", "IT-Col", "IT-Cpz", "IT-Lav", "IT-Lma", "IT-Noe",
"IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef",
"JP-Tom", "MY-Pso", "NL-Loo", "PA-Spn", "PT-Esp", "RU-Fyo", "RU-Skp",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fuf", "US-Fwf", "US-Ha1", "US-Ha2",
"US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Me4", "US-Me6",
"US-Moz", "US-NC1", "US-Nc2", "US-NR1", "US-Oho", "US-So2", "US-So3",
"US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb", "US-Wbw", "US-Wcr",
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8", "VU-Coc", "CA-Cbo",
"CN-Lao", "ID-Buk", "JP-Fuj", "RU-Ab", "RU-Be", "RU-Mix"), class = "factor"),
Ecosystem = structure(c(5L, 3L, 5L, 5L, 3L), .Label = c("DBF",
"DNF", "EBF", "ENF", "MF", "SHB", "WSA"), class = "factor"),
Climate = structure(c(3L, 3L, 3L, 3L, 4L), .Label = c("Arid",
"Continental", "Temperate", "Tropical"), class = "factor"),
Management = structure(c(4L, 2L, 3L, 4L, 4L), .Label = c("High",
"Low", "Moderate", "None"), class = "factor"), Stand_Age = c(50,
99, 77.0833333333333, 66.2, 97), NEP = c(1262.24986565392,
251.665998718498, 89.590110051402, 467.821910494384, 560),
GPP = c(2437.9937774539, 1837.82835206203, 1353.91140903122,
1740.68843840394, 3630), NEP_GPP = c(0.517741217113419, 0.143353622997247,
0.0760076059028116, 0.270737440100469, 0.1542699725), Uncert = c(7.29706486170583,
12.3483066698996, 7.59406340226036, 8.2523670901841, 12.1
), Gap_filled = c(0.953310527540233, 0.969648973753497, 0.9395474605477,
0.923408280276339, 1), MAT = c(19.0438821722383, 9.67003296799878,
10.7728316162948, 8.2796213684244, 27.341666667), MAT_An = c(-0.0413522012578611,
0.840055031446541, 0.705896226415094, 0.805524109014675,
0.191666666666667), MAT_Trend = c(0.0119577487502016, 0.0196238509917756,
0.0305871364833632, 0.0381007095629741, 0.0194619147449338
), MAP = c(351.700001291931, 1107.49999958277, 844.158499979666,
998.205467054248, 2279.5), MAP_CRU = c(592.2, 850.925, 852.591666666667,
1098.98, 2279.5), SPI_CRU_Mean = c(-0.352735702252502, 0.188298093749456,
0.0830157542916604, 0.397632136136383, 1.31028809089487),
MAP_An = c(4.14188988095238, -15.8198660714286, 5.39074900793651,
2.28799107142857, 1.55565476190476), MAP_Trend = c(1.38787584993337,
0.147192657259031, 0.747167885331603, 0.104885622031644,
0.841903850753408), CEC_Total_1km = c(14.05, 10.25, 17.975,
21, 9.95), Clay_Silt = c(36.65, 42.125, 32.275, 55, 54.825
), Clay_1km = c(26.425, 31.425, 11.25, 22.45, 38.075), Silt_1km = c(10.225,
10.7, 21.025, 32.55, 16.75), Sand_1km = c(63.35, 57.325,
67.65, 45, 45.275), NOy = c(1.73752826416889, 2.76055219091326,
4.96187381895543, 5.06857284157762, 0.90948457442513), NHx = c(2.50363534311763,
2.99675999696687, 11.2747222582845, 13.9207300067467, 1.53292533883169
), Soil_C_1km = c(3.6, 17, 23.575, 26.65, 8.15), Lat = c(-33.4648,
-35.6566, 51.3092, 50.3051, -1.72000003), Long = c(-66.4598,
148.1516, 4.5206, 5.9981, -51.4500008)), .Names = c("Site_ID",
"Ecosystem", "Climate", "Management", "Stand_Age", "NEP", "GPP",
"NEP_GPP", "Uncert", "Gap_filled", "MAT", "MAT_An", "MAT_Trend",
"MAP", "MAP_CRU", "SPI_CRU_Mean", "MAP_An", "MAP_Trend", "CEC_Total_1km",
"Clay_Silt", "Clay_1km", "Silt_1km", "Sand_1km", "NOy", "NHx",
"Soil_C_1km", "Lat", "Long"), row.names = c(NA, 5L), class = "data.frame")
Choose x and y variable to choose
axis_vars <- c(
"NEP observed [gC.m-2.y-1]" = "NEP",
"NEP predicted [gC.m-2.y-1]" = "prediction",
"CUEe" = "NEP_GPP",
"GPP [gC.m-2.y-1]" = "GPP",
"Forest Age [years]" = "Stand_Age",
"MAT [°C]" = "MAT",
"SPI" = "SPI_CRU_Mean",
"MAP [mm.y-1]" = "MAP",
"MAP trend [mm.y-1]" = "MAP_Trend",
"MAT tremd [°C.y-1]" = "MAT_Trend",
"Clay content [kg.kg-1]" = "Clay_1km",
"N deposition [kg N.ha-1.y-1]" = "NHx"
)
The ui file:
ui<- actionLink <- function(inputId, ...) {
tags$a(href='javascript:void',
id=inputId,
class='action-button',
...)
}
shinyUI(fluidPage(
titlePanel("Data exploration"),
p('Interactive tool for data exploration'),
em('by, ', a('Simon Besnard', href = 'http://www.bgc-jena.mpg.de/bgi/index.php/People/SimonBesnard')),
fluidRow(
column(4,
wellPanel(
selectInput("xvar", "X-axis variable", axis_vars, selected = "Stand_Age"),
selectInput("yvar", "Y-axis variable", axis_vars, selected = "NEP")
),
wellPanel(
h4("Filter data"),
sliderInput("Gap_filled", "Fraction gap filling", 0, 1, value = c(0, 1)),
sliderInput("Uncert", "Uncertainties", 0, 45, value = c(0, 45),
step = 1),
sliderInput("Stand_Age", "Forest age [years]", 0, 400, value = c(0, 400),
0, 400, 400, step = 5),
sliderInput("GPP", "GPP [gC.m-2.y-1]", 0, 4000, value = c(0, 4000),
0, 4000, 4000, step = 100),
sliderInput("MAT", "MAT [°C]", -10, 30, value = c(-10, 30),
-10, 30, 30, step = 1),
sliderInput("MAP", "MAP [mm.y-1]", 0, 4000, value = c(0, 4000),
0, 4000, 400, step = 100),
checkboxGroupInput("Management", "Intensity of management", c("None", "Low", "Moderate", "High"),
selected= c("None", "Low", "Moderate", "High"), inline = T),
checkboxGroupInput("Climate", "Type of climate",
c("Arid", "Continental", "Temperate", "Tropical"),
selected=c("Arid", "Continental", "Temperate", "Tropical"), inline=T),
checkboxGroupInput("Ecosystem",
label="PFTs",
choices=list("DBF", "DNF", "EBF", "ENF", "MF", "SHB"),
selected=c("DBF", "DNF", "EBF", "ENF", "MF","SHB"), inline=T)
)),
mainPanel(
navlistPanel(
tabPanel("Plot", rbokehOutput("rbokeh")),
tabPanel("Statistics", tableOutput("summaryTable")),
tabPanel("Variable importance", plotOutput("Var_Imp")),
tabPanel("Spatial distribution - Flux tower", rbokehOutput("Map_Site"))
),
downloadLink('downloadData', 'Download'))
))
)
And the server file:
server<- shinyServer(function(input, output, session) {
# A reactive expression for filtering dataframe
Update_df <- reactive({
# Lables for axes
xvar_name <- names(axis_vars)[axis_vars == input$xvar]
yvar_name <- names(axis_vars)[axis_vars == input$yvar]
xvar <- prop("x", as.symbol(input$xvar))
yvar <- prop("y", as.symbol(input$yvar))
Flux_Data_df %>%
filter(
Gap_filled >= input$Gap_filled[1] &
Gap_filled <= input$Gap_filled[2] &
Uncert > input$Uncert[1] &
Uncert < input$Uncert[2] &
Stand_Age >= input$Stand_Age[1] &
Stand_Age <= input$Stand_Age[2] &
GPP > input$GPP[1] &
GPP < input$GPP[2] &
MAT > input$MAT[1] &
MAT < input$MAT[2] &
MAP > input$MAP[1] &
MAP < input$MAP[2]) %>%
filter(
Management %in% input$Management &
Climate %in% input$Climate &
Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})
# A reactive expression to add model predicion to a new dataframe
Update_df<- reactive({
for(id in unique(Update_df()$Site_ID)){
lm.Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = Update_df()[Update_df()$Site_ID != id,],
start = list(A= 711.5423, k= -0.2987, offset= -444.2672),
lower= c(A = -Inf, k = -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE);
Update_df()$f_Age[Update_df()$Site_ID == id] <- predict(object = lm.Age, newdata = Update_df()[Update_df()$Site_ID == id,])
} %>% as.data.frame()
})
#Plot scatter plot
output$rbokeh <- renderRbokeh({
plot_data<- Update_df()
g<- figure() %>%
ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, year)) %>%
x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})
output$Map_Site <- renderRbokeh({
plot_data<- Update_df()
p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 600, map_type ="hybrid") %>%
ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID), col = "red", size=5) %>%
tool_box_select() %>%
tool_lasso_select() %>%
tool_reset()
return(p)
})
output$downloadData <- downloadHandler(
filename = function() {
paste('data-', Sys.Date(), '.csv', sep='')
},
content = function(con) {
write.csv(data, con)
}
)
})
shinyApp(ui, server)
Basically, I would like to add a prediction column to the updated dataframe anytime a filtering action is done in the shiny app based on the filtering set-up in the ui file. Anyone can help me out with it?
Here is the way the server.R file should be done:
# Provide R code to build the object.
shinyServer(function(input, output, session) {
# A reactive expression for filtering dataframe
Update_df1 <- reactive({
Flux_Data_df %>%
filter(
Gap_filled >= input$Gap_filled[1] &
Gap_filled <= input$Gap_filled[2] &
Uncert > input$Uncert[1] &
Uncert < input$Uncert[2] &
Stand_Age >= input$Stand_Age[1] &
Stand_Age <= input$Stand_Age[2] &
GPP > input$GPP[1] &
GPP < input$GPP[2] &
MAT > input$MAT[1] &
MAT < input$MAT[2] &
MAP > input$MAP[1] &
MAP < input$MAP[2]) %>%
filter(
Management %in% input$Management &
Disturbance %in% input$Disturbance &
Climate %in% input$Climate &
Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})
# A reactive expression to add model predicion to a new dataframe
Age<-reactive({
prediction<- Update_df1()
for(id in unique(prediction$Site_ID)){
lm_Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = prediction[prediction$Site_ID != id,],
start = list(A= 711.5423, k= -0.2987, offset= -444.2672),
lower= c(A = -Inf, k = -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE)
prediction$f_Age[prediction$Site_ID == id] <- predict(object = lm_Age, newdata = prediction[prediction$Site_ID == id,])
}
return(prediction)
})
Final_df<-reactive({
df<- Age()
for(id in unique(df$Site_ID)){
lm_NEP<- lm(NEP~ (f_Age + Stand_Age + GPP)^2 +
Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
data = df[df$Site_ID != id,], weights = 1/Uncert)
df$prediction[df$Site_ID == id] <- predict(object = lm_NEP, newdata = df[df$Site_ID == id,])
}
return(df)
})
Model_Performance<- reactive({
Stat<- data.frame(matrix(ncol = 3, nrow = 1))
colnames(Stat)<- c("R2", "MEF", "RMSE")
Stat$R2<- round(cor(Final_df()$prediction, Final_df()$NEP, use="complete")^2, digits = 2)
Stat$RMSE <- round(rmse(Final_df()$prediction, Final_df()$NEP), digits = 2)
Stat$MEF<-round(NSE(Final_df()$prediction, Final_df()$NEP, na.rm=TRUE), digits=2)
return(Stat)
})
Var_Imp<- reactive({
Imp<- data.frame(matrix(ncol = 7, nrow = 1))
colnames(Imp)<- c("Age", "GPP*Age", "GPP*MAP", "Clay content", "Ndepo*SPI", "GPP", "Ndepo*Age")
VarImp_NEP<- varImp(lm(NEP ~ (f_Age + Stand_Age + GPP)^2 +
Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
data=Final_df(), weights = 1/Uncert))
Imp$Age<- (VarImp_NEP$Overall[1] + VarImp_NEP$Overall[2] + VarImp_NEP$Overall[5])/ sum(VarImp_NEP$Overall)
Imp["GPP*Age"]<- (VarImp_NEP$Overall[6] + VarImp_NEP$Overall[7])/ sum(VarImp_NEP$Overall)
Imp["GPP*MAP"]<- VarImp_NEP$Overall[8]/ sum(VarImp_NEP$Overall)
Imp["Clay content"]<- VarImp_NEP$Overall[4]/ sum(VarImp_NEP$Overall)
Imp["Ndepo*SPI"]<- VarImp_NEP$Overall[9]/ sum(VarImp_NEP$Overall)
Imp["GPP"]<- VarImp_NEP$Overall[3]/ sum(VarImp_NEP$Overall)
Imp["Ndepo*Age"]<- VarImp_NEP$Overall[10]/ sum(VarImp_NEP$Overall)
Imp<- gather(Imp)
colnames(Imp)<- c("Variable", "Percentage")
Imp$Percentage<- round(Imp$Percentage*100, digits = 1)
return(Imp)
})
#Plot Univariate
output$Univariate <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
g<- figure() %>%
ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, Stand_Age)) %>%
x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})
#Plot model performance
output$Model_perf <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
g<- figure() %>%
ly_points(x = prediction, y = NEP, data=plot_data, hover= c(Site_ID, Stand_Age, Ecosystem)) %>%
ly_abline(a=0, b=1) %>%
x_axis("NEP predicted [gC.m-2.y-1]") %>%
y_axis("NEP observed [gC.m-2.y-1]") %>%
x_range(c(-700, 1500)) %>%
y_range(c(-700, 1500))
return(g)
})
#Plot Variable importance
output$Var_Imp <- renderRbokeh({
plot_data<- Var_Imp()
g<- figure() %>%
ly_points(x =Percentage, y = Variable, data=plot_data, hover= c(Percentage)) %>%
x_axis("Percentage [%]") %>%
y_axis("")
return(g)
})
output$Map_Site <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 1000, map_type ="hybrid") %>%
ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID, Stand_Age), col = "red", size=5) %>%
tool_box_select() %>%
tool_lasso_select() %>%
tool_reset() %>%
tool_resize()
return(p)
})
output$Update_data = renderDataTable({
Final_df()
})
output$Summary_Table = renderDataTable({
Model_Performance()
})
output$downloadData <- downloadHandler(
filename = function() {paste('Updated.csv', sep='') },
content = function(file) {
write.csv(Final_df(), file)
}
)
})

Resources