Related
I am trying to plot the following data (pastebin link: https://pastebin.com/w1WaEcPd) as a box plot with the trinucleotide identity as the x column and the Frequency as the y column. I have attached a picture of the graph I am envisioning and the code I have so far. I am getting the error:
"Error in FUN(X[[i]], ...) : object 'gene' not found".
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
# Trinucleotide frequencies: one row per trinucleotide, six sample columns
# plus a control column. The header row must end with a comma like every other
# row; without it the call does not parse.
marte <- tribble(
  ~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6, ~control,
  "AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925, 0.024144802,
  "TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925, 0.013676519,
  "AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701, 0.016027898,
  "TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513, 0.027512454,
  "GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828, 0.01232813,
  "CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884, 0.014466955,
  "GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108, 0.014706078,
  "CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438, 0.002198605,
  "TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094, 0.002630355,
  "ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358, 0.017110594,
  "TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073, 0.037754899,
  "ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431, 0.010269014,
  "CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547, 0.015795417,
  "GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367, 0.019063434,
  "CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154, 0.011570907,
  "GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478, 0.020245766,
  "GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295, 0.015895051,
  "CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007, 0.002889406,
  "GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574, 0.010773829,
  "CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711, 0.016114248,
  "AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119, 0.018087014,
  "TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888, 0.018425772,
  "AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266, 0.018731318,
  "TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607, 0.016067752,
  "CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991, 0.02738625,
  "GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025, 0.011776818,
  "CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054, 0.021819993,
  "GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319, 0.013530389,
  "TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768, 0.020591166,
  "ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488, 0.018425772,
  "TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761, 0.01798738,
  "ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374, 0.002789771,
  "CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303, 0.017369645,
  "GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382, 0.002504151,
  "CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815, 0.019262703,
  "GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433, 0.017827964,
  "TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131, 0.025539688,
  "ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214, 0.011697111,
  "TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725, 0.02247094,
  "ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925, 0.013457323,
  "GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324, 0.016745267,
  "CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767, 0.013955497,
  "GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164, 0.013556958,
  "CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967, 0.00212554,
  "AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417, 0.012441049,
  "TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247, 0.01454002,
  "AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824, 0.019667884,
  "TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226, 0.021766855,
  "TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612, 0.017827964,
  "ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068, 0.016917967,
  "TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939, 0.016898041,
  "ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825, 0.016446363,
  "CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942, 0.02465626,
  "GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214, 0.012965792,
  "CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882, 0.002949186,
  "GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995, 0.014792428,
  "AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253, 0.018113584,
  "TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713, 0.0193092,
  "AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451, 0.016094321,
  "TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745, 0.010355364,
  "GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116, 0.013317835,
  "CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654, 0.003659914,
  "GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714, 0.017230156,
  "CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177, 0.022743275
)
# Replace the placeholder names with <replicate letter><group number> names.
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80", "control")
# Reshape to long format (one row per trinucleotide/sample), label every
# measurement with its group and replicate, then draw one box-plot facet per
# trinucleotide with the raw points jittered on top.
marte %>%
  pivot_longer(-gene, names_to = "name") %>%
  mutate(
    # The "control" column contains neither "40" nor "80"; a two-way if_else()
    # would silently file it under "80s", so it gets its own label.
    group = case_when(
      str_detect(name, "40") ~ "40s",
      str_detect(name, "80") ~ "80s",
      TRUE ~ "control"
    ),
    # Match "<letter><digit>" at the start of the name so that "control" is
    # not mistaken for replicate "c"; anything else has no replicate.
    rep = case_when(
      str_detect(name, "^a\\d") ~ "rep1",
      str_detect(name, "^b\\d") ~ "rep2",
      str_detect(name, "^c\\d") ~ "rep3",
      TRUE ~ NA_character_
    )
  ) %>%
  ggplot(aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = .5) +
  coord_flip() +
  facet_wrap(~gene, ncol = 4)
That would be a possible solution using the tidyverse packages. Here I recreated the data table; you would just need to rename the columns, run the part with pivot_longer and mutate to prepare the data for plotting, and then plot with ggplot2.
I am making a few assumptions here, if it is not exactly what you were thinking, please write a comment.
library(dplyr)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.0
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513,
"AAA", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828,
"TAG", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884,
"AGC", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108,
"TGT", 0.00230219 , 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438,
"AAA", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094,
"TAG", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358,
"AGC", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073,
"TGT", 0.01259198 , 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295,
"CGG", 0.00377216 , 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")
# Long format -> group/replicate labels -> keep four trinucleotides -> one
# horizontal box plot per replicate, stacked in a single column of facets.
marte %>%
  pivot_longer(cols = -gene, names_to = "name") %>%
  mutate(
    # Names containing "40" form the first group, all others the second.
    group = if_else(str_detect(name, "40"), "group 1", "group 2"),
    # The replicate is identified by the letter found in the column name.
    rep = case_when(
      str_detect(name, "a") ~ "rep1",
      str_detect(name, "b") ~ "rep2",
      str_detect(name, "c") ~ "rep3",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(gene %in% c("AAA", "TAG", "AGC", "TGT")) %>%
  ggplot(mapping = aes(x = rep, y = value, color = group)) +
  geom_boxplot() +
  coord_flip() +
  facet_wrap(vars(gene), ncol = 1)
Created on 2021-07-01 by the reprex package (v2.0.0)
Edit
# Box plot of the values per group, one facet per trinucleotide.
# Every column except `gene` is stacked into name/value pairs first.
marte %>% pivot_longer(-gene, names_to = "name") %>%
# Column names containing "40" are labelled "40s", everything else "80s".
mutate(group = if_else(str_detect(name, "40"), "40s", "80s"),
# Replicate label derived from the letter present in the column name;
# names without a, b or c get NA.
rep = case_when(
str_detect(name, "a") ~ "rep1",
str_detect(name, "b") ~ "rep2",
str_detect(name, "c") ~ "rep3",
TRUE ~ NA_character_
)
) %>%
ggplot(aes(x = group, y = value, color = group)) +
geom_boxplot() +
# Overlay the individual observations, half transparent.
geom_jitter(alpha = .5) +
# Swap the axes so the boxes are drawn horizontally.
coord_flip() +
# One panel per trinucleotide, all in a single column.
facet_wrap(~gene, ncol = 1)
Edit 2
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 ,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 ,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 ,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 ,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 ,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 ,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 ,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 ,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 ,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 ,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 ,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 ,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 ,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 ,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 ,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 ,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 ,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 ,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 ,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 ,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 ,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 ,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 ,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 ,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 ,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 ,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 ,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 ,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 ,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 ,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 ,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 ,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 ,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 ,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 ,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 ,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 ,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 ,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 ,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 ,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 ,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 ,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 ,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 ,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 ,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 ,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 ,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 ,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 ,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 ,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 ,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 ,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 ,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 ,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 ,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 ,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 ,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 ,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 ,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 ,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 ,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 ,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177,
)
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")
# Stack the six sample columns, tag each value with its group ("40s"/"80s")
# and replicate, then draw horizontal box plots with jittered points in a
# four-column grid of per-trinucleotide facets.
marte %>%
  pivot_longer(cols = -gene, names_to = "name") %>%
  mutate(
    group = if_else(str_detect(name, "40"), "40s", "80s"),
    rep = case_when(
      str_detect(name, "a") ~ "rep1",
      str_detect(name, "b") ~ "rep2",
      str_detect(name, "c") ~ "rep3",
      TRUE ~ NA_character_
    )
  ) %>%
  ggplot(mapping = aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  coord_flip() +
  facet_wrap(vars(gene), ncol = 4)
Created on 2021-07-09 by the reprex package (v2.0.0)
I'm trying to find a neat Dplyr solution to convert this dataframe;
# Input example: one row per rule condition. Lift and Coverage are stored as
# strings and repeat on every row of the same rule.
Rule <- c('Rule 1', 'Rule 1', 'Rule 1', 'Rule 1', 'Rule 2', 'Rule 2', 'Rule 2')
Condition <- c('1 of 4', '2 of 4', '3 of 4', '4 of 4', '1 of 3', '2 of 3', '3 of 3')
Clause <- c('Temperature > 60', 'Temperature < 90', 'Rain = 0', 'Wind < 20', 'Temperature > 55', 'Temperature < 85', 'Rain <= 2')
Lift <- c('1.30', '1.30', '1.30', '1.30', '1.60', '1.60', '1.60')
Coverage <- c('20%','20%','20%','20%','35%','35%','35%')
DF <- data.frame(Rule, Condition, Clause, Lift, Coverage)
Into this dataframe;
# Desired output: an all-blank row separates the two rules, and Lift/Coverage
# are blank except on the last row of each rule.
# Note: this overwrites the Rule/Condition/Clause/Lift/Coverage vectors above.
Rule <- c('Rule 1', 'Rule 1', 'Rule 1', 'Rule 1','', 'Rule 2', 'Rule 2', 'Rule 2')
Condition <- c('1 of 4', '2 of 4', '3 of 4', '4 of 4','', '1 of 3', '2 of 3', '3 of 3')
Clause <- c('Temperature > 60', 'Temperature < 90', 'Rain = 0', 'Wind < 20','', 'Temperature > 55', 'Temperature < 85', 'Rain <= 2')
Lift <- c('', '', '', '1.30', '','', '', '1.60')
Coverage <- c('','','','20%','','','','35%')
Result <- data.frame(Rule, Condition, Clause, Lift, Coverage)
Notice the new blank rows that separate the rules, and that the repeated Lift and Coverage metrics have been removed, retaining only the Lift and Coverage from the final row of each rule.
You can create a blank row to insert in every Rule :
# A single all-blank row carrying the same column names as DF; it is appended
# after each rule as a visual separator.
empty_df <- setNames(
  data.frame(matrix("", nrow = 1L, ncol = ncol(DF))),
  names(DF)
)
Split the data for each unique Rule, replace the repeating values in Lift and Coverage column with blank add empty_df and combine the result.
library(dplyr)
# For every rule: blank out Lift/Coverage on all rows but the last one,
# append the blank separator row, stack the pieces back together and finally
# drop the separator that trails the last rule.
DF %>%
  group_split(Rule) %>%
  purrr::map(function(rule_rows) {
    rule_rows %>%
      mutate(across(
        c(Lift, Coverage),
        # A value is "duplicated from the end" on every row except the last
        # occurrence, which is the one to keep.
        function(metric) replace(metric, duplicated(metric, fromLast = TRUE), "")
      )) %>%
      bind_rows(empty_df)
  }) %>%
  bind_rows() %>%
  # Remove the blank row from last `Rule`.
  slice(-n())
# Rule Condition Clause Lift Coverage
# <chr> <chr> <chr> <chr> <chr>
#1 "Rule 1" "1 of 4" "Temperature > 60" "" ""
#2 "Rule 1" "2 of 4" "Temperature < 90" "" ""
#3 "Rule 1" "3 of 4" "Rain = 0" "" ""
#4 "Rule 1" "4 of 4" "Wind < 20" "1.30" "20%"
#5 "" "" "" "" ""
#6 "Rule 2" "1 of 3" "Temperature > 55" "" ""
#7 "Rule 2" "2 of 3" "Temperature < 85" "" ""
#8 "Rule 2" "3 of 3" "Rain <= 2" "1.60" "35%"
I am trying to create a map plot using ggplot2. I want to show the counties with 0 acreage in blue and counties with higher acreage on a gradient from red to green. I am able to achieve this in the map's fill colors, but the legend does not show the blue for the 0.0 acreage value. I pre-divided the colors and values and used scale_fill_gradientn(); however, the legend still does not come out as intended. I am very unsure of what is going wrong, and help is highly appreciated. Please refer to the reproducible code below.
library(tidyverse)
library(BBmisc)
d <- data.frame(fips = c(10001 , 10003 , 10005 , 21001 , 21003 , 21005 , 21007 , 21009 , 21011 , 21015 , 21017 , 21021 , 21023 , 21027 , 21029 , 21031 , 21033 , 21035 , 21037 , 21039 , 21041 , 21043 , 21045 , 21047 , 21049 , 21053 , 21055 , 21057 , 21059 , 21061 , 21065 , 21067 , 21069 , 21073 , 21075 , 21077 , 21079 , 21081 , 21083 , 21085 , 21087 , 21089 , 21091 , 21093 , 21097 , 21099 , 21101 , 21103 , 21105 , 21107 , 21109 , 21111 , 21113 , 21117 , 21121 , 21123 , 21125 , 21135 , 21137 , 21139 , 21141 , 21143 , 21145 , 21147 , 21149 , 21151 , 21155 , 21157 , 21159 , 21161 , 21163 , 21165 , 21167 , 21169 , 21171 , 21173 , 21175 , 21177 , 21179 , 21181 , 21183 , 21185 , 21187 , 21191 , 21197 , 21199 , 21201 , 21203 , 21205 , 21207 , 21209 , 21211 , 21213 , 21215 , 21217 , 21219 , 21221 , 21223 , 21225 , 21227 , 21229 , 21231 , 21233 , 21235 , 21239 , 24001 , 24003 , 24005 , 24009 , 24011 , 24013 , 24015 , 24017 , 24019 , 24021 , 24023 , 24025 , 24027 , 24029 , 24031 , 24033 , 24035 , 24037 , 24039 , 24041 , 24043 , 24045 , 24047 , 37001 , 37003 , 37005 , 37007 , 37009 , 37011 , 37013 , 37015 , 37017 , 37019 , 37021 , 37023 , 37025 , 37027 , 37029 , 37031 , 37033 , 37035 , 37037 , 37039 , 37041 , 37043 , 37045 , 37047 , 37049 , 37051 , 37053 , 37055 , 37057 , 37059 , 37061 , 37063 , 37065 , 37067 , 37069 , 37071 , 37073 , 37077 , 37079 , 37081 , 37083 , 37085 , 37087 , 37089 , 37091 , 37093 , 37095 , 37097 , 37099 , 37101 , 37103 , 37105 , 37107 , 37109 , 37111 , 37113 , 37115 , 37117 , 37119 , 37123 , 37125 , 37127 , 37129 , 37131 , 37133 , 37135 , 37137 , 37139 , 37141 , 37143 , 37145 , 37147 , 37149 , 37151 , 37153 , 37155 , 37157 , 37159 , 37161 , 37163 , 37165 , 37167 , 37169 , 37171 , 37173 , 37175 , 37177 , 37179 , 37181 , 37183 , 37185 , 37187 , 37189 , 37191 , 37193 , 37195 , 37197 , 37199 , 42009 , 42011 , 42013 , 42017 , 42025 , 42027 , 42029 , 42037 , 42041 , 42043 , 42045 , 42055 , 42057 , 42061 , 42067 , 42071 , 42075 , 42077 , 42087 , 42089 , 42091 
, 42093 , 42095 , 42097 , 42099 , 42101 , 42107 , 42109 , 42119 , 42133 , 51001 , 51003 , 51005 , 51007 , 51009 , 51011 , 51015 , 51017 , 51019 , 51021 , 51023 , 51025 , 51029 , 51031 , 51033 , 51035 , 51036 , 51037 , 51041 , 51043 , 51045 , 51047 , 51049 , 51051 , 51053 , 51057 , 51059 , 51061 , 51063 , 51065 , 51067 , 51069 , 51071 , 51073 , 51075 , 51077 , 51079 , 51081 , 51083 , 51085 , 51087 , 51089 , 51091 , 51093 , 51095 , 51097 , 51099 , 51101 , 51103 , 51105 , 51107 , 51109 , 51111 , 51113 , 51115 , 51117 , 51119 , 51121 , 51125 , 51127 , 51131 , 51133 , 51135 , 51137 , 51139 , 51141 , 51143 , 51145 , 51147 , 51149 , 51153 , 51155 , 51157 , 51159 , 51161 , 51163 , 51165 , 51167 , 51169 , 51171 , 51173 , 51175 , 51177 , 51179 , 51181 , 51183 , 51185 , 51187 , 51191 , 51193 , 51195 , 51197 , 51199 , 51515 , 51520 , 51530 , 51550 , 51590 , 51595 , 51620 , 51630 , 51640 , 51650 , 51660 , 51678 , 51680 , 51683 , 51700 , 51730 , 51740 , 51750 , 51770 , 51775 , 51790 , 51800 , 51810 , 51820 , 51840),
avg_area_acres = c(274826347 , 111810520 , 356958995 , 12225467 , 9850285 , 0 , 161517982 , 36513962 , 10627321 , 8500976 , 11011159 , 5127695 , 0 , 67608594 , 16920827 , 57891895 , 90677813 , 174713438 , 0 , 111737090 , 9175389 , 0 , 9109121 , 256162406 , 4402135 , 2327886 , 52759747 , 5463256 , 319309902 , 15382455 , 3036180 , 10039222 , 16030896 , 3197991 , 212521576 , 4046856 , 0 , 0 , 264745451 , 38377025 , 14006839 , 3338657 , 49178736 , 109703257 , 8824662 , 11173159 , 320523707 , 16273015 , 186374168 , 154012358 , 0 , 6070285 , 3642904 , 0 , 0 , 78336064 , 0 , 13759312 , 17568827 , 61018542 , 236490220 , 27424132 , 96956005 , 0 , 206767773 , 0 , 31173369 , 63229941 , 0 , 15128662 , 61045205 , 0 , 4586437 , 4128988 , 2226557 , 3643585 , 0 , 65578941 , 59265028 , 2428114 , 126995190 , 13925398 , 0 , 4209674 , 8296056 , 28642983 , 0 , 3710771 , 3339207 , 14816021 , 4856228 , 76817157 , 155771635 , 20563941 , 28336357 , 178198387 , 72704615 , 12853799 , 238138534 , 107598640 , 13661127 , 22993123 , 155120806 , 0 , 6206647 , 0 , 18314776 , 43712808 , 11738189 , 196002816 , 82823831 , 73270421 , 46040458 , 189830024 , 112293371 , 0 , 43395064 , 13963462 , 160870422 , 58778462 , 16594705 , 238589996 , 64963441 , 69212640 , 169997441 , 36023074 , 96909783 , 141439602 , 14451553 , 5627142 , 0 , 45419938 , 0 , 0 , 250418385 , 85662981 , 82094768 , 36026453 , 0 , 3060241 , 28376260 , 3218257 , 121200604 , 100712697 , 8906102 , 26029460 , 10120473 , 910542.7 , 29137366 , 890308.4 , 41413487 , 209038639 , 90114900 , 79827082 , 63393639 , 4586437 , 31454134 , 26271517 , 195279264 , 3895807 , 137398970 , 12913433 , 74078849 , 4301357 , 61049669 , 20523472 , 109219080 , 36594962 , 98945220 , 108001879 , 0 , 0 , 55219178 , 46797190 , 108652520 , 50683743 , 0 , 226125804 , 53359636 , 26675951 , 116590206 , 21697878 , 0 , 0 , 0 , 62506851 , 4857862 , 2175578 , 21050066 , 122578107 , 2495561 , 109878020 , 55702537 , 13561244 , 78614157 , 186377501 , 58814313 , 128087113 , 
49549681 , 227994714 , 1315228 , 40116230 , 7958818 , 399060448 , 18095735 , 66551696 , 4047642 , 229363918 , 40847556 , 75376798 , 6880835 , 35299653 , 0 , 0 , 139744197 , 212121480 , 29671505 , 70598678 , 31292071 , 133027044 , 0 , 229286061 , 1854809 , 139092802 , 58819874 , 0 , 12750034 , 100796778 , 13602342 , 36732059 , 3508323 , 35866276 , 60717516 , 44689240 , 57780664 , 46795932 , 0 , 61018149 , 3238271 , 13053705 , 24693872 , 108004520 , 80801534 , 54323399 , 17708926 , 7993956 , 14673155 , 24285225 , 55048171 , 80636580 , 30279791 , 0 , 31268017 , 32222596 , 40641504 , 164917845 , 136580985 , 6476463 , 0 , 23802746 , 0 , 4654828 , 14087839 , 0 , 0 , 0 , 2023428 , 29851146 , 2428114 , 6315359 , 73594672 , 0 , 33032534 , 7083806 , 6962731 , 6396171 , 0 , 28336357 , 2226714 , 0 , 60721708 , 86062133 , 0 , 19673255 , 0 , 2428114 , 3374162 , 3136942 , 0 , 32708722 , 13196839 , 0 , 0 , 47560780 , 14006336 , 77238415 , 20642661 , 0 , 0 , 79749226 , 7014551 , 57969280 , 21351097 , 61288393 , 29446382 , 0 , 14370898 , 9958599 , 11536134 , 15545147 , 7589192 , 37162842 , 25584243 , 0 , 2430000 , 24936054 , 88654133 , 67926935 , 4453240 , 15464398 , 2529678 , 0 , 13035090 , 5505674 , 0 , 43800252 , 6638856 , 0 , 0 , 53434851 , 0 , 0 , 20806626 , 0 , 0 , 11820782 , 0 , 131809529 , 9067913 , 5424297 , 67685193 , 105413589 , 0 , 0 , 0 , 66551193 , 0 , 0 , 0 , 0 , 0 , 0 , 116260925 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 85624964 , 55463781 , 0 , 0))
# Two-character state prefixes of the county FIPS codes in `d`.
# The codes are zero-padded to five digits first, so a state whose code is
# below 10 (a four-digit number once stored numerically) still yields a valid
# prefix. `d$fips` is spelled out in full instead of relying on partial `$`
# matching of `d$fip`.
st.id <- unique(substr(sprintf("%05d", as.integer(d$fips)), 1, 2))
us.map <- tigris::counties(state = st.id, cb = TRUE, year = 2016)
# Produce counties map
county_map <- fortify(us.map, region = "GEOID")
# Obtain state map, to build the state boundaries
#states_map <- map_data("state")
state_dat <- tigris::states(cb = TRUE, year = 2016)
state_dat <- state_dat[state_dat$STATEFP %in% c(st.id), ]
# Remove Alaska(2), Hawaii(15), Puerto Rico (72), Guam (66), Virgin Islands (78), American Samoa (60), Mariana Islands (69), Micronesia (64), Marshall Islands (68), Palau (70), Minor Islands (74)
state_dat <- state_dat[!state_dat$STATEFP %in% c("02", "15", "72", "66", "78", "60", "69", "64", "68", "70", "74"), ]
# Make sure other outlying islands are removed.
state_dat <- state_dat[!state_dat$STATEFP %in% c("81", "84", "86", "87", "89", "71", "76", "95", "79"), ]
# Produce state map
state_dat <- fortify(state_dat, region = "STUSPS")
# Choose which counties to keep: 'all' (no filtering), 'zero acres' or
# 'non-zero acres'. Uncomment one of the alternatives to change the subset.
str_fipsselect <- 'all'
#str_fipsselect <- 'zero acres'
#str_fipsselect <- 'non-zero acres'
d <- switch(str_fipsselect,
  'zero acres' = d %>% filter(avg_area_acres == 0),
  'non-zero acres' = d %>% filter(avg_area_acres > 0),
  d  # any other value: leave the data untouched
)
# Build the colours (`c_cols`) and their positions (`v_cols`) that are later
# passed to scale_fill_gradientn().
# Positions are taken from a range-normalised copy of the acreage column
# (presumably rescaled to [0, 1] by BBmisc::normalize -- confirm).
d$norm_avg_area_acres = normalize(d$avg_area_acres, method = "range")
# When the smallest acreage is zero (or negative) use three stops so the
# minimum gets its own colour; otherwise a plain two-colour gradient.
if (min(d$avg_area_acres) <= 0){
c_cols = c("blue","red","green")
# Stop 1: normalised value of the first row holding the minimum acreage.
v_cols = c(as.numeric(d[which(d$avg_area_acres==min(d$avg_area_acres))[1],"norm_avg_area_acres"]),
# Stop 2: normalised value of the smallest acreage that differs from the minimum.
as.numeric(d[which(d$avg_area_acres == min(d$avg_area_acres[d$avg_area_acres!=min(d$avg_area_acres)]))[1], "norm_avg_area_acres"][1]),
# Stop 3: normalised value of the first row holding the maximum acreage.
as.numeric(d[which(d$avg_area_acres==max(d$avg_area_acres))[1],"norm_avg_area_acres"]))
} else {
c_cols = c("green", "red")
# Two stops only: the normalised minimum and maximum.
v_cols = c(as.numeric(d[which(d$avg_area_acres==min(d$avg_area_acres))[1],"norm_avg_area_acres"]),
as.numeric(d[which(d$avg_area_acres==max(d$avg_area_acres))[1],"norm_avg_area_acres"]))
}
# County map: grey state background and outlines, with the studied counties
# filled by average acreage using the colour stops computed above.
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
# Colours are made 70% opaque; `values` places each colour along the scale.
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
# Legend labels in scientific notation with two significant digits.
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres") +
coord_map()
enter image description here
It seems that the issue has more to do with visualizing such a small block of color in the color bar. The colourbar guide can help you tweak this. Two potential options:
Lengthen the color bar
You can lengthen the color bar to make the blue more visible. The example below is a bit ridiculous but you can play around with adjusting the color_bar height and the size of the overall plot.
# Same county map as before, but with a much taller legend colour bar so the
# thin band for the lowest values becomes visible.
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres",
# Only difference from the original plot: the legend bar height is set to 40.
guide = guide_colourbar(barheight = 40)) +
coord_map()
Adjust the bins in the colorbar
This will give more weight to the blue portion by reducing the number of nbin. It is kind of a misrepresentation of your data though, since you have a sharp break between 0 and those values above 0. Using something like color_step or a binned fill scale may be another way to look into this, depending on the distribution of your data.
# Same county map as before, but the legend colour bar is drawn with fewer
# bins so the lowest-value colour occupies a larger share of the bar.
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres",
# Only difference from the original plot: bar height 10 and 15 bins.
guide = guide_colourbar(barheight = 10, nbin = 15)) +
coord_map()
I'm trying to plot a multiple bar graph for a customer satisfaction data set using Ggplot on R Shiny. Satisfaction varies from 1 to 5, where 1 being the lowest and 5 being the highest. This is how it looks like: (This is a sample of the original data set)
Satisfaction_Level <- c(4 ,4 ,5 ,5 ,3 ,4 ,4 ,4 ,4 ,5 ,1 ,4 ,1 ,3 ,4 ,4 ,1 ,4 ,4 ,1 ,4 ,4 ,4 ,4 ,2 ,4 ,2 ,4 ,3 ,1)
Location <- c("C" ,"C" ,"C" ,"B" ,"C" ,"C" ,"C" ,"A" ,"B" ,"C" ,"C" ,"C" ,"C" ,"B" ,"C" ,"C" ,"C" ,"B" ,"C" ,"B" ,"C" ,"C" ,"C" ,"B" ,"B" ,"B" ,"B" ,"B" ,"B" ,"B")
With the below code, I was able to generate this graph:
# Satisfaction counts: tabulate the first column of `mydata` and build
# "<count> (<percent>%)" labels for the bars.
satisCounts <- data.frame(table(mydata[, 1]))
colnames(satisCounts) <- c("satisLevel", "satisCount")
# `2` is the `digits` argument of round(). It used to sit outside the round()
# call, where as.character() ignored it and percentages lost their decimals.
satisCounts$perc3 <- as.character(
  round(100 * satisCounts$satisCount / sum(satisCounts$satisCount), 2)
)
satisCounts$lab3 <- paste0(satisCounts$satisCount, " (", satisCounts$perc3, "%)")
# Bar chart of customer counts per satisfaction level, one fixed colour per
# level and the count/percentage label printed just above each bar.
output$graph3 <- renderPlot({
  ggplot(satisCounts, aes(x = satisLevel, y = satisCount)) +
    geom_col(aes(fill = satisLevel)) +
    geom_text(aes(label = lab3), vjust = -0.5) +
    scale_fill_manual(
      values = c("#de425b", "#f2955a", "#ffe48f", "#a9b957", "#488f31")
    ) +
    labs(
      x = "Satisfaction Level",
      y = "Customer count",
      fill = "Satisfaction Level"
    )
})
But what I actually want is to make this a multiple bar graph where all the satisfaction levels are broken down by location. Could you please help me fix this? Thank you!
From your question it is not clear how the final plot should look. One option would be to use facetting. Another option would be to use a dodged bar chart. But the basic idea is to first compute counts by location and satisfaction level. Have a look at this.
library(dplyr)
library(ggplot2)
# Sample data: one satisfaction score (1-5) and one location per customer,
# combined into a data frame with one row per customer.
Satisfaction_Level<-c(4 ,4 ,5 ,5 ,3 ,4 ,4 ,4 ,4 ,5 ,1 ,4 ,1 ,3 ,4 ,4 ,1 ,4 ,4 ,1 ,4 ,4 ,4 ,4 ,2 ,4 ,2 ,4 ,3 ,1)
Location <- c("C" ,"C" ,"C" ,"B" ,"C" ,"C" ,"C" ,"A" ,"B" ,"C" ,"C" ,"C" ,"C" ,"B" ,"C" ,"C" ,"C" ,"B" ,"C" ,"B" ,"C" ,"C" ,"C" ,"B" ,"B" ,"B" ,"B" ,"B" ,"B" ,"B")
df <- data.frame(
satisLevel = Satisfaction_Level,
Location = Location,
# Keep Location as character rather than converting it to a factor.
stringsAsFactors = FALSE
)
#satisfaction counts
# satisCounts <- data.frame(table(mydata[,1]))
# colnames(satisCounts) <- c("satisLevel", "satisCount")
# satisCounts$perc3 <- as.character(round(100* satisCounts$satisCount / sum(satisCounts$satisCount)), 2)
# satisCounts$lab3 <- paste(satisCounts$satisCount, paste("(",satisCounts$perc3,"%)",sep=""),sep=" ")
satisCounts <- df %>%
count(Location, satisLevel, name = "satisCount") %>%
mutate(satisLevel = factor(satisLevel))
satisCounts$perc3 <- as.character(round(100* satisCounts$satisCount / sum(satisCounts$satisCount)), 2)
satisCounts$lab3 <- paste(satisCounts$satisCount, paste("(",satisCounts$perc3,"%)",sep=""),sep=" ")
# Option 1: Use facetting
satisCounts %>%
ggplot(aes(x=satisLevel, y=satisCount)) +
geom_bar(stat = "identity", aes(fill=satisLevel)) +
labs(y= "Customer count", x="Satisfaction Level") +
scale_fill_manual(values = c("#de425b", "#f2955a", "#ffe48f", "#a9b957", "#488f31")) +
labs(fill = "Satisfaction Level") +
geom_text(aes(label=lab3), vjust=-.5) +
facet_wrap(~ Location, ncol = 1)
# Option 2: Dodged bar chart
satisCounts %>%
ggplot(aes(x=Location, y=satisCount, fill=satisLevel)) +
geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
labs(y= "Customer count", x="Satisfaction Level") +
scale_fill_manual(values = c("#de425b", "#f2955a", "#ffe48f", "#a9b957", "#488f31")) +
labs(fill = "Satisfaction Level") +
geom_text(aes(label=lab3), vjust=-.5, position = position_dodge2(.9, preserve = "single"))
Created on 2020-04-05 by the reprex package (v0.3.0)
I am referencing this example code to generate a plot that contains multiple subplots with their own titles, but I am having trouble getting the titles of the subplots properly labeled in a loop.
For the ease of exposition, I use the 1 by 2 plots environment as an example.
# Attempt to draw two radar charts side by side, one per data frame in a list.
# NOTE(review): `mfrow(1, 2)` is evaluated as a call to a function named
# `mfrow`; par() takes the layout as a named argument, e.g. mfrow = c(1, 2).
par(mfrow(1, 2))
# Load fmsb, the package that provides radarchart().
require(fmsb)
# Two one-row data frames, each holding 10 random integers drawn from 2..20.
data1=as.data.frame(matrix( sample(2:20, 10, replace=T) , ncol=10))
data2=as.data.frame(matrix( sample(2:20, 10, replace=T) , ncol=10))
# Label the 10 columns with subject names (same names for both data frames).
colnames(data1)=c("math" , "english" , "biology" , "music" , "R-coding", "data-viz" , "french" , "physic", "statistic", "sport" )
colnames(data2)=c("math" , "english" , "biology" , "music" , "R-coding", "data-viz" , "french" , "physic", "statistic", "sport" )
# Prepend a row of 20s and a row of 0s to each data frame -- presumably the
# per-axis maximum and minimum that radarchart() expects; confirm against the
# fmsb documentation.
df1=rbind(rep(20,10) , rep(0,10) , data1)
df2=rbind(rep(20,10) , rep(0,10) , data2)
# Unnamed list of the two data frames to plot.
my.list <- list(df1, df2)
# NOTE(review): `i` is used inside `my.list[[i]]` before the loop has assigned
# it, so the sequence to iterate over cannot be evaluated as intended.
for (i in my.list[[i]]) {
radarchart(i, axistype=1 ,
# polygon appearance: line colour, fill colour, line width
pcol=rgb(0.2,0.5,0.5,0.9) , pfcol=rgb(0.2,0.5,0.5,0.5) , plwd=4 ,
# grid appearance and axis labels (0, 5, ..., 20)
cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,20,5), cglwd=0.8,
# size of the variable (axis name) labels
vlcex=0.8
)
}
However, this code block didn't return anything as it requires the input to be a dataframe. Also, if I want to put a title argument in radarchart() by assigning each dataframe a unique name (say, student 1, student 2) and using them as titles for each subplot, how can I achieve that in a loop?
Your code had some errors; here is a corrected version that produces the expected output:
library(fmsb)

# Draw the charts side by side (1 row, 2 columns). par() returns the previous
# settings, which are restored once both charts have been drawn.
old_par <- par(mfrow = c(1, 2))

subjects <- c(
  "math", "english", "biology", "music", "R-coding",
  "data-viz", "french", "physic", "statistic", "sport"
)

# One row of 10 random scores in 2..20 per student, one column per subject.
data1 <- as.data.frame(matrix(sample(2:20, 10, replace = TRUE), ncol = 10))
data2 <- as.data.frame(matrix(sample(2:20, 10, replace = TRUE), ncol = 10))
colnames(data1) <- subjects
colnames(data2) <- subjects

# radarchart() reads the first two rows as each axis' maximum and minimum.
df1 <- rbind(rep(20, 10), rep(0, 10), data1)
df2 <- rbind(rep(20, 10), rep(0, 10), data2)

# Name the list's elements; the names are used as the subplot titles.
my.list <- list("df1" = df1, "df2" = df2)

# Iterate over list positions (seq_along() is also safe for an empty list) so
# that both the data frame and its name are available for each chart.
for (i in seq_along(my.list)) {
  radarchart(
    my.list[[i]],
    axistype = 1,
    # custom polygon
    pcol = rgb(0.2, 0.5, 0.5, 0.9),
    pfcol = rgb(0.2, 0.5, 0.5, 0.5),
    plwd = 4,
    # custom the grid
    cglcol = "grey",
    cglty = 1,
    axislabcol = "grey",
    caxislabels = seq(0, 20, 5),
    cglwd = 0.8,
    # custom labels
    vlcex = 0.8,
    # title: the name of the current list element
    title = names(my.list)[i]
  )
}

par(old_par)