Working with a dataset similar to the one below, I am able to get the desired output by using the scan operator to fill forward strings/bools in the test dataset. However, it times out on larger datasets, because every property has many events and there are millions of properties. I am looking for a better/faster approach.
// Sample event stream for a single property, one row per event.
// An empty string or bool(null) is the "not set on this event" placeholder that
// the question wants to fill forward.
let T = datatable(
    PropertyId: int,
    Tenant: string,
    Owner: string,
    NoisyNeighbour: bool,
    PropertyTitle: string,
    EventDate: datetime
)
[
    1, "",   "",    bool(0),    "",              datetime(2022-08-01 00:00),
    1, "",   "abc", bool(null), "",              datetime(2022-08-01 01:00),
    1, "X",  "",    bool(null), "Title updated", datetime(2022-08-02 00:00),
    1, "X",  "cde", bool(null), "",              datetime(2022-08-03 00:00),
    1, "A1", "",    bool(null), "",              datetime(2022-08-03 00:00),
    1, "A2", "",    bool(null), "",              datetime(2022-08-03 02:00),
    1, "A2", "def", bool(null), "",              datetime(2022-08-03 03:00),
    1, "B",  "",    bool(null), "",              datetime(2022-08-04 00:00),
    1, "C",  "",    bool(1),    "",              datetime(2022-08-05 00:00),
    1, "D",  "xyz", bool(null), "",              datetime(2022-08-06 00:00),
];
T
Expected result : Tenant who marked the property with NoisyNeighbor, Date when it was marked noisy, Current owner of the property, property title
Based on arg_max()
For each property find the last time it was marked with NN
For each property take only the events that preceded the NN event (including itself).
For each property take the Tenant & NoisyNeighbour of the last event (the one that was marked with NN) and the last Owner & PropertyTitle that are not null
// Sample event stream for a single property, one row per event.
let T = datatable(
    PropertyId: int,
    Tenant: string,
    Owner: string,
    NoisyNeighbour: bool,
    PropertyTitle: string,
    EventDate: datetime
)
[
    1, "",   "",    false,      "",              datetime(2022-08-01 00:00),
    1, "",   "abc", bool(null), "",              datetime(2022-08-01 01:00),
    1, "X",  "",    bool(null), "Title updated", datetime(2022-08-02 00:00),
    1, "X",  "cde", bool(null), "",              datetime(2022-08-03 00:00),
    1, "A1", "",    bool(null), "",              datetime(2022-08-03 00:00),
    1, "A2", "",    bool(null), "",              datetime(2022-08-03 02:00),
    1, "A2", "def", bool(null), "",              datetime(2022-08-03 03:00),
    1, "B",  "",    bool(null), "",              datetime(2022-08-04 00:00),
    1, "C",  "",    true,       "",              datetime(2022-08-05 00:00),
    1, "D",  "xyz", bool(null), "",              datetime(2022-08-06 00:00)
];
// Step 1: per property, the timestamp of the most recent NoisyNeighbour event.
let LastNoisyEvent =
    T
    | where NoisyNeighbour
    | summarize max_EventDate = max(EventDate) by PropertyId;
// Step 2: keep only the events up to (and including) that timestamp.
LastNoisyEvent
| join kind=inner T on PropertyId
| where EventDate <= max_EventDate
// Step 3: Tenant/NoisyNeighbour come from the latest event; Owner and PropertyTitle
// come from the latest event on which they are non-empty (empty rows get a null
// sort key and therefore never win the arg_max).
| summarize
    arg_max(EventDate, Tenant, NoisyNeighbour),
    arg_max(iff(isnotempty(Owner), EventDate, datetime(null)), Owner),
    arg_max(iff(isnotempty(PropertyTitle), EventDate, datetime(null)), PropertyTitle)
    by PropertyId
// Drop the helper max* columns produced by the join and the expression-based arg_max calls.
| project-away max*
| PropertyId | EventDate | Tenant | NoisyNeighbour | Owner | PropertyTitle |
|---|---|---|---|---|---|
| 1 | 2022-08-05T00:00:00Z | C | true | def | Title updated |
Fiddle
The partition operator is used for parallelizing the scan operator work.
// Sample event stream for a single property, one row per event.
let T = datatable(
    PropertyId: int,
    Tenant: string,
    Owner: string,
    NoisyNeighbour: bool,
    PropertyTitle: string,
    EventDate: datetime
)
[
    1, "",   "",    false,      "",              datetime(2022-08-01 00:00),
    1, "",   "abc", bool(null), "",              datetime(2022-08-01 01:00),
    1, "X",  "",    bool(null), "Title updated", datetime(2022-08-02 00:00),
    1, "X",  "cde", bool(null), "",              datetime(2022-08-03 00:00),
    1, "A1", "",    bool(null), "",              datetime(2022-08-03 00:00),
    1, "A2", "",    bool(null), "",              datetime(2022-08-03 02:00),
    1, "A2", "def", bool(null), "",              datetime(2022-08-03 03:00),
    1, "B",  "",    bool(null), "",              datetime(2022-08-04 00:00),
    1, "C",  "",    true,       "",              datetime(2022-08-05 00:00),
    1, "D",  "xyz", bool(null), "",              datetime(2022-08-06 00:00)
];
T
// Each PropertyId is processed as its own sub-query so the scan work is split per property.
| partition hint.strategy=native by PropertyId
    (
        // scan walks the rows in order, so they must be in chronological order first.
        sort by EventDate asc
        | scan with
            (
                // Fill forward: keep this row's value when present, otherwise reuse
                // the value carried in the step state from the previous row.
                step s1 : true =>
                    Owner         = coalesce(Owner, s1.Owner),
                    PropertyTitle = coalesce(PropertyTitle, s1.PropertyTitle);
            )
        // Only the events flagged as NoisyNeighbour are returned.
        | where NoisyNeighbour
    )
| PropertyId | Tenant | Owner | NoisyNeighbour | PropertyTitle | EventDate |
|---|---|---|---|---|---|
| 1 | C | def | true | Title updated | 2022-08-05T00:00:00Z |
Fiddle
I made a box plot of the 3 replicates of each of 40s and 80s (the first 6 columns), with 40s in red and 80s in blue. I would now like to add the 7th column, labelled "control", as a point in a different color next to each pair of box plots for the corresponding "gene", so that the resulting plot has 2 box plots (40s and 80s) and one control point for each entry in the "gene" column.
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~rep1_40s, ~rep2_40s, ~rep3_40s, ~rep1_80s, ~rep2_80s, ~rep3_80s, ~control,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925, 0.024144802,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 , 0.013676519,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 , 0.016027898,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 , 0.027512454,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 , 0.01232813,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 , 0.014466955,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 , 0.014706078,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 , 0.002198605,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 , 0.002630355,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 , 0.017110594,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 , 0.037754899,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 , 0.010269014,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 , 0.015795417,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 , 0.019063434,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 , 0.011570907,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 , 0.020245766,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 , 0.015895051,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 , 0.002889406,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 , 0.010773829,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 , 0.016114248,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 , 0.018087014,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 , 0.018425772,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 , 0.018731318,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 , 0.016067752,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 , 0.02738625,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 , 0.011776818,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 , 0.021819993,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 , 0.013530389,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 , 0.020591166,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 , 0.018425772,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 , 0.01798738,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 , 0.002789771,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 , 0.017369645,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 , 0.002504151,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 , 0.019262703,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 , 0.017827964,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 , 0.025539688,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 , 0.011697111,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 , 0.02247094,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 , 0.013457323,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 , 0.016745267,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 , 0.013955497,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 , 0.013556958,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 , 0.00212554,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 , 0.012441049,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 , 0.01454002,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 , 0.019667884,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 , 0.021766855,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 , 0.017827964,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 , 0.016917967,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 , 0.016898041,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 , 0.016446363,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 , 0.02465626,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 , 0.012965792,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 , 0.002949186,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 , 0.014792428,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 , 0.018113584,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 , 0.0193092,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 , 0.016094321,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 , 0.010355364,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 , 0.013317835,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 , 0.003659914,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 , 0.017230156,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177, 0.022743275
)
# Short column names: letter = replicate (a/b/c), number = fraction (40/80).
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80", "control")

# Long format: one row per gene x measurement column.
# `control` must get its own group. A plain if_else() on "40" would label it "80s"
# and fold it into the 80s box, and str_detect(name, "c") would tag it as rep3
# because "control" contains a "c".
marte_long <- marte %>%
  pivot_longer(-gene, names_to = "name") %>%
  mutate(
    group = case_when(
      name == "control"      ~ "control",
      str_detect(name, "40") ~ "40s",
      TRUE                   ~ "80s"
    ),
    rep = case_when(
      name == "control"     ~ NA_character_,
      str_detect(name, "a") ~ "rep1",
      str_detect(name, "b") ~ "rep2",
      str_detect(name, "c") ~ "rep3",
      TRUE                  ~ NA_character_
    )
  )

replicates <- filter(marte_long, group != "control")
controls   <- filter(marte_long, group == "control")

# Boxes and jittered points use the replicates only; the control value is drawn
# as a single black point on its own row of each facet.
ggplot(replicates, aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = .5) +
  geom_point(data = controls, color = "black") +
  coord_flip() +
  facet_wrap(~gene, ncol = 4)
If I understand you correctly, it just needs a third category, with the data and aesthetics supplied separately to each geom.
# Long format with "Control" as its own category, so it can be drawn separately
# from the 40s/80s replicates.
marte_long <- marte %>%
  pivot_longer(-gene, names_to = "name") %>%
  mutate(
    group = case_when(
      str_detect(name, "40") ~ "40s",
      name == "control"      ~ "Control",
      TRUE                   ~ "80s"
    ),
    # The control check has to come first: "control" contains a "c" and would
    # otherwise be labelled rep3.
    rep = case_when(
      name == "control"     ~ NA_character_,
      str_detect(name, "a") ~ "rep1",
      str_detect(name, "b") ~ "rep2",
      str_detect(name, "c") ~ "rep3",
      TRUE                  ~ NA_character_
    )
  )

# Split the control rows off so the box plots are computed from replicates only.
marte_long_control <- filter(marte_long, group == "Control")
marte_long <- filter(marte_long, group != "Control")

# x/y are shared by every layer; colour is mapped only for the replicate layers,
# so the control point keeps the default colour and stays out of the legend.
ggplot(marte_long, aes(x = group, y = value)) +
  geom_boxplot(aes(color = group)) +
  geom_jitter(aes(color = group), alpha = .5) +
  geom_point(data = marte_long_control) +
  coord_flip() +
  facet_wrap(~gene, ncol = 4)
I am trying to plot the following data (pastebin link: https://pastebin.com/w1WaEcPd) as a box plot, with the trinucleotide identity on the x axis and the frequency on the y axis. I have attached a picture of the graph I am envisioning ([enter image description here][1]) and the code I have so far. I am getting the error:
"Error in FUN(X[[i]], ...) : object 'gene' not found".
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
# Trinucleotide frequencies: one row per trinucleotide ("gene"), six measurement
# columns (placeholder names, renamed right after this call) and one control column.
# The header row needs a trailing comma after ~control; without it the call does not parse.
marte <- tribble(
  ~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6, ~control,
  "AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925, 0.024144802,
  "TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 , 0.013676519,
  "AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 , 0.016027898,
  "TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 , 0.027512454,
  "GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 , 0.01232813,
  "CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 , 0.014466955,
  "GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 , 0.014706078,
  "CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 , 0.002198605,
  "TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 , 0.002630355,
  "ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 , 0.017110594,
  "TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 , 0.037754899,
  "ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 , 0.010269014,
  "CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 , 0.015795417,
  "GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 , 0.019063434,
  "CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 , 0.011570907,
  "GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 , 0.020245766,
  "GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 , 0.015895051,
  "CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 , 0.002889406,
  "GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 , 0.010773829,
  "CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 , 0.016114248,
  "AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 , 0.018087014,
  "TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 , 0.018425772,
  "AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 , 0.018731318,
  "TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 , 0.016067752,
  "CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 , 0.02738625,
  "GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 , 0.011776818,
  "CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 , 0.021819993,
  "GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 , 0.013530389,
  "TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 , 0.020591166,
  "ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 , 0.018425772,
  "TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 , 0.01798738,
  "ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 , 0.002789771,
  "CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 , 0.017369645,
  "GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 , 0.002504151,
  "CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 , 0.019262703,
  "GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 , 0.017827964,
  "TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 , 0.025539688,
  "ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 , 0.011697111,
  "TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 , 0.02247094,
  "ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 , 0.013457323,
  "GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 , 0.016745267,
  "CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 , 0.013955497,
  "GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 , 0.013556958,
  "CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 , 0.00212554,
  "AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 , 0.012441049,
  "TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 , 0.01454002,
  "AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 , 0.019667884,
  "TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 , 0.021766855,
  "TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 , 0.017827964,
  "ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 , 0.016917967,
  "TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 , 0.016898041,
  "ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 , 0.016446363,
  "CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 , 0.02465626,
  "GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 , 0.012965792,
  "CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 , 0.002949186,
  "GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 , 0.014792428,
  "AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 , 0.018113584,
  "TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 , 0.0193092,
  "AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 , 0.016094321,
  "TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 , 0.010355364,
  "GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 , 0.013317835,
  "CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 , 0.003659914,
  "GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 , 0.017230156,
  "CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177, 0.022743275
)
# Short column names: letter = replicate (a/b/c), number = fraction (40/80).
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80", "control")

# Long format: one row per gene x measurement column.
# `control` is classified explicitly. Otherwise the "40"-vs-rest test would file it
# under "80s" and distort that box, and the "c" test would mark it as rep3.
marte_long <- marte %>%
  pivot_longer(-gene, names_to = "name") %>%
  mutate(
    group = case_when(
      name == "control"      ~ "control",
      str_detect(name, "40") ~ "40s",
      TRUE                   ~ "80s"
    ),
    rep = case_when(
      name == "control"     ~ NA_character_,
      str_detect(name, "a") ~ "rep1",
      str_detect(name, "b") ~ "rep2",
      str_detect(name, "c") ~ "rep3",
      TRUE                  ~ NA_character_
    )
  )

replicates <- filter(marte_long, group != "control")
controls   <- filter(marte_long, group == "control")

# Box plots and jittered points are built from the replicates only; the control
# value is overlaid as one black point per gene.
ggplot(replicates, aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = .5) +
  geom_point(data = controls, color = "black") +
  coord_flip() +
  facet_wrap(~gene, ncol = 4)
This is one possible solution using the tidyverse packages. Here I recreated the data table; you would just need to rename the columns, then run the part with pivot_longer and mutate to prepare the data for plotting, and finally plot with ggplot2.
I am making a few assumptions here, if it is not exactly what you were thinking, please write a comment.
library(dplyr)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.0
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513,
"AAA", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828,
"TAG", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884,
"AGC", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108,
"TGT", 0.00230219 , 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438,
"AAA", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094,
"TAG", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358,
"AGC", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073,
"TGT", 0.01259198 , 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295,
"CGG", 0.00377216 , 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007
)
# Short column names: letter = replicate (a/b/c), number = group (40/80).
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")

# Keep four genes, reshape to long format, derive group/replicate labels from the
# column name, then draw one horizontal box per replicate and group, one facet per gene.
marte %>%
  filter(gene %in% c("AAA", "TAG", "AGC", "TGT")) %>%
  pivot_longer(cols = -gene, names_to = "name") %>%
  mutate(
    group = if_else(grepl("40", name, fixed = TRUE), "group 1", "group 2"),
    rep = case_when(
      grepl("a", name, fixed = TRUE) ~ "rep1",
      grepl("b", name, fixed = TRUE) ~ "rep2",
      grepl("c", name, fixed = TRUE) ~ "rep3",
      TRUE                           ~ NA_character_
    )
  ) %>%
  ggplot(mapping = aes(x = rep, y = value, color = group)) +
  geom_boxplot() +
  coord_flip() +
  facet_wrap(vars(gene), ncol = 1)
Created on 2021-07-01 by the reprex package (v2.0.0)
Edit
# Reshape to long format, label each value with its group (40s/80s) and replicate,
# then draw one horizontal box per group with the raw points jittered on top;
# facets are stacked in a single column, one per gene.
marte %>%
  pivot_longer(cols = -gene, names_to = "name") %>%
  mutate(
    group = if_else(grepl("40", name, fixed = TRUE), "40s", "80s"),
    rep = case_when(
      grepl("a", name, fixed = TRUE) ~ "rep1",
      grepl("b", name, fixed = TRUE) ~ "rep2",
      grepl("c", name, fixed = TRUE) ~ "rep3",
      TRUE                           ~ NA_character_
    )
  ) %>%
  ggplot(mapping = aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = .5) +
  coord_flip() +
  facet_wrap(vars(gene), ncol = 1)
Edit 2
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
marte <- tribble(
~gene, ~funnyName1, ~funnyName2, ~funnyName3, ~funnyName4, ~funnyName5, ~funnyName6,
"AAA", 0.021383202, 0.016654469, 0.022484448, 0.025311535, 0.025495724, 0.02017925,
"TAG", 0.019927531, 0.018790672, 0.015649845, 0.02230479, 0.019363723, 0.02017925 ,
"AGC", 0.013209711, 0.016891825, 0.014520044, 0.014302046, 0.014804269, 0.012904701 ,
"TGT", 0.033757832, 0.034990209, 0.031899462, 0.034446096, 0.034475058, 0.031047513 ,
"GAT", 0.013006661, 0.007615167, 0.009163947, 0.010540757, 0.008234933, 0.012000828 ,
"CAC", 0.011210666, 0.015012758, 0.01241387, 0.011478221, 0.011045046, 0.013579884 ,
"GGG", 0.012712095, 0.011927132, 0.013222864, 0.011558249, 0.014292494, 0.014647108 ,
"CGA", 0.00230219, 0.000969203, 0.001325076, 0.002034983, 0.001656292, 0.002036438 ,
"TCG", 0.004169681, 0.007377811, 0.005649008, 0.005133189, 0.006057561, 0.006131094 ,
"ACA", 0.011259284, 0.013825979, 0.010656401, 0.009900537, 0.010617015, 0.009670358 ,
"TTT", 0.033305974, 0.04751073, 0.043476442, 0.035212073, 0.037359611, 0.043669073 ,
"ATC", 0.01259198, 0.013905097, 0.013222864, 0.013501772, 0.012087207, 0.010356431 ,
"CCC", 0.009591983, 0.004213066, 0.006764862, 0.010506459, 0.006560031, 0.009354547 ,
"GCT", 0.021391781, 0.025397077, 0.022972634, 0.024545558, 0.023513757, 0.024426367 ,
"CTA", 0.012680636, 0.011848013, 0.013976065, 0.01610838, 0.014794964, 0.016128154 ,
"GTG", 0.018394644, 0.015507249, 0.01591486, 0.01932091, 0.019856889, 0.017086478 ,
"GGA", 0.017702557, 0.014182012, 0.018871872, 0.017926146, 0.018237817, 0.016411295 ,
"CGG", 0.00377216, 0.00346144, 0.003361509, 0.00339545, 0.004029069, 0.004258007 ,
"GAC", 0.019684443, 0.018790672, 0.025469356, 0.02854693, 0.027663791, 0.02270574 ,
"CAT", 0.015297412, 0.014083114, 0.01355762, 0.013879044, 0.013771413, 0.01256711 ,
"AGT", 0.018391784, 0.013054572, 0.012288337, 0.018532068, 0.014376239, 0.014745119 ,
"TGC", 0.020382249, 0.025041043, 0.02170335, 0.02225906, 0.021708586, 0.02506888 ,
"AAG", 0.016818859, 0.011393081, 0.018258153, 0.016920087, 0.016293071, 0.01251266 ,
"TAA", 0.016710184, 0.019324723, 0.023697938, 0.021150109, 0.0213643, 0.019351607 ,
"CTG", 0.034733046, 0.040864767, 0.036265238, 0.035143478, 0.036084824, 0.035555991 ,
"GTA", 0.014556707, 0.012560081, 0.012706782, 0.014667886, 0.014320409, 0.013863025 ,
"CCT", 0.015168718, 0.012184267, 0.012818367, 0.010735109, 0.012264002, 0.010988054 ,
"GCC", 0.009183023, 0.007239354, 0.00776913, 0.007808391, 0.008197713, 0.007198319 ,
"TTC", 0.018537637, 0.015922622, 0.018244205, 0.015639648, 0.017967972, 0.02003768 ,
"ATT", 0.018097218, 0.017069842, 0.014743214, 0.015045158, 0.017204961, 0.017184488 ,
"TCA", 0.015606277, 0.012757877, 0.013794739, 0.013936207, 0.011510296, 0.010323761 ,
"ACG", 0.003929453, 0.002650474, 0.00380785, 0.003829885, 0.003535903, 0.003528374 ,
"CCA", 0.013678729, 0.010601895, 0.016012498, 0.012964445, 0.014004038, 0.013340303 ,
"GCG", 0.001401333, 0.000929644, 0.000864786, 0.00050303, 0.00042803, 0.000457382 ,
"CTC", 0.021940875, 0.01946318, 0.021215164, 0.02198468, 0.022145921, 0.026560815 ,
"GTT", 0.019249743, 0.023537789, 0.020141155, 0.020967189, 0.019614959, 0.020854433 ,
"TCT", 0.032416556, 0.033704532, 0.033405864, 0.032696925, 0.034102858, 0.037919131 ,
"ACC", 0.011316481, 0.013034792, 0.015133763, 0.011775466, 0.012384967, 0.013547214 ,
"TTG", 0.029728284, 0.040330716, 0.03107652, 0.030993483, 0.031739385, 0.03275725 ,
"ATA", 0.011276443, 0.010839251, 0.011395654, 0.008402881, 0.010635625, 0.013971925 ,
"GAG", 0.011645365, 0.009454675, 0.007992301, 0.009397508, 0.008830453, 0.00750324 ,
"CAA", 0.009388933, 0.007536048, 0.010935364, 0.008208529, 0.009658599, 0.009332767 ,
"GGT", 0.010687311, 0.012738098, 0.00974977, 0.008711558, 0.01001219, 0.010977164 ,
"CGC", 0.001201143, 0.001305457, 0.001325076, 0.001017492, 0.001079381, 0.001785967 ,
"AAC", 0.013266908, 0.019146706, 0.015747482, 0.013193095, 0.014962454, 0.014418417 ,
"TAT", 0.012557662, 0.011630437, 0.011814099, 0.011924088, 0.011435856, 0.011761247 ,
"AGA", 0.019558609, 0.019502739, 0.017477055, 0.01982394, 0.017716737, 0.016160824 ,
"TGG", 0.024234486, 0.028818957, 0.02414428, 0.024111124, 0.023439317, 0.024143226 ,
"TTA", 0.015523341, 0.018493977, 0.01705861, 0.019572425, 0.016311681, 0.013111612 ,
"ATG", 0.014385115, 0.006903099, 0.013153123, 0.013593232, 0.011742921, 0.012175068 ,
"TCC", 0.014041931, 0.007872303, 0.00974977, 0.009671888, 0.009798174, 0.009909939 ,
"ACT", 0.027183006, 0.030045295, 0.031020727, 0.034777638, 0.034130773, 0.033987825 ,
"CTT", 0.02351094, 0.027256364, 0.026766536, 0.025631645, 0.02585862, 0.025656942 ,
"GTC", 0.012952323, 0.012678759, 0.008745502, 0.008814451, 0.008076748, 0.00761214 ,
"CCG", 0.002599616, 0.00114722, 0.002189862, 0.001783469, 0.002149457, 0.003081882 ,
"GCA", 0.013684448, 0.02231145, 0.016486735, 0.016005488, 0.01599531, 0.018817995 ,
"AGG", 0.014244982, 0.008722828, 0.013334449, 0.012381388, 0.012766472, 0.010770253 ,
"TGA", 0.025881768, 0.019008248, 0.023098167, 0.023848177, 0.025532944, 0.023500713 ,
"AAT", 0.016489975, 0.014538046, 0.018788183, 0.014004802, 0.018098242, 0.017881451 ,
"TAC", 0.011036214, 0.00860415, 0.008271264, 0.008402881, 0.008635048, 0.011216745 ,
"GGC", 0.01302668, 0.014142453, 0.011409602, 0.01280439, 0.011361416, 0.011576116 ,
"CGT", 0.004718775, 0.006428388, 0.006346417, 0.004778781, 0.005378295, 0.00509654 ,
"GAA", 0.020542402, 0.017841248, 0.018007086, 0.015102321, 0.018247122, 0.016171714 ,
"CAG", 0.017162043, 0.018414858, 0.016472787, 0.016531382, 0.014981064, 0.014244177,
)
# Short column names: letter = replicate (a/b/c), number = group (40/80).
colnames(marte) <- c("gene", "a40", "b40", "c40", "a80", "b80", "c80")

# Reshape to long format, label each value with its group (40s/80s) and replicate,
# then draw one horizontal box per group with the raw points jittered on top;
# one facet per gene, laid out four facets per row.
marte %>%
  pivot_longer(cols = -gene, names_to = "name") %>%
  mutate(
    group = if_else(grepl("40", name, fixed = TRUE), "40s", "80s"),
    rep = case_when(
      grepl("a", name, fixed = TRUE) ~ "rep1",
      grepl("b", name, fixed = TRUE) ~ "rep2",
      grepl("c", name, fixed = TRUE) ~ "rep3",
      TRUE                           ~ NA_character_
    )
  ) %>%
  ggplot(mapping = aes(x = group, y = value, color = group)) +
  geom_boxplot() +
  geom_jitter(alpha = .5) +
  coord_flip() +
  facet_wrap(vars(gene), ncol = 4)
Created on 2021-07-09 by the reprex package (v2.0.0)