Matching columns and rows in a special condition - r

output1 <- output1 <- structure(list(row = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 214L, 214L,214L), col = c(17L, 17L, 17L, 17L, 17L, 17L, 16L, 110L, 111L,111L), cell = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 27244L, 27245L, 27245L), xcoord = c(783750L, 783750L, 783750L, 783750L, 783750L, 783750L,783725L, 786075L, 786100L, 786100L), ycoord = c(187050L, 187050L,187050L, 187050L, 187050L, 187050L, 187025L, 181725L, 181725L,181725L), species = structure(c(1L, 1L, 1L, 8L, 9L, 11L, 1L,3L, 3L, 3L), .Label = c("abiealba", "alnuinca", "alnuviri", "betupend","betupube", "fagusilv", "larideci", "piceabie", "pinucemb", "pinusilv","popunigr", "poputrem", "salicapr", "sorbaucu"), class = "factor"),age = c(100L, 20L, 10L, 100L, 100L, 100L, 100L, 30L, 70L,30L), biomass = c(0.1015, 0.0152, 0.0127, 0.5391, 0.02, 0.1584,0.1019, 0.0114, 0.0115, 0.0114), stems = c(1L, 10L, 10L,20L, 5L, 3L, 4L, 15L, 2L, 10L), slowGrowth = c(0L, 0L, 0L,0L, 14L, 0L, 0L, 0L, 0L, 0L), DBH = c(17.9273, 8.831, 8.2681,34.9717, 9.7366, 18.9254, 17.9523, 6.6486, 6.6793, 6.6486), height = c(14.0924, 8.0258, 7.625, 23.4468, 8.0478, 13.6345,14.1081, 3.6519, 3.6552, 3.6519), availableLight = c(0.0934,0.0807, 0.071, 0.4742, 0.0887, 0.101, 0.0985, 0.958, 0.9952,0.9624), light_rf = c(0.2619, 0.2067, 0.1708, 0.6971, 0.063,0.1049, 0.2896, 0.9768, 0.9972, 0.9793), LeafArea = c(5.4506,5.4506, 5.4506, 5.4506, 5.4506, 5.4506, 5.2884, 0.2307, 0.1732,0.1732), nitorgen_rf = c(0, 0, 0, 0, 0.1328, 0, 0, 0, 0,0), droughtIndex = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), moisture_rf = c(1,1, 1, 1, 1, 1, 1, 1, 1, 1), degreeDay_rf = c(0.258, 0.258,0.258, 0.4726, 0.5144, 0.237, 0.258, 0.1125, 0.1125, 0.1125), foliageWght = c(0.0093, 0.0031, 0.0028, 0.0265, 0.0036,0.0023, 0.0094, 5e-04, 5e-04, 5e-04), twigWght = c(0.0537,0.0115, 0.0096, 0.0513, 0.0149, 0.0847, 0.0538, 0.0109, 0.011,0.0109), boleWght = c(0.0384, 6e-04, 3e-04, 0.4613, 0.0015,0.0713, 0.0387, 0, 0, 0), deadFoliage = c(0.405, 0.405, 0.405,0.405, 0.405, 0.405, 0.3664, 0.0627, 0.0534, 0.0534), deadTwig = 
c(0.9887,0.9887, 0.9887, 0.9887, 0.9887, 0.9887, 0.9537, 0.7391, 0.8132,0.8132), deadbole = c(2.3166, 2.3166, 2.3166, 2.3166, 2.3166,2.3166, 2.3947, 0, 0, 0)), .Names = c("row", "col", "cell","xcoord", "ycoord", "species", "age", "biomass", "stems", "slowGrowth","DBH", "height", "availableLight", "light_rf", "LeafArea", "nitorgen_rf","droughtIndex", "moisture_rf", "degreeDay_rf", "foliageWght","twigWght", "boleWght", "deadFoliage", "deadTwig", "deadbole"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 131023L, 131024L,131025L), class = "data.frame")
and
Details <- structure(list(fireID = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1052L,1052L, 1052L), decade = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 100L, 100L,100L), cell = c(14150L, 14321L, 14320L, 14489L, 14323L, 13977L,14492L, 14461L, 14122L, 14123L), row = c(128L, 129L, 129L, 130L,129L, 127L, 130L, 130L, 128L, 128L), column = c(137L, 137L, 136L,135L, 139L, 136L, 138L, 107L, 109L, 110L), biomass = c(0.724241,0.652821, 0.776811, 0.860563, 0.649643, 0.751143, 0.760428, 20.5968,33.6653, 15.1725)), .Names = c("fireID", "decade", "cell", "row","column", "biomass"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,12896L, 12897L, 12898L), class = "data.frame")
I want to match these two datasets by rows and cols. Actually, I did it with
# Join the fire details with the stand output on the shared "cell" and "row"
# columns; only rows whose key pair occurs in both data frames are kept.
aa <- merge(x = Details, y = output1, by = c("cell", "row"))
but the problem is that output1 has many rows with the same coordinates. However, I only want to get one set of coordinates for each row in my Details output.
Any suggestions?
Thanks in advance.

If I understand the question correctly you need something like this:
# Keep only the first merged row for each distinct (row, cell) pair.
aa <- aa[!duplicated(aa[c("row", "cell")]), ]
I am removing non-unique combinations of row and cell because I would imagine that cell plays a role in your analysis, since you use it in the merge. Otherwise:
# Keep only the first merged row for each distinct value of row.
aa <- aa[!duplicated(aa["row"]), ]

Related

ggpredict error: $ operator not defined for this S4 class

I have formulated a mixed linear regression model that I would like to plot.
I wanted to do so by generating predictive values with ggPredict from
ggiraphExtra .
My model is as follows:
# Mixed model for the `Log(Visit)` column: three-way interaction of
# Slope_degree, Path_dist and AS_sa, plus a random intercept per Location.
# `Log(Visit)` is a column name, so it has to be backtick-quoted; written
# without backticks it would be parsed as a call to a function named Log().
# REML = FALSE is spelled out because `F` is an ordinary, reassignable variable.
lme9v <- lmer(
  `Log(Visit)` ~ Slope_degree * Path_dist * AS_sa + (1 | Location),
  data = merged_dataset_05,
  REML = FALSE
)
*
note that Log(Visit) is actually notated with backtick escapes.
# Ask ggiraphExtra for an interactive prediction plot of the fitted model;
# this is the call that raises the error quoted below.
ggPredict(lme9v, interactive=TRUE)
However, I get the error:
Error: $ operator not defined for this S4 class
What causes the error to occur and what should I do differently?
This is my data:
structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), Days_deployed = c(15L,
15L, 14L, 14L, 14L, 14L), Count = c(23L, 29L, 9L, 20L, 85L,
43L), Tottime = c(295, 318, 66, 199, 1386, 745), Path_dist = c(659.4047198,
1021.11078342, 516.51545352, 997.8758996, 988.18342935, 957.66932416
), Count_rate = c(9.2, 11.6, 4.10334346504559, 9.11854103343465,
42.358803986711, 17.3854447439353), Time_use = c(118, 127.2,
30.0911854103343, 90.7294832826748, 690.697674418605, 301.212938005391
), `Log(Time)` = c(4.77068462446567, 4.84576065090602, 3.40423228535731,
4.50788236805538, 6.53770220909723, 5.70781744986838), `Log(Visit)` = c(2.21920348405499,
2.45100509811232, 1.4118021206671, 2.21030981688487, 3.7461762858377,
2.85563334718238), Location = c(1, 1, 2, 2, 3, 3)), row.names = c(NA,
-6L), groups = structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), .rows = list(
1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), na.action = structure(c(`31` = 31L,
`32` = 32L, `47` = 47L, `48` = 48L), class = "omit"))

Is it possible to extend the intervals of the x-axis in R?

I have two plots: a barplot, and a ggplot(geom_jitter bubbleplot). Ultimately, I am using a photo editing app to line up these two plots. As you can see, the intervals in the bottom of these two plots do not match up, which is my problem here. I would like to make it so I can just change the bottom x-axis of both plots to 400 (lowest common interval to cover x-axis of both plots). I do not want to change the data values, just the axis values.
Barplot Code
# Read the domain lengths from a CSV file picked interactively.
GYPCdomain <- read.csv(file = file.choose(), header = TRUE)

# Horizontal stacked bar of the domain lengths. barplot() returns the bar
# midpoints, which are stored in GYPCbarplot.
GYPCbarplot <- barplot(
  height = as.matrix(GYPCdomain),
  horiz = TRUE,
  xlab = "Length (Protein Domains Shown)",
  col = c("azure", "plum1", "skyblue"),
  legend.text = c("Cytoplasmic", "Helical Membrane", "Extracellular")
)
sample data:
structure(list(GYPC = c(0L, 0L, 171L, 0L, 72L, 0L, 141L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L)), class = "data.frame", row.names = c(NA, -42L))
Bubbleplot Code
library(ggplot2)
library(scales)
# Read the polymorphism table from a CSV file picked interactively.
# The former `data(GYPC, package = "ggplot2")` call was dropped: ggplot2 ships
# no dataset of that name, so the call only produced a "data set not found"
# warning before GYPC was overwritten by read.csv() on the next line anyway.
GYPC <- read.csv(file.choose(), header = TRUE)

# Bubble plot of log10 frequency against position on the gene: colour encodes
# the geographical location, point size encodes the p-value. The size scale is
# reversed and the size legend is reversed again so it reads in the same
# direction as the points.
GYPCggplot <- ggplot(GYPC, aes(Position, log10(Frequency))) +
  geom_jitter(aes(col = Geographical.Location, size = (p.value))) +
  labs(subtitle = "Frequency of Various Polymorphisms", title = "GYPC Gene") +
  labs(color = "Geographical Location") +
  labs(size = "p-value") +
  labs(x = "Position of Polymorphism on GYPC Gene") +
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE))
sample data:
structure(list(Variant = structure(c(4L, 4L, 4L, 4L, 4L, 8L,
8L, 8L, 8L, 8L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 12L,
12L, 12L, 12L, 12L, 14L, 14L, 14L, 14L, 14L, 2L, 2L, 2L, 2L,
2L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 13L, 13L, 13L, 13L, 13L, 15L, 15L, 15L, 15L, 15L), .Label = c("rs111631066",
"rs114199197", "rs115178969", "rs115201071", "rs139780142", "rs139816143",
"rs143080607", "rs143216051", "rs199797395", "rs531807314", "rs545780841",
"rs551011574", "rs560942282", "rs567759380", "rs571586275"), class = "factor"),
Position = c(213L, 213L, 213L, 213L, 213L, 60L, 60L, 60L,
60L, 60L, 249L, 249L, 249L, 249L, 249L, 183L, 183L, 183L,
183L, 183L, 282L, 282L, 282L, 282L, 282L, 294L, 294L, 294L,
294L, 294L, 150L, 150L, 150L, 150L, 150L, 135L, 135L, 135L,
135L, 135L, 258L, 258L, 258L, 258L, 258L, 255L, 255L, 255L,
255L, 255L, 138L, 138L, 138L, 138L, 138L, 159L, 159L, 159L,
159L, 159L, 141L, 141L, 141L, 141L, 141L, 198L, 198L, 198L,
198L, 198L, 258L, 258L, 258L, 258L, 258L), Geographical.Location = structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("AFR",
"AMR", "EAS", "EUR", "SAS"), class = "factor"), Frequency = c(0.023,
0.001, 0, 0, 0, 0.017, 0.001, 0, 0, 0, 0.012, 0, 0, 0, 0,
0.002, 0.003, 0.002, 0.023, 0.016, 0.001, 0, 0, 0, 0, 0,
0, 0, 0, 0.004, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0.001, 0,
0, 0.001, 0, 0, 0.001, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0,
0, 0, 0, 0.002, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0.001, 0,
0, 0.001, 0, 0), pre.p.value = c(6.32e-17, 0.113, 0.00126,
0.00126, 0.00211, 2.51e-12, 0.356, 0.00806, 0.00809, 0.0139,
4.86e-10, 0.15, 0.0542, 0.0542, 0.0537, 0.000376, 0.0778,
0.0068, 7.4e-06, 0.0109, 0.264, 1, 1, 1, 1, 0.579, 1, 0.589,
0.59, 0.00144, 1, 1, 1, 0.201, 1, 1, 1, 1, 1, 0.195, 1, 1,
0.201, 1, 1, 1, 1, 0.201, 1, 1, 1, 0.139, 1, 1, 1, 1, 1,
1, 1, 0.0381, 1, 1, 0.201, 1, 1, 1, 1, 1, 1, 0.195, 1, 1,
0.201, 1, 1), p.value = c(0, 0.75, 0.5, 0.5, 0.5, 0, 0.75,
0.5, 0.5, 0.75, 0, 0.75, 0.75, 0.75, 0.75, 0.5, 0.75, 0.5,
0.25, 0.75, 0.75, 1, 1, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 0.75,
1, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1, 1, 1, 1, 0.75, 1, 1,
1, 0.75, 1, 1, 1, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1, 1, 1,
1, 1, 1, 0.75, 1, 1, 0.75, 1, 1), log.p.value. = c(-16.19928292,
-0.947, -2.899629455, -2.899629455, -2.675717545, -11.60032628,
-0.449, -2.093664958, -2.092051478, -1.8569852, -9.313363731,
-0.824, -1.266000713, -1.266000713, -1.270025714, -3.424812155,
-1.11, -2.167491087, -5.13076828, -1.962573502, -0.5783960731,
0, 0, 0, 0, -0.2373214363, 0, -0.2298847052, -0.2291479884,
-2.841637508, 0, 0, 0, -0.6968039426, 0, 0, 0, 0, 0, -0.7099653886,
0, 0, -0.6968039426, 0, 0, 0, 0, -0.6968039426, 0, 0, 0,
-0.857, 0, 0, 0, 0, 0, 0, 0, -1.419075024, 0, 0, -0.6968039426,
0, 0, 0, 0, 0, 0, -0.7099653886, 0, 0, -0.6968039426, 0,
0), X = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), range = structure(c(2L, 6L, 5L, 4L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "0 < p-value < 1E-9",
"1E-2 < p-value < 1", "1E-4 < p-value < 1E-2", "1E-6 < p-value < 1E-4",
"1E-9 < p-value < 1E-6"), class = "factor")), class = "data.frame", row.names = c(NA,
-75L))
I took the liberty to produce your barplot also with ggplot, because then we can use the awesome features of the cowplot package, which was made for things like these. Setting axis limits can be done with ylim() or xlim(), but because of the different widths of the legends, we need the cowplot package to truly align the plots (or the legends would need to go below the plots).
# recreating the barplot
library(dplyr)   # needed for data wrangling
library(ggplot2)

GYPCbarplot_ggplot <- GYPCdomain %>%
  filter(GYPC > 0) %>%
  # Label the three non-zero rows in reverse legend order, so that each length
  # gets the same domain (and colour) as in the base barplot(), where `col` is
  # recycled over the rows of the matrix.
  # NOTE(review): assumes the non-zero rows are ordered Extracellular,
  # Helical Membrane, Cytoplasmic -- confirm against the data source.
  mutate(
    domain = factor(
      rev(c("Cytoplasmic", "Helical Membrane", "Extracellular")),
      levels = c("Cytoplasmic", "Helical Membrane", "Extracellular"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = 1, y = GYPC, fill = domain)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c(
      "Cytoplasmic" = "azure",
      "Helical Membrane" = "plum1",
      "Extracellular" = "skyblue"
    )
  ) +
  coord_flip() +
  xlab("GYPC") +
  ylab("Length (Protein Domains Shown)") +
  ylim(0, 400) + # creates the limit
  # A single bar needs no category axis, so hide its grid, text and ticks.
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# the bubbleplot
# Uses GYPC, the data frame read in by the question's code.
GYPC_bubbleplot <- ggplot(GYPC, aes(Position, log10(Frequency))) +
  geom_jitter(aes(col = Geographical.Location, size = (p.value))) +
  labs(subtitle = "Frequency of Various Polymorphisms", title = "GYPC Gene") +
  labs(color = "Geographical Location") +
  labs(size = "p-value") +
  labs(x = "Position of Polymorphism on GYPC Gene") +
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE)) +
  xlim(0, 400) # added this limit

library(cowplot) # used to arrange the two plots
plot_grid(
  GYPCbarplot_ggplot, GYPC_bubbleplot,
  ncol = 1,               # both plots in one column (below each other)
  align = "v",            # align both bottom axes
  rel_heights = c(1, 1.5) # make bottom plot a bit higher
)
et voila:
If I understand correctly, the OP is asking to synchronise the x-axes in order to show the protein domains a certain position on the GYPC gene belongs to.
If my assumption is correct then there is an alternative approach which fills the background of the bubble plot according to the protein domains:
library(dplyr)

# Domain labels and a colour vector named by those labels.
domain_name <- c("Cytoplasmic", "Helical Membrane", "Extracellular")
domain_fill <- setNames(c("azure", "plum1", "skyblue"), domain_name)

# One row per non-zero domain length, with the start and end position of each
# domain obtained from the running total of the lengths.
GPYCdomain_2 <- GYPCdomain %>%
  filter(GYPC > 0) %>%
  mutate(
    domain_name = forcats::fct_inorder(rev(domain_name)),
    end_pos = cumsum(GYPC),
    start_pos = lag(end_pos, default = 0L)
  )

library(ggplot2)

# Bubble plot with the protein domains drawn as full-height background bands.
ggplot(GYPC, aes(Position, log10(Frequency))) +
  geom_rect(
    data = GPYCdomain_2,
    mapping = aes(
      xmin = start_pos, xmax = end_pos,
      ymin = -Inf, ymax = Inf,
      fill = domain_name
    ),
    inherit.aes = FALSE,
    alpha = 0.6
  ) +
  scale_fill_manual(values = domain_fill) +
  geom_jitter(aes(color = Geographical.Location, size = (p.value))) +
  labs(
    title = "GYPC Gene",
    subtitle = "Frequency of Various Polymorphisms",
    x = "Position of Polymorphism on GYPC Gene",
    color = "Geographical Location",
    size = "p-value",
    fill = "Protein Domain"
  ) +
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE))

Scatter plot with small pie charts with R

I have this data below called test1.melted. I also have the code to plot my data using package scatterpie, but due to inherent problem of scatterpie (if coordinates are not cartesian,i.e. equal horizontal and vertical distances), you would not get properly formatted plot. Is there a better way to plot this data without using scatterpie?
Data:
test1.melted<-structure(list(Wet_lab_dilution_A = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L), .Label = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L"), class = "factor"), TypeA = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("I", "II"), class = "factor"),
NA12878 = c(100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L, 100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L), NA12877 = c(0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L, 0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L), IBD = c(1.02, 0.619, 0.294,
0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978, 1, 1, 1.02,
0.619, 0.294, 0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978,
1, 1), variableA = c("tEst", "tEst", "tEst", "tEst", "tEst",
"tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "pair",
"pair", "pair", "pair", "pair", "pair", "pair", "pair", "pair",
"pair", "pair", "pair"), valueA = c(0.1, 59.8, 84.6, 89.2,
97.4, 100, 99.6, 56.4, 29.9, 24, 12.1, 0.1, 0.1, 51.08, 75.28,
80.09, 90.16, 100, 100, 48.09, 23.97, 18.81, 9.24, 0.08)), row.names = c(NA,
-24L), .Names = c("Wet_lab_dilution_A", "TypeA", "NA12878", "NA12877",
"IBD", "variableA", "valueA"), class = "data.frame")
code:
# One pie per observation at (valueA, IBD), sliced by the NA12878 and NA12877
# columns, in a grid of panels with TypeA as rows and variableA as columns.
p <- ggplot() +
  geom_scatterpie(
    mapping = aes(x = valueA, y = IBD, group = TypeA),
    data = test1.melted,
    cols = c("NA12878", "NA12877")
  ) +
  coord_equal() +
  facet_grid(TypeA ~ variableA)
p
Do you have to use a pie chart? (And you might; there's nothing wrong with them.)
Cause something like this could illustrate literally every variable in the dataset:
library(ggplot2)

# Turn both mixture columns into factors so they map to discrete colours.
mix_cols <- c("NA12877", "NA12878")
test1.melted[mix_cols] <- lapply(test1.melted[mix_cols], as.factor)

# Shape 21 has separate outline and fill colours, so a single point can show
# NA12877 (outline) and NA12878 (fill); the dilution letter is printed on top.
p <- ggplot(data = test1.melted, aes(x = valueA, y = IBD, group = TypeA)) +
  geom_point(
    aes(colour = NA12877, fill = NA12878),
    stroke = 3, size = 3, shape = 21
  ) +
  geom_text(aes(label = Wet_lab_dilution_A), size = 2)

# Facets and theme are added only for display; p itself stays unfaceted.
p + facet_grid(TypeA ~ variableA) + theme_minimal()

For loop and unexpected results

I'm trying to write a for loop, but I can't get it to work. If I run the statements outside the for loop they work fine, and I don't understand where the problem is.
output100 <- structure(list(row = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), col = c(17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L), cell = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), xcoord = c(783750L,
783750L, 783750L, 783750L, 783750L, 783750L, 783750L, 783750L,
783750L, 783750L, 783725L, 783725L, 783725L, 783725L, 783725L,
783725L, 783725L, 783725L, 783725L, 783725L), ycoord = c(187050L,
187050L, 187050L, 187050L, 187050L, 187050L, 187050L, 187050L,
187050L, 187050L, 187025L, 187025L, 187025L, 187025L, 187025L,
187025L, 187025L, 187025L, 187025L, 187025L), species = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("abiealba", "alnuviri", "larideci", "piceabie",
"pinucemb", "pinusilv", "popunigr", "poputrem", "salicapr", "sorbaucu"
), class = "factor"), age = c(590L, 250L, 230L, 210L, 200L, 190L,
180L, 110L, 100L, 90L, 720L, 320L, 300L, 230L, 170L, 160L, 150L,
140L, 130L, 80L), biomass = c(6.3836, 1.2988, 0.9683, 0.6574,
0.5083, 0.3398, 0.2163, 0.0863, 0.0591, 0.0418, 6.6135, 1.7666,
1.214, 0.7032, 0.3422, 0.2571, 0.1601, 0.0846, 0.0592, 0.0323
), stems = c(1L, 1L, 3L, 1L, 2L, 6L, 5L, 8L, 3L, 5L, 1L, 3L,
1L, 1L, 2L, 5L, 7L, 4L, 6L, 5L), slowGrowth = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), DBH = c(104.9563, 50.7341, 44.7226, 37.9815, 34.1311, 28.9447,
24.1329, 16.8379, 14.5727, 12.7875, 106.7731, 58.0343, 49.2757,
39.0663, 29.027, 25.8599, 21.4205, 16.7129, 14.5803, 11.6105),
height = c(45.999, 30.659, 28.1508, 25.0823, 23.1987, 20.5,
17.8196, 13.4049, 11.9423, 10.7572, 46.3418, 33.4408, 30.0693,
25.5954, 20.5444, 18.802, 16.2293, 13.3254, 11.9472, 9.96
), availableLight = c(0.8129, 0.4994, 0.3701, 0.2541, 0.217,
0.1588, 0.102, 0.075, 0.06, 0.0545, 0.8083, 0.4101, 0.2332,
0.196, 0.1694, 0.1347, 0.0941, 0.0702, 0.0602, 0.0519), light_rf = c(0.9832,
0.8951, 0.8029, 0.6577, 0.592, 0.463, 0.2972, 0.2003, 0.1407,
0.1174, 0.9826, 0.8371, 0.6213, 0.5487, 0.4885, 0.3973, 0.2696,
0.181, 0.1409, 0.1056), LeafArea = c(5.9777, 5.9777, 5.9777,
5.9777, 5.9777, 5.9777, 5.9777, 5.9777, 5.9777, 5.9777, 6.218,
6.218, 6.218, 6.218, 6.218, 6.218, 6.218, 6.218, 6.218, 6.218
), nitorgen_rf = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), droughtIndex = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), moisture_rf = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
degreeDay_rf = c(0.4405, 0.4405, 0.4405, 0.4405, 0.4405,
0.4405, 0.4405, 0.4405, 0.4405, 0.4405, 0.4405, 0.4405, 0.4405,
0.4405, 0.4405, 0.4405, 0.4405, 0.4405, 0.4405, 0.4405),
foliageWght = c(0.1471, 0.0473, 0.0389, 0.0301, 0.0255, 0.0197,
0.0149, 0.0085, 0.0068, 0.0055, 0.1511, 0.0584, 0.0452, 0.0315,
0.0198, 0.0165, 0.0123, 0.0084, 0.0068, 0.0047), twigWght = c(0.6236,
0.1251, 0.0929, 0.0627, 0.0483, 0.0455, 0.0674, 0.0488, 0.0376,
0.0286, 0.6462, 0.1708, 0.1169, 0.0672, 0.0448, 0.0639, 0.0655,
0.0482, 0.0376, 0.023), boleWght = c(5.6128, 1.1263, 0.8365,
0.5646, 0.4345, 0.2746, 0.134, 0.0291, 0.0148, 0.0077, 5.8161,
1.5374, 1.0519, 0.6045, 0.2776, 0.1766, 0.0823, 0.0281, 0.0149,
0.0045), deadFoliage = c(0.446, 0.446, 0.446, 0.446, 0.446,
0.446, 0.446, 0.446, 0.446, 0.446, 0.4278, 0.4278, 0.4278,
0.4278, 0.4278, 0.4278, 0.4278, 0.4278, 0.4278, 0.4278),
deadTwig = c(0.7874, 0.7874, 0.7874, 0.7874, 0.7874, 0.7874,
0.7874, 0.7874, 0.7874, 0.7874, 0.7322, 0.7322, 0.7322, 0.7322,
0.7322, 0.7322, 0.7322, 0.7322, 0.7322, 0.7322), deadbole = c(3.4762,
3.4762, 3.4762, 3.4762, 3.4762, 3.4762, 3.4762, 3.4762, 3.4762,
3.4762, 3.1449, 3.1449, 3.1449, 3.1449, 3.1449, 3.1449, 3.1449,
3.1449, 3.1449, 3.1449)), .Names = c("row", "col", "cell",
"xcoord", "ycoord", "species", "age", "biomass", "stems", "slowGrowth",
"DBH", "height", "availableLight", "light_rf", "LeafArea", "nitorgen_rf",
"droughtIndex", "moisture_rf", "degreeDay_rf", "foliageWght",
"twigWght", "boleWght", "deadFoliage", "deadTwig", "deadbole"
), row.names = c(NA, 20L), class = "data.frame")
Here is my code.
# For each cell id, take the largest value in column 8 of output100 among that
# cell's rows and prepend it to dom.
for (i in 0:1) {
# row indices whose cell equals i
t <- which(output100$cell == i)
# column 8 is "biomass" according to the .Names of output100
a <-max(output100[c(t),8])
# NOTE: dom is read here but is never created anywhere in this snippet
dom <- c(a, dom)
}
I want to get the maximum biomass (column 8) among the rows in "t" for each cell. Of course, this is just a small example (0:1); my real dataset is bigger.
not sure what your problem is, your for loop runs for me. Did you forget to initialise dom?
# Largest biomass per cell id, computed in one pass instead of growing a
# vector inside a for loop. The biomass column is addressed by name rather
# than by position 8, and no variable called `t` shadows base::t().
# which() is kept so that NA cell values are skipped, as before.
# rev() keeps the ordering of the loop version, which prepended each new
# maximum, so the last cell id comes first.
dom <- rev(vapply(
  0:1,
  function(cell_id) max(output100$biomass[which(output100$cell == cell_id)]),
  numeric(1)
))
dom
## [1] 6.6135 6.3836
works for me? What answers are you expecting?
Incidentally this may not be the most efficient way to do this as you are growing the results vector in the for loop. If you had lots of unique cell values this would be slow. You could achieve similar by using dplyr which also would not require you to know how many different cell values there were:
library(dplyr)
# One result row per distinct cell, holding that cell's largest biomass.
# The summary column is left unnamed, so it is labelled `max(biomass)`.
output100 %>%
group_by(cell) %>%
summarise(max(biomass))
## # A tibble: 2 × 2
## cell `max(biomass)`
## <int> <dbl>
## 1 0 6.3836
## 2 1 6.6135

bar chart of constant height for factors in time series

I am a beginner trying to use R for making graphs. Please help me. I have data with multiple columns (time series). Each column holds factors (please see the one-column example data below). I would like to make a constant-height (say 1 unit) bar chart of the time series, and would like to represent the two levels (“A” and “B” — “R” and “L” in the example below) in different colors, with the DATE on the x axis. Any tips?
Thanking you in advance!
DATE GROUP
2011.06.18 00:00:00 R
2011.06.18 06:00:00 L
2011.06.18 12:00:00 R
2011.06.18 18:00:00 R
2011.06.19 00:00:00 L
2011.06.19 06:00:00 L
2011.06.19 12:00:00 R
2011.06.19 18:00:00 L
2011.06.20 00:00:00 L
2011.06.20 06:00:00 L
2011.06.20 12:00:00 R
2011.06.20 18:00:00 L
2011.06.21 00:00:00 R
2011.06.21 06:00:00 L
Assuming your data are in dat, but with an extra column:
dat <- structure(list(DATE = structure(list(sec = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L), mday = c(18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L), mon = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), year = c(111L,
111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L,
111L, 111L), wday = c(6L, 6L, 6L, 6L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 2L, 2L), yday = c(168L, 168L, 168L, 168L, 169L, 169L,
169L, 169L, 170L, 170L, 170L, 170L, 171L, 171L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt")), GROUP = structure(c(2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("L",
"R"), class = "factor"), GROUP2 = structure(c(1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("L", "R"), class = "factor")), .Names = c("DATE",
"GROUP", "GROUP2"), row.names = c(NA, -14L), class = "data.frame")
Then I think this does what you want. First count the elements == to one of the classes
# Per row, count how many of the two group columns (columns 2 and 3) equal "R".
counts <- apply(dat[, 2:3], 1, function(x) sum(x == "R"))
then compute the other count and bind to a matrix:
# Two-row count matrix (rows "L" and "R", one column per observation). The L
# count is the number of non-DATE columns minus the R count.
countmat <- rbind(L = (NCOL(dat) - 1) - counts, R = counts)
then we plot using barplot()
# Enlarge the bottom margin and set las = 2 for the axis labels; op holds the
# previous graphics settings so they can be restored at the end.
op <- par(mar = c(9,4,4,2) + 0.1, las = 2)
# barplot() returns the bar midpoints, reused below to place the date labels.
mids <- barplot(countmat, ylim = c(0,2.5),
legend.text = c("L","R"),
args.legend = list(x = "top", bty = "n"))
# Label each bar with its timestamp.
axis(side = 1, at = mids, labels = as.character(dat$DATE))
# Restore the graphics settings saved in op.
par(op)
which produces:
See the help pages of the individual functions for explanations on the arguments.
Edit: If you just want to do this for an individual column, then this isn't the most interesting graph, but...
# TRUE where GROUP is "R"; the negation marks the other level.
count2 <- dat$GROUP == "R"
# Two-row indicator matrix (rows "R" and "L", one column per observation).
countmat2 <- rbind(R = count2, L = !count2)

# Save the graphics settings, draw the bars with a date label under each one,
# then restore the settings.
op <- par(mar = c(9, 4, 4, 2) + 0.1, las = 2)
mids <- barplot(
  countmat2,
  ylim = c(0, 1.5),
  legend.text = c("R", "L"),
  args.legend = list(x = "top", bty = "n")
)
axis(side = 1, at = mids, labels = as.character(dat$DATE))
par(op)
which gives this figure:

Resources