ggplot2: "geom_point requires the following missing aesthetics: x, y" - r

I am facing the problem that I am not able to specify the shape of the line symbols (without this specification the code works fine):
Below the data in reproducible format (it is the effects data put into a data frame):
structure(list(varL = c(0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07), varP = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L), .Label = c("(0,0.1]", "(0.1,0.2]",
"(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]",
"(0.7,0.8]", NA), class = "factor"), fit = c(0.0496509727291671,
0.0889644199210129, 0.147763911240627, 0.228140612498209, 0.328558663864939,
0.0137066329240178, 0.0170188110490053, 0.0209924787528359, 0.0257246732663005,
0.0313187292462082, 0.0289376730565942, 0.0324367840687503, 0.036277818691311,
0.0404834466212193, 0.0450765434401318, 0.0377500587733006, 0.0506605267612627,
0.0668653640284829, 0.0868169793966305, 0.110912824327041, 0.0461062991171287,
0.0536136421990573, 0.0620580975149222, 0.071506100162885, 0.0820206662867591,
0.0271688764980807, 0.0310122602430318, 0.0352949603875076, 0.0400511628245002,
0.0453154762467586, 0.0593111130006543, 0.0777425439930874, 0.100226912943776,
0.127122712337706, 0.158670602546708, 0.02092268966042, 0.0481738946672621,
0.0984225581163725, 0.179214944179607, 0.292488347088707), se = c(0.0259513690928884,
0.0478802966619357, 0.0959400030912549, 0.146319368888539, 0.197248937550513,
0.033511891943933, 0.0649738808934063, 0.13283528902344, 0.203454843482363,
0.274713638499851, 0.0399137666412373, 0.0836182332502119, 0.170994872374127,
0.261409298049175, 0.352531889503407, 0.0128068165036135, 0.0265824058594164,
0.054035051049317, 0.0824833429902055, 0.111165505837411, 0.00821998219695643,
0.0204628357910751, 0.0416140898624852, 0.0632975717285407, 0.08510744963605,
0.0111710559049469, 0.0241847618850518, 0.0491238092261353, 0.0748967974373985,
0.100866484066391, 0.0158269724358688, 0.0376131484048352, 0.0769417704226139,
0.117330108518709, 0.157967414110193, 0.041410334660995, 0.0756439112597116,
0.154046905957391, 0.236093539915582, 0.318984533128398), lower = c(0.0446491361632188,
0.0747918828643712, 0.108580794230823, 0.151091116521613, 0.203128798877193,
0.0115654911703096, 0.0123209025069961, 0.0108946289492482, 0.00947592068351736,
0.00819339231583828, 0.0241414249675257, 0.022214689650845, 0.0165544754495112,
0.0119897952488384, 0.00852702139073778, 0.0357322013203882,
0.0454582184069632, 0.05419654291372, 0.0639689796722408, 0.0749947791208939,
0.0445700854939895, 0.0493806984121361, 0.0526928942349234, 0.0560610804290682,
0.0595674652939625, 0.0258256186623322, 0.0278406506196297, 0.0284298721928641,
0.0289213813577344, 0.0293941320590341, 0.0557369600349524, 0.0675693013920541,
0.0762061129774737, 0.0853339447169982, 0.0951745618665348, 0.0171631624694616,
0.0350640216006218, 0.0556341130925104, 0.0836247441564837, 0.120733474483254
), upper = c(0.0550901244832386, 0.10504537627394, 0.195437125694758,
0.323403092731095, 0.477154780078397, 0.0161814201791539, 0.0231702136215256,
0.0380881102321762, 0.0606970430990062, 0.0928632628051039, 0.0345006405904498,
0.046261189541596, 0.0720801941481896, 0.10883443823233, 0.1577758610065,
0.0398599758225597, 0.0563263318177106, 0.0817125182150079, 0.115272694473414,
0.157735685375171, 0.0476847568332783, 0.0581290329012679, 0.072673983374196,
0.0900244403895002, 0.110325327955545, 0.0285699305757627, 0.0344771441605397,
0.0434644025927379, 0.0544344789594513, 0.0675376260500437, 0.0630625316437756,
0.0890383062732732, 0.129352639664315, 0.181375571131999, 0.244994560270329,
0.0253570051034752, 0.06494141884307, 0.161413333266735, 0.324329403439929,
0.531510876579345)), .Names = c("varL", "varP",
"fit", "se", "lower", "upper"), row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40"), class = "data.frame") -> effectdat
The following code yields an error:
library(ggplot2)
ggplot(effectdat) + geom_line(aes(varL,fit,linetype=varP)) + theme_bw() + geom_point(aes(shape = varP))
Error: geom_point requires the following missing aesthetics: x, y
I read here ggplot2_Error: geom_point requires the following missing aesthetics: y to use the unlist function. However, this produces another error:
ggplot(unlist(effectdat)) + geom_line(aes(varL,fit,linetype=varP)) + theme_bw() + geom_point(aes(shape = varP))
Error: ggplot2 doesn't know how to deal with data of class numeric
Any ideas what is wrong? What surprises me is that the function without geom_point() seems to work fine.

No need to unlist the data.frame. Code below works:
ggplot(effectdat) + geom_line(aes(x = varL,y = fit,linetype=varP)) + theme_bw() + geom_point(aes(x = varL,y = fit, shape = varP))
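Explanation: the x and y aesthetics were only mapped inside geom_line(), and aesthetics mapped in one layer are not shared with other layers, so geom_point() needs its own x and y. Alternatively, map them once in ggplot(effectdat, aes(varL, fit)) so that every layer inherits them.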

Related

Issues with displaying data points on every frame of facet_wrap/facet_grid object

I'm trying to produce a plot with either facet_wrap or facet_grid (no preference at this time), but display a selection of data points on every frame within the facet_wrap/facet_grid object.
I read that you can simply remove the facetting variable from the data set you want included on every plot, but for whatever reason this doesn't seem to be working for me.
This is on Rstudio Version 1.1.453.
I found this code sample:
ggplot(mpg, aes(displ, hwy)) +
geom_point(data = transform(mpg, class = NULL), colour = "grey85") +
geom_point() +
facet_wrap(~class)
And pretty much copied it for my code below. The above code works fine, but for whatever reason in my implementation it returns an error message. Note I've tried setting both geom features to geom_point also with no luck.
ggplot(data = Total, aes(Total$Time, Total$Killing)) +
geom_jitter(data = transform(Total, Run = NULL), colour = "grey85") +
geom_point() +
facet_wrap(~Run)
Error: Aesthetics must be either length 1 or the same as the data (2700): x, y
This is the error message I've been encountering on attempting to run this code.
Ultimately my goal is to run the below code, but I simplified it a bit for the purposes of the question above.
ggplot(data = filter(Total, Cell_Line != "stDev"), aes(x= Time, y=Killing)) +
geom_line(data = filter(select(Total, -Run), Cell_Line == "Wild_Type"), aes(x = Time, y = filter(Total, Cell_Line == "Wild_Type")[,3])) +
geom_errorbar(aes(x = filter(Total, Cell_Line == "Wild_Type")[,2], ymax = filter(Total, Cell_Line == "Wild_Type")[,3] + filter(Total, Cell_Line == "stDev")[,3], ymin = filter(Total, Cell_Line == "Wild_Type")[,3] - filter(Total, Cell_Line == "stDev")[,3])) +
geom_point() +
facet_wrap(~Run)
And here's the result of dput(Total) trimmed down to the first 30 rows:
structure(list(Cell_Line = structure(c(5L, 12L, 13L, 1L, 2L,
3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 15L, 14L, 5L, 12L, 13L, 1L,
2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 15L, 14L), .Label = c("17",
"19", "20", "29", "3", "33", "38", "47", "49", "53", "55", "7",
"8", "stDev", "Wild_Type"), class = "factor"), Time = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("00",
"02", "04", "08", "12", "18", "24", "32", "40", "48", "56", "64",
"72", "80"), class = "factor"), Killing = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0704388, 0.2881066, -0.0132908,
0.04700991, 0.03049371, -0.02243472, 0.1513817, 0.129636, 0.09328508,
0.05876777, 0.1063291, 0.0357473, 0.1974026, 0.07732854, 0.07383331
)), row.names = c(NA, 30L), class = "data.frame")
Your call to transform has an error: you don't have a column named Run.
set.seed(1)
Total$Run <- sample(1:100, 30)
# this is your own code:
ggplot(data = Total, aes(Total$Time, Total$Killing)) +
geom_jitter(data = transform(Total, Run = NULL), colour = "grey85") +
geom_point() +
facet_wrap(~Run)
Which produces this plot:

Comparing interaction effect plots involving continuous variables from ggplot2 vs. base R using the effects package

I want to plot interaction effects on my data set. I cannot share the full data set due to confidentiality reasons, but I have added the output of the effect() function, which can be substituted into the plot functions to reproduce my results
The interaction effect is between two continuous variables, varP and varL. The model is a binomial probit glm. I am using the effects package.
Calling base R plot() yields the following chart:
plot(effect("varL:varP", hx.x), multiline = TRUE)
Calling ggplot yields an error, because ggplot2 cannot map a continuous variable to linetype (Error: A continuous variable can not be mapped to linetype). So I decided to use cut() to transform the continuous variable into a categorical one and redo the regression. Calling ggplot now yields the following plot:
ggplot(data.frame(effect("varL:varP_range", hx.x1))) +
geom_line(aes(varL,fit,linetype=varP_range)) +
theme_bw() +
geom_point(aes(varL,fit, shape = varP_range), size = 4)
The effect() call results in the following data frame:
structure(list(varL = c(0, 4900000, 9800000, 1.5e+07, 2e+07,
0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07,
2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000,
1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000,
9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07),
varP_range = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L), .Label = c("(0,0.1]", "(0.1,0.2]", "(0.2,0.3]",
"(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]",
NA), class = "factor"), fit = c(0.0493753018091432, 0.0674980435061065,
0.0903776022141578, 0.120475525568566, 0.155489425305936,
0.0137978311572348, 0.0146948327184384, 0.0156415523658014,
0.0167030042368772, 0.0177811545669692, 0.0283241226002015,
0.0320688611660079, 0.0362132772188478, 0.0410808356583781,
0.0462488565292175, 0.0376893470992477, 0.0434689290038736,
0.0499436899670642, 0.0576329205782917, 0.0658761148111993,
0.0461027343009466, 0.0491516310196363, 0.0523594626652533,
0.0559432088894986, 0.0595689358953948, 0.0271243286792884,
0.0288090021916312, 0.0305797680715237, 0.0325565879397662,
0.0345556259933517, 0.0585874566843392, 0.0695486360371181,
0.0820248510196267, 0.0970343941431089, 0.113280038210583,
0.0209075509863267, 0.0315251673839061, 0.046253681947139,
0.0674584535698224, 0.0942769896468577), se = c(0.026293040668674,
0.0305781859107515, 0.0540612604209606, 0.0833019722980397,
0.112487542810938, 0.0338534463222995, 0.0398771851800222,
0.0731653210576088, 0.113913900015802, 0.15437353282528,
0.0409108254231718, 0.0511475671899819, 0.0972355260310069,
0.152187250282481, 0.206403037601197, 0.0130040830190118,
0.0163131582968098, 0.03036212612247, 0.0472104967827754,
0.0638665398985353, 0.00832820196917705, 0.0121268883535081,
0.0232740805148284, 0.0361535949146859, 0.0487814483071773,
0.0113362224665359, 0.0147098330114556, 0.027542020584634,
0.0427882020481342, 0.057830481511906, 0.01601220706873,
0.0215340145342622, 0.040911260287911, 0.0636748713064177,
0.0860732173940343, 0.0419361989774607, 0.0474273262485864,
0.085507397635156, 0.132979171779259, 0.180309829530109),
lower = c(0.0443332267166309, 0.0600183728153565, 0.0743144400165266,
0.0907939516369112, 0.108668209187463, 0.0116238722892657,
0.012023490450893, 0.0108135425488573, 0.00937570172867644,
0.00809811278621186, 0.0235056239743921, 0.025500658500369,
0.0234608868503482, 0.0208470526400479, 0.0184481609874694,
0.0356438216686479, 0.0406013691901053, 0.0441057429680943,
0.0477084891588961, 0.0512971927819841, 0.0445466712620454,
0.0467806376034436, 0.0476567944606461, 0.048393808464277,
0.0490625016612481, 0.0257635103540086, 0.0269634191042765,
0.0270321095726769, 0.0269061995593561, 0.0267392100653416,
0.0550074959164803, 0.0640838876027606, 0.0705422656571929,
0.0773047979652944, 0.0841065921246788, 0.017106637503393,
0.025481438787035, 0.0321633667203674, 0.0395762352261805,
0.0476311065775831), upper = c(0.0548650233126245, 0.0756784493915628,
0.108885802402636, 0.156404820719125, 0.213976690522108,
0.0163138248731072, 0.0178616891841985, 0.0222128708020368,
0.0284614557616305, 0.0359832219741959, 0.0339378335208811,
0.039975201853988, 0.0541535055907615, 0.0749340625856128,
0.100655675600276, 0.0398297048294564, 0.0464977933470156,
0.0563819985882714, 0.0691131684577491, 0.0834778881744663,
0.0477021593328273, 0.0516176358055639, 0.0574233019439704,
0.0643924091986213, 0.0717620710757599, 0.0285446055381808,
0.0307584171780848, 0.0345046440925246, 0.0391516285886827,
0.0441582201173176, 0.062347826961093, 0.0753653653841884,
0.0948621519669373, 0.120230399782042, 0.149038245196167,
0.0254001895885684, 0.0387079476679479, 0.0649209421145592,
0.108535107340218, 0.16815843130232)), .Names = c("varL",
"varP_range", "fit", "se", "lower", "upper"), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40"), class = "data.frame")
My questions are:
1) How does effect() from the effects package transform the continuous into a categorical variable? How would this be replicated with ggplot()?
2) What is the reason that the lines in the base R plot all intersect at the same coordinate?
To answer your first question: To define the factors, Effects uses
nice(seq(min(var), max(var), length.out=5))
where nice is defined like so (from here):
# Round a number to a "nice" value that keeps only `lead.digits` leading digits.
#
# Arguments:
#   x           numeric scalar or vector.
#   direction   "round" (default), "down" or "up": how the leading digits of
#               abs(x) are rounded (round / floor / ceiling).
#   lead.digits number of leading digits to keep.
#
# Returns a numeric of the same length as `x`. For a vector, every element is
# processed on its own; if that yields duplicated values, the whole vector is
# recomputed with one more leading digit.
nice <- function(x, direction = c("round", "down", "up"), lead.digits = 1) {
  direction <- match.arg(direction)

  # Vector input: handle each element through the scalar path below.
  if (length(x) > 1) {
    rounded <- sapply(x, nice, direction = direction, lead.digits = lead.digits)
    if (anyDuplicated(rounded) > 0) {
      # Collisions after rounding: retry with a finer resolution.
      rounded <- nice(x, direction = direction, lead.digits = lead.digits + 1)
    }
    return(rounded)
  }

  # Zero has no order of magnitude, so it is returned unchanged.
  if (x == 0) {
    return(0)
  }

  magnitude <- abs(x)
  exponent <- floor(log(magnitude, 10))
  if (lead.digits > 1) {
    exponent <- exponent - lead.digits + 1
  }
  unit <- 10^exponent

  # Pick the rounding function that corresponds to the requested direction.
  round_fun <- switch(direction, round = round, down = floor, up = ceiling)
  sign(x) * round_fun(magnitude / unit) * unit
}
An example of using it:
library(effects)
# Simulate two continuous predictors and a binary response, then fit a probit
# model with an x:z interaction.
set.seed(123)
x <- rnorm(100)
z <- rexp(100)
y <- factor(sample(1:2, 100, replace = TRUE))
test <- glm(y ~ x + z + x * z, family = binomial(link = "probit"))
# Prediction grid: every combination of the 5 "nice" breakpoints of x and z.
preddat <- expand.grid(
  nice(seq(min(x), max(x), length.out = 5)),
  nice(seq(min(z), max(z), length.out = 5))
)
colnames(preddat) <- c("x", "z")
predicts <- predict(test, preddat, type = "response")
dim(predicts) <- c(5, 5)
# effect() reports fits on the link (probit) scale; pnorm() maps them back to
# probabilities so they can be compared with predict(type = "response").
effectspred <- pnorm(effect("x:z", test)$fit)
dim(effectspred) <- c(5, 5)
all.equal(effectspred, predicts)
[1] TRUE
This is straight forward to use with ggplot:
library(tidyverse)
predicts %>% as.data.frame() %>%
gather() %>% ggplot() + geom_line(aes(x = rep(nice(seq(min(x), max(x), length.out=5)), 5), y = value, color=key))
And regarding question 2, an intuitive way to think about it might be that, since the mean of Yhat doesn't change (predicted with fixed z), all the standard normal CDFs of the partial effects share the same intersection. E.g.
sapply(seq(0, 5, length.out = 15), function(k) {
predict(test, data.frame(x = seq(-20, 20, length.out = 200), z = k), type = "response")}) %>%
as.data.frame() %>% gather() %>% ggplot() +
geom_line(aes(x = rep(seq(-20, 20, length.out = 200), 15), y = value, color = key))

Error in ggplot

I am trying to make a ggplot. When I had shape in aesthetics, the code was working just fine. However, I need to put shape in geom_point() because I'm trying to reproduce a figure. And when I added shape to geom_point() it gave me the following error:
Aesthetics must be either length 1 or the same as the data (6): shape
I've looked for other answers here but apparently, nothing seems to be working for me. Above I've provided an image of what my data looks like. There are 17000 entries.
Below is my code:
summarised_data <-ddply(mammals,c('mammals$chr','mammals$Species','mammals$chrMark'),
function (x) c(median_rpkm = median(x$RPKM), median = median(x$dNdS)))
ggplot(summarised_data,aes(x = summarised_data$median_rpkm, y = summarised_data$median,
color = summarised_data$`mammals$Species`)) + geom_smooth(se = FALSE, method = "lm") +
geom_point(shape = summarised_data$`mammals$chrMark`) + xlab("median RPKM") + ylab("dNdS")
"ENSG00000213221", "ENSG00000213341", "ENSG00000213380", "ENSG00000213424",
"ENSG00000213533", "ENSG00000213551", "ENSG00000213619", "ENSG00000213626",
"ENSG00000213699", "ENSG00000213782", "ENSG00000213949", "ENSG00000214013",
"ENSG00000214338", "ENSG00000214357", "ENSG00000214367", "ENSG00000214517",
"ENSG00000214814", "ENSG00000215203", "ENSG00000215305", "ENSG00000215367",
"ENSG00000215440", "ENSG00000215897", "ENSG00000221947", "ENSG00000222011",
"ENSG00000224051", "ENSG00000225830", "ENSG00000225921", "ENSG00000239305",
"ENSG00000239474", "ENSG00000239900", "ENSG00000241058", "ENSG00000242247",
"ENSG00000242612", "ENSG00000243646", "ENSG00000244038", "ENSG00000244045"),
class = "factor"), Species = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Chimp", "Gori", "Human", "Maca",
"Mouse", "Oran"), class = "factor"), labs = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Chimp-A", "Chimp-X",
"Gori-A", "Gori-X", "Human-A", "Human-X", "Maca-A", "Maca-X",
"Mouse-A", "Mouse-X", "Oran-A", "Oran-X"), class = "factor"),
chrMark = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "X"), class = "factor"), chr = structure(c(27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "2a", "2A", "2b", "2B", "3", "4",
"5", "6", "7", "8", "9", "X"), class = "factor"), dN = c(3.00669,
3.27182, 7.02044, 1.01784, 3.0363, 2.32786, 4.92959, 3.03753,
3.0776, 1.02147), dS = c(3.15631, 5.87147, 3.13716, 2.05438,
4.10205, 5.24764, 4.2014, 3.18086, 5.4942, 3.02169), dNdS = c(0.9525965447,
0.5572403504, 2.2378329444, 0.4954487485, 0.7401908802, 0.4436013141,
1.1733207978, 0.954939859, 0.5601543446, 0.3380459279), RPKM = c(31.6,
13.9, 26.3, 9.02, 11.3, 137, 242, 1.05, 59.4, 10.1), Tau = c(0.7113820598,
0.8391023102, 0.3185943152, 0.6887167806, 0.9120531859, 0.6254200542,
0.7165302682, 0.7257435312, 0.2586613298, 0.6493567251),
GC3 = c(0.615502, 0.622543, 0.393064, 0.490141, 0.461592,
0.626407, 0.490305, 0.482853, 0.346424, 0.466484)), .Names = c("gene",
"Species", "labs", "chrMark", "chr", "dN", "dS", "dNdS", "RPKM",
"Tau", "GC3"), row.names = c(NA, 10L), class = "data.frame")
There are a few things wrong with your code and with how it uses ggplot's non-standard evaluation; I'd recommend reading a ggplot tutorial or the docs. Having columns within summarised_data called 'mammals$Species' and 'mammals$chrMark' is going to cause lots of problems.
If we change these to something more sensible...
# Rename the awkward columns produced by the ddply() call. The comparison is
# case-sensitive, so the species column must be spelled "mammals$Species".
names(summarised_data)[names(summarised_data) == "mammals$Species"] <- "mammals_species"
names(summarised_data)[names(summarised_data) == "mammals$chrMark"] <- "mammals_chrMark"
We can make the ggplot code more friendly. Note that shape has to be within aes, as you're mapping it to your data.
ggplot(summarised_data, aes(x = median_rpkm, y = median)) +
geom_smooth(se = FALSE, method = "lm") +
geom_point(aes(shape = mammals_chrMark,
color = mammals_species)) +
xlab("median RPKM") + ylab("dNdS")
Hopefully this should work, or at least get you somewhere closer to an answer.

Stacked Area Graph Using R and ggplot2 Has Holes

I'm trying to create a stacked area graph with r and ggplot2. I'd like it to look
like this, but instead the areas overlap and have holes. I'm trying to ensure that the areas are stacked so that the area with the largest value in the most recent month (2016-05 in this case) are on the bottom.
Related posts like this one seem to have holes in the data, which doesn't seem to be the issue here.
Here's sample code to recreate the issue:
sample.data <- structure(
list(
rank = structure(
c(34L, 34L, 34L, 35L, 35L, 35L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 35L, 34L),
.Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35"),
class = "factor"),
vendor = structure(
c(1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L),
.Label = c("34", "35"),
class = "factor"),
year.month = c("2015-12", "2016-01", "2015-11", "2015-12", "2016-01", "2015-10", "2016-03", "2016-02", "2015-10", "2016-04", "2015-11", "2016-05", "2016-04", "2016-03", "2016-02", "2016-05"),
value = c(431616L, 272224L, 229288L, 195284L, 155168L, 154194L, 149784L, 137302L, 126612L, 117408L, 94141L, 56161L, 54606L, 53173L, 49898L, 45348L)),
.Names = c("rank", "vendor", "year.month", "value"),
row.names = c(6L, 8L, 4L, 5L, 7L, 1L, 12L, 10L, 2L, 14L, 3L, 15L, 13L, 11L, 9L, 16L),
class = "data.frame"
)
ggplot(data = sample.data, aes(x = year.month, y = value, group = vendor, color = vendor, reorder(-value), fill=vendor)) +
geom_area()
Thanks in advance for your help.
Try: + geom_area(position="dodge",stat="identity")
The following works:
ggplot(data = sample.data[order(sample.data$vendor),],
aes(x = year.month, y = value, group = vendor, color = vendor,
reorder(-value), fill=vendor)) + geom_area()
You just had to order your data: sample.data[order(sample.data$vendor),].
If you want to change the order of the graph, you have to "relevel" the vendor variable which is stored as a factor:
sample.data$vendor <- relevel(sample.data$vendor, ref="35")
Here is some code to figure out what vendor to set as the base level according to your criterion:
with(sample.data, sample.data[year.month=="2016-05",
"vendor"][which.max(sample.data[year.month=="2016-05", "value"])])

R - plot vertical profile

I have measurements of CH4 concentration with depth:
df <- structure(list(Depth = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 15L, 16L, 17L), .Label = c("0", "10",
"12", "14", "16", "18", "2", "20", "22", "24", "26", "28", "30",
"32", "4", "6", "8", "AR"), class = "factor"), Conc_CH4 = c(4.30769230769231,
23.1846153846154, 14.5615384615385, 21.1769230769231, 16.2615384615385,
132.007692307692, 5.86923076923077, 389.353846153846, 823.023076923077,
948.684615384615, 1436.56923076923, 1939.88461538462, 26.2769230769231,
27.5538461538462, 19.6461538461538)), .Names = c("Depth", "Conc_CH4"
), row.names = c(NA, -15L), class = "data.frame")
And I need to create a plot like this:
But I have some problems: the factors in my data are in the wrong order, and I don't know how to plot this kind of data using ggplot2.
Any ideas?
Here's a solution with base plotting functions (you reverse the limits of ylim):
# Depth is a factor: go through character so the labels, not the internal
# level codes, become the numeric depths.
df$Depth <- as.numeric(as.character(df$Depth))
# Sort by depth so the line is drawn from the surface downwards.
df <- df[order(df$Depth), ]
# Reversed ylim puts depth 0 at the top of the plot.
plot(Depth ~ Conc_CH4, df, type = "l", ylim = rev(range(df$Depth)))
Why not convert Depth to a number and plot?
ggplot(transform(df, Depth=as.numeric(as.character(df$Depth))),
aes(x=Conc_CH4, y=Depth)) +
geom_line() + scale_y_reverse()
The as.numeric(as.character(...)) is needed because your Depth is a factor: calling as.numeric directly on a factor returns its internal integer level codes, not the numbers shown in the labels, so the factor has to be converted to character first.
The scale_y_reverse reverses the y scale.
If your actual data has a depth of "AR" in it, you'll have to omit them or otherwise handle them.

Resources