I want to plot confidence intervals of fitted values. I have read some related posts, but I am still stuck. This is a sample of the data:
pd <-structure(list(date = 1:5, obs = c(44.6651011845397, 62.3441339250369,
52.8968240161506, 51.7795930633478, 63.1284636561025), pred = c(47.2643891039645,
55.7996450577325, 52.9566469533233, 51.3393289316, 59.0011440099732)),
.Names = c("date", "obs", "pred"), row.names = c(NA, 5L), class = "data.frame")
pd2 <- structure(list(date = 1:5, lwr = c(44.8529592578518, 54.9926370476338,
51.7358955911624, 49.401869166722, 58.1674619447108), upr = c(49.6758189500772,
56.6066530678312, 54.1773983154842, 53.2767886964779, 59.8348260752356
)), .Names = c("date", "lwr", "upr"), row.names = c(NA, 5L), class = "data.frame")
# Long-format copy of pd; the aes() calls below read its `variable` and
# `value` columns.
dd <- melt(pd, id=c("date")) #Data
# Long-format copy of pd2.
# NOTE(review): after melting, lwr/upr are presumably values of `variable`
# rather than columns, which would explain "object 'lwr' not found" -- confirm.
dd2 <- melt(pd2,id=c("date")) #Intervals of conf.
# Line plot with one coloured line per series in dd.
p <- ggplot(dd) + geom_line(aes(x=date, y=value, colour=variable))
# Attempt 1: draw the band from dd2 using stat='identity'.
# NOTE(review): ymax is mapped to lwr and ymin to upr, which looks swapped --
# confirm.
p <- p + geom_smooth(aes(x=date, y=value, ymax=lwr, ymin=upr), #1 way
colour='grey', data=dd2, stat='identity')
Also tried...
# p+ geom_ribbon(data=dd2,aes(ymin=lwr,ymax=upr),alpha=0.3) #2.
I received the error:
Error in eval(expr, envir, enclos) : object 'lwr' not found ....what am I missing?
I also tried to do it without using melt ... but then I also had problems with the legend!
In the first approach, the dd object you are using does not have the lwr/upr columns, so you cannot plot them.
Can you do a:
# Attach the lwr/upr columns of pd2 to every melted row with the same date.
dd <- merge(x = dd, y = pd2, by = "date")
just after the melts and then:
# Base plot: one coloured line per melted series.
p <- ggplot(dd) +
  geom_line(aes(x = date, y = value, colour = variable))
# Overlay a semi-transparent band between the lwr and upr columns of dd.
p + geom_ribbon(
  data = dd,
  aes(x = date, y = value, ymin = lwr, ymax = upr, group = variable),
  alpha = 0.3
)
Does this help?
Related
A previous post detailed a solution for reversing the time axis on a ggplot with this data:
MyData <-
structure(list(Date = structure(c(1492979809.99827, 1492602845.68722,
1493093428.90318, 1492605578.0691, 1492961342.65056, 1492771976.83545,
1493020588.88485, 1493057018.85104, 1492852011.23873, 1492855996.55059
), class = c("POSIXct", "POSIXt")), Value = c(4.52885504579172,
6.0024610790424, 8.96430060034618, 7.06435370026156, 5.08460514713079,
3.47828012891114, 6.29844291834161, 0.898315710946918, 1.44857675535604,
5.74641009094194)), .Names = c("Date", "Value"), row.names = c(NA,
-10L), class = "data.frame")
and this solution:
# Compose two scale transformations into one: `b` is applied first, then `a`.
#
# a, b    Transformation objects, or anything as.trans() accepts.
# breaks  Breaks specification handed to trans_new(); defaults to b$breaks.
# format  Label formatter handed to trans_new(); defaults to b$format.
# Returns the object built by trans_new(), named "<a name>-<b name>".
c_trans <- function(a, b, breaks = b$breaks, format = b$format) {
  outer_tr <- as.trans(a)
  # Keep the name `b`: the lazy defaults of `breaks` and `format` read
  # b$breaks and b$format after this conversion.
  b <- as.trans(b)
  forward <- function(x) outer_tr$trans(b$trans(x))
  backward <- function(x) b$inverse(outer_tr$inverse(x))
  trans_new(
    paste(outer_tr$name, b$name, sep = "-"),
    forward,
    inverse = backward,
    breaks = breaks,
    format = format
  )
}
# Composite transformation: "time" is applied first, then "reverse".
rev_date <- c_trans("reverse", "time")
# Scatter plot of Value against Date with the composite transformation on y.
ggplot(MyData, aes(x = Value, y = Date)) +
  geom_point() +
  scale_y_continuous(trans = rev_date)
However, when I try to run this code now, I get the following error:
Error: Invalid input: time_trans works with objects of class POSIXct only
I am running this code on R version 4.2.2 and cannot find a solution to fix this.
I tried forcing my data into POSIXct format, but the graphing still breaks in the same way.
You could slightly modify the function coord_x_datetime from the tidyquant package to reverse the datetime for your y-axis. Here is the function called coord_y_datetime:
# Cartesian coordinate system whose y limits may be given as date-times.
#
# xlim    Passed unchanged to ggplot2::coord_cartesian().
# ylim    NULL, or limits that are converted with lubridate::as_datetime()
#         before being passed on.
# expand  Passed unchanged to ggplot2::coord_cartesian().
# Returns the result of ggplot2::coord_cartesian().
coord_y_datetime <- function(xlim = NULL, ylim = NULL, expand = TRUE) {
  y_limits <- if (is.null(ylim)) NULL else lubridate::as_datetime(ylim)
  ggplot2::coord_cartesian(xlim = xlim, ylim = y_limits, expand = expand)
}
Here is a reproducible example:
library(ggplot2)
# Scatter plot with the y limits passed as c(latest, earliest) date.
ggplot(data = MyData, mapping = aes(x = Value, y = Date)) +
  geom_point() +
  coord_y_datetime(
    ylim = c(max(MyData$Date), min(MyData$Date))
  )
Created on 2022-11-14 with reprex v2.0.2
I want to create a forecasting plot via ggplot2. I have been trying to follow this example; however, the output is not as intended, as I would like to have year on the x-axis and AvgTMean on the y-axis.
The purpose is to forecast the next 10 years.
How can I do this?
Data (AvgTMaxYear):
structure(list(year = 1980:2021, AvgTMean = c(24.2700686838937,
23.8852956598276, 25.094446596092, 24.1561175050287, 24.157183605977,
24.3047482638362, 24.7899738481466, 24.5756232655603, 24.5833086228592,
24.7344695534483, 25.3094451071121, 25.2100615173707, 24.3651692293534,
24.5423890611494, 25.2492166633908, 24.7005097837931, 24.2491591827443,
25.0912281781322, 25.0779264303305, 24.403294248319, 24.4983991453592,
24.4292324356466, 24.8179824927011, 24.7243948463075, 24.5086534543966,
24.2818632071983, 24.4567195220259, 24.8402224356034, 24.6574465515086,
24.5440715673563, 23.482670620977, 24.9979594684914, 24.5452453980747,
24.9271462811494, 24.7443215819253, 25.8929839790805, 25.1801908261063,
25.2079308058908, 25.0722425561207, 25.4554644289799, 25.4548979078736,
25.0756772250287)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-42L))
Method 1:
library(tidyverse)
library(forecast)
# Coerce the whole table to a time-series object
AvgTMaxYearTS <- ts(AvgTMaxYear)
# Draw the series and add the forecast layer
autoplot(AvgTMaxYearTS) +
  geom_forecast()
Output:
Method 2:
# Line plot of AvgTMean by year, kept in `p`
p <- ggplot(data = AvgTMaxYear, mapping = aes(x = year, y = AvgTMean)) +
  geom_line()
# Add the forecast layer when printing
p + geom_forecast()
Error:
Error in FUN(X[[i]], ...) : object 'AvgTMean' not found
When I run your second method it runs like this:
library(ggplot2)
library(forecast)
# Line plot of AvgTMean by year, kept in `p`
p <- ggplot(data = AvgTMaxYear, mapping = aes(x = year, y = AvgTMean)) +
  geom_line()
# Add the forecast layer when printing
p + geom_forecast()
Output:
I have a data frame like this:
dput(data)
structure(list(Anno = c(2015L, 2014L), Gennaio = c(381, 270.9
), Febbraio = c(355, 266.75), Marzo = c(352, 285.5), Aprile = c(323,
288), Maggio = c(296, 288), Giugno = c(307, 276.17), Luglio = c(340,
298.75), Agosto = c(335, 307.5), Settembre = c(304, 307.5), Ottobre = c(283,
342.5), Novembre = c(281, 401.25), Dicembre = c(274, 387.5)), .Names = c("Anno",
"Gennaio", "Febbraio", "Marzo", "Aprile", "Maggio", "Giugno",
"Luglio", "Agosto", "Settembre", "Ottobre", "Novembre", "Dicembre"
), class = "data.frame", row.names = c(NA, -2L))
I would like to plot this with multiple lines, grouped by year (Anno). How can I do that? Something like this:
I tried this:
# Reshape to long format with Anno as the id column.
# NOTE(review): variable.name is also "Anno", so the melted variable column
# would share the id column's name -- confirm this is intended.
data.melted <- melt(data, id.vars="Anno", value.name="Mese", variable.name="Anno")
# Line-and-point plot of the melted data.
# NOTE(review): the leading `+` on the next two lines looks like a pasted
# console continuation prompt -- confirm.
ggplot(data=data.melted, aes(x=Anno, y=Mese, group = Anno, colour = Anno)) +
+ geom_line() +
+ geom_point( size=4, shape=21, fill="white")
But I get this error:
Error in eval(expr, envir, enclos) : oggetto "Mese" non trovato
Try this:
library(reshape2)
library(ggplot2)
library(dplyr)
# Reshape to long format (month names in `Mese`, measurements in `value`),
# then draw points plus one line per year, coloured by year.
melt(data, id.vars = "Anno", variable.name = "Mese") %>%
  ggplot(aes(x = Mese, y = value, color = as.factor(Anno))) +
  geom_point(size = 4, shape = 21, fill = "white") +
  geom_line(aes(group = Anno))
The piping using dplyr %>% operator is of course optional. You can save intermediate steps as you did in your code.
The result is the following plot:
I have a data frame like this:
dput(xx)
structure(list(TimeStamp = structure(c(15705, 15706), class = "Date"),
Host = c("Host1", "Host2"), OS = structure(c(1L, 1L), .Label = "solaris", class = "factor"),
ID = structure(c(1L, 1L), .Label = "1234", class = "factor"),
Class = structure(c(1L, 1L), .Label = "Processor", class = "factor"),
Stat = structure(c(1L, 1L), .Label = "CPU", class = "factor"),
Instance = structure(c(1L, 1L), .Label = c("_Total", "CPU0",
"CPU1", "CPU10", "CPU11", "CPU12", "CPU13", "CPU14", "CPU15",
"CPU16", "CPU17", "CPU18", "CPU19", "CPU2", "CPU20", "CPU21",
"CPU22", "CPU23", "CPU3", "CPU4", "CPU5", "CPU6", "CPU7",
"CPU8", "CPU9"), class = "factor"), Average = c(4.39009345794392,
5.3152972972973), Min = c(3.35, -0.01), Max = c(5.15, 72.31
)), .Names = c("TimeStamp", "Host", "OS", "ID", "Class",
"Stat", "Instance", "Average", "Min", "Max"), row.names = c(NA,
-2L), class = "data.frame")
This data frame is huge and it has many Hosts. The challenge that I am having is that when a host like above does not have enough data points, the following ggplot fails, basically complaining about not having enough data points to draw the graph.
# Scatter plot of Max over TimeStamp, grouped and coloured by Host, with a
# loess smoother per group. The smoother argument must be spelled `method`.
ggplot(xx, aes(TimeStamp, Max, group = Host, colour = Host)) +
  geom_point() +
  geom_smooth(method = "loess")
How can I check and see if a particular Host in this data frame has greater than 10 data points, if yes use method="loess".
if the number of data points for a Host is less than 10, use method="lm"
Yes, it was tricky to find, but it seems to be possible,
# for reproducibility
set.seed(42)
# The idea is to first split the data into Hosts with < 10 and >= 10 points
# I use data.table for that
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE.
library(data.table)
dt <- data.frame(
  Host = rep(paste0("Host", 1:10), sample(1:20, 10)),
  stringsAsFactors = FALSE
)
dt <- transform(
  dt,
  x = sample(seq_len(nrow(dt))),
  y = 15 * seq_len(nrow(dt))
)
dt <- data.table(dt, key = "Host")
# .N is the number of rows in the current Host group; .SD[TRUE] keeps the
# whole group and .SD[FALSE] drops it.
dt1 <- dt[, .SD[.N >= 10], by = Host]
dt2 <- dt[, .SD[.N < 10], by = Host]
# on to plotting now
library(ggplot2)
# Now, dt1 has all Hosts with >= 10 observations and dt2 the other way round
# plot now for dt1
p <- ggplot(data = dt1, aes(x = x, y = y, group = Host)) +
  geom_line() +
  geom_smooth(method = "loess", se = TRUE)
# plot geom_line for dt2 by telling the data and aes
# The TRICKY part: add geom_smooth by telling data=dt2
p <- p +
  geom_line(data = dt2, aes(x = x, y = y, group = Host)) +
  geom_smooth(data = dt2, method = "lm", se = TRUE)
p
(This is an ugly example. But it gives you the idea).
Adding to Arun's excellent answer, I think you simply need to visually distinguish e.g. use solid-line for loess, dotted-line for lm:
# Hosts in dt1: raw lines plus a solid loess smoother with its standard-error
# band (se = TRUE; the reassignable shorthand `T` is avoided).
p <- ggplot(data = dt1, aes(x = x, y = y, group = Host)) +
  geom_line() +
  geom_smooth(method = "loess", linetype = "solid", se = TRUE)
# Hosts in dt2: raw lines plus a dashed linear fit, so the two smoothers can be
# told apart visually.
p <- p +
  geom_line(data = dt2, aes(x = x, y = y, group = Host)) +
  geom_smooth(data = dt2, method = "lm", linetype = "dashed", se = TRUE)
The warning messages can be prevented by duplicating the data points and setting the span parameter of the geom_smooth function. For example:
# Stack dt1 and dt2 into one table.
# NOTE(review): `data` is assigned here but the plot below still reads dt1 --
# confirm whether the combined table was meant to be plotted.
data <- rbind(dt1, dt2)
# Lines per Host plus a loess smoother with an explicit span of 1.4.
p <- ggplot(data=dt1, aes(x = x, y = y, group = Host)) + geom_line() +
geom_smooth(method='loess', span = 1.4, se=T)
In case the warnings remain, you can try different values of the span parameter.
If I have a nested factor, in this case I have multiple "Family" levels that are contained in the factor "Order", I would like to potentially create a
facet_grid(Family / Order ~.)
instead of the current
facet_grid(Family + Order ~.)
Basically -- ONE strip for every Order -- that contains next to it all strips for each family inside that Order. I know that facet_grid(Family / Order ~.) is currently not possible, but how would I achieve this effect? Could it be done with a theme()? Thank you so much. --SB
I should have specified above that both Family and Order are factors. The data values B are by Species which have a Family level and Order level they belong to. Here is the code for my plot:
# Points of B against Species, with one facet row per Family + Order
# combination; scales and panel space are both free.
p <- ggplot(models, aes(B, Species)) +
  geom_point() +
  facet_grid(Family + Order ~ ., scales = "free", space = "free")
Here is some sample data:
structure(list(Species = c("Acanthocyclops robustus", "Acroperus harpae",
"Alona affinis", "Ascaphus truei", "Bosmina longirostris"), Intercept = c(-36.1182388331068,
-27.2140776216155, -25.7920464721491, -39.2233884219763, -31.4301301084581
), B = c(0.919397836908493, 0.716601987210452, 0.685455190113372,
1.04159758611351, 0.81077051300147), Bconf = c(0.407917065756464,
0.181611850119198, 0.254101713856315, 0.708582768458448, 0.234313394549538
), Order = c("Cyclopoida", "Diplostraca", "Diplostraca", "Anura",
"Diplostraca"), Family = c("Cyclopidae", "Chydoridae", "Chydoridae",
"Leiopelmatidae", "Bosminidae")), .Names = c("Species", "Intercept",
"B", "Bconf", "Order", "Family"), row.names = c(NA, 5L), class = "data.frame")
Using facet_grid or facet_wrap will not build the graphic you are trying to build. You can, however, build a list of graphics and then plot them via gridExtra::grid.arrange. Here is an example
library(ggplot2)
library(gridExtra)
library(dplyr)
dat <-
structure(list(Species = c("Acanthocyclops robustus", "Acroperus harpae",
"Alona affinis", "Ascaphus truei", "Bosmina longirostris"), Intercept = c(-36.1182388331068,
-27.2140776216155, -25.7920464721491, -39.2233884219763, -31.4301301084581
), B = c(0.919397836908493, 0.716601987210452, 0.685455190113372,
1.04159758611351, 0.81077051300147), Bconf = c(0.407917065756464,
0.181611850119198, 0.254101713856315, 0.708582768458448, 0.234313394549538
), Order = c("Cyclopoida", "Diplostraca", "Diplostraca", "Anura",
"Diplostraca"), Family = c("Cyclopidae", "Chydoridae", "Chydoridae",
"Leiopelmatidae", "Bosminidae")), .Names = c("Species", "Intercept",
"B", "Bconf", "Order", "Family"), row.names = c(NA, 5L), class = "data.frame")
dat
# A ggplot object with NO data. Omit the Order from the facet_grid call
g <-
  ggplot() +
  aes(Species, B) +
  geom_point() +
  facet_grid(. ~ Family,
             scales = "free", space = "free") +
  ylim(range(dat$B)) +
  xlab("")
# Build a separate graphic for each Order, titled with that Order.
# dplyr::filter() is used instead of the deprecated standard-evaluation verb
# filter_(); `Order` is looked up in dat and `o` in the enclosing function.
plots <-
  lapply(unique(dat$Order), function(o) {
    g %+% dplyr::filter(dat, Order == o) + ggtitle(o)
  })
# build as Grobs and plot via gridExtra::grid.arrange
plots %>%
  lapply(ggplotGrob) %>%
  arrangeGrob(grobs = .) %>%
  grid.arrange(., ncol = 1)
Here's a simple solution: add a variable foo to your data that collapses the levels of the inner factor such that interaction(foo, outer) has the same level sets as inner. I know I'm missing some labels, so if someone can figure out a quick way to fill in the labels I'll edit it into my answer.
library(ggplot2)
library(gridExtra)
library(dplyr)
dat <-
structure(list(Species = c("Acanthocyclops robustus", "Acroperus harpae",
"Alona affinis", "Ascaphus truei", "Bosmina longirostris"),
Intercept = c(-36.1182388331068, -27.2140776216155, -25.7920464721491,
-39.2233884219763, -31.4301301084581),
B = c(0.919397836908493, 0.716601987210452, 0.685455190113372,
1.04159758611351, 0.81077051300147),
Bconf = c(0.407917065756464,
0.181611850119198, 0.254101713856315, 0.708582768458448, 0.234313394549538
),
Order = c("Cyclopoida", "Diplostraca", "Diplostraca", "Anura",
"Diplostraca"),
Family = c("Cyclopidae", "Chydoridae", "Chydoridae",
"Leiopelmatidae", "Bosminidae")),
.Names = c("Species", "Intercept",
"B", "Bconf", "Order", "Family"), row.names = c(NA, 5L), class = "data.frame")
# Replace each value of x by the numeric code of its level after conversion
# with as.factor().
replace_with_int_rank <- function(x) {
  as_levels <- as.factor(x)
  as.numeric(as_levels)
}
# Recode `inner` separately within each group of `outer`: the values are
# converted to character, and ave() applies replace_with_int_rank() per group.
# Returns a vector of the same length as `inner`; because the input to ave()
# is a character vector, the codes come back as character strings.
collapse_nested_factor <- function(inner, outer) {
  inner_labels <- as.character(inner)
  ave(inner_labels, outer, FUN = replace_with_int_rank)
}
# Per-Order family codes, stored as a new column.
dat$Family_collapsed <- collapse_nested_factor(inner = dat$Family, dat$Order)
# Points of B against Species, faceted by Order (rows) and the collapsed
# family code (columns), with free scales.
p <- ggplot(dat) +
  geom_point(aes(B, Species)) +
  facet_grid(Order ~ Family_collapsed, scales = "free")