I have a column in seconds that I need to plot as "%H%M%S".
I've tried using lubridate pkg, but the column results in:
loadtime_dfs$avgPageLoadTime <- seconds_to_period(loadtime_df$avgPageLoadTime)
Formal class 'Period' [package "lubridate"]
that I can plot but doesn't show any format.
loadtime_df <- structure(list(date = structure(c(17766, 17767, 17768, 17769,
17770, 17771), class = "Date"), pagePath = c("/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151"
), pageviews = c(245L, 225L, 194L, 214L, 214L, 213L), pageLoadTime = c(18965L,
185834L, 31115L, 114561L, 88807L, 0L), avgPageLoadTime = c(6,
27, 16, 138, 144, 0), bouncerate = c(5.63380281690141, 3.48837209302326,
5.40540540540541, 7.01754385964912, 0, 5), mes = c("agosto",
"agosto", "agosto", "agosto", "agosto", "agosto")), .Names = c("date",
"pagePath", "pageviews", "pageLoadTime", "avgPageLoadTime", "bouncerate",
"mes"), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
This is what I need to plot:
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth()
But with the Y axis with breaks: "00:01:00", "00:02:00", "00:03:00", "00:04:00", "00:05:00".
You are going to have to provide strings for ggplot to assign as labels.
If you read ?scale_y_continuous(labels=...), you'll see that labels= takes either waiver(), character, or a function. If you want to specify the specific locations and representations, then you'll want to specify both breaks= and labels=. However, typically you want ggplot2 to determine where to put the axis labels, so we'll provide a function that takes a value and returns a string.
I'm guessing there's a helper function somewhere to do this, but here's a base-R version. (The origin of the function is unimportant, since we can replace our function with another with likely the same outcome.)
This formatting function cheats a little by temporarily converting the seconds of avgPageLoadTime to POSIXct and then to a string. Doing it this way means it honors options("digits.secs"), if set.
# Format a numeric vector of seconds as "HH:MM:SS" strings.
#
# x           numeric vector of seconds (treated as an offset from the epoch).
# digits.secs optional number of fractional-second digits; when supplied, the
#             "digits.secs" option is overridden only for this call.
#
# Returns a character vector the same length as x.
fmt_hms <- function(x, digits.secs = NULL) {
  if (!is.null(digits.secs)) {
    # Swap the option in and make sure the previous value is restored on exit.
    previous <- options(digits.secs = digits.secs)
    on.exit(options(previous), add = TRUE)
  }
  # Seconds since the epoch, rendered in UTC, give the elapsed time of day.
  stamps <- as.POSIXct(x, origin = "1970-01-01 00:00:00")
  format(stamps, format = "%H:%M:%OS", tz = "UTC")
}
In order to demonstrate this, I'll change one of the values of your data:
loadtime_df$avgPageLoadTime[3] <- loadtime_df$avgPageLoadTime[3] + 0.123456
fmt_hms(loadtime_df$avgPageLoadTime)
# [1] "00:00:06" "00:00:27" "00:00:16" "00:02:18" "00:02:24" "00:00:00"
fmt_hms(loadtime_df$avgPageLoadTime, digits.secs=3)
# [1] "00:00:06.000" "00:00:27.000" "00:00:16.123" "00:02:18.000" "00:02:24.000"
# [6] "00:00:00.000"
So we can just provide this function:
library(ggplot2)
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(labels=fmt_hms)
I think you need to convert the date values in seconds to a %H%m%s format and then try plotting. I think you need one of the below approaches -
library(ggplot2)
library(lubridate)
# convert seconds to periods
td <- seconds_to_period(loadtime_df$avgPageLoadTime)
# then apply the required format
# Build "HH:MM:SS" labels from the Period components. The hour accessor was
# garbled as `td#hour`, where `#` starts a comment and truncates the call.
# NOTE(review): "%02d" expects whole-number components; fractional seconds
# would need a different format such as "%02.0f".
avgPageLoadTime_vector <- sprintf('%02d:%02d:%02d', hour(td), minute(td),
second(td))
# plotting using %H%m%s we can use them as y-axis ticks
# this will give you the same plot as above but Y-axis is fuzzy
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(breaks = loadtime_df$avgPageLoadTime,
labels = avgPageLoadTime_vector)
# if you just want to plot with points and not use geom_smooth
# convert the column avgPageLoadTime into %H%m%s date-time format
loadtime_df$avgPageLoadTime <- avgPageLoadTime_vector
# this gives you the right Y-axis values but no smoothing
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point()
]2
Related
I'm trying to assign different colors to the scatterplot based on their dates, more specifically the year.
This is how my dataset looks like:
> dput(head(CORt_r100_stack_join_fspec,10))
structure(list(Date = structure(c(16779, 16834, 16884, 16924,
16973, 16997, 17031, 17184, 17214, 17254), class = "Date"), meanNDVIN_int = c(0.677501157246889,
0.632728796482024, 0.578636981692124, 0.547002029242488, 0.632635423362751,
NA, 0.699596252720458, 0.670059391804396, 0.643347941166436,
0.674034259709311), meanNDVIW_int = c(0.784142418592418, 0.652437451242156,
0.648319814752948, 0.593432266488189, 0.767890365415717, NA,
0.779249089832163, 0.71974944410843, 0.715777992826006, 0.685045115352089
), meanNDVIE_int = c(0.703614512017928, 0.701963337684803, 0.488628353756438,
0.631309466083632, 0.781589421376217, NA, 0.799663418920722,
0.78910564747191, 0.710962969930836, 0.715644011856453), meanNDVINr_int_f = c(0.677501157246889,
0.632728796482024, 0.578636981692124, 0.547002029242488, 0.632635423362751,
0.687343078509066, 0.699596252720458, 0.670059391804396, 0.643347941166436,
0.674034259709311), meanNDVIWr_int_f = c(0.784142418592418, 0.652437451242156,
0.648319814752948, 0.593432266488189, 0.767890365415717, 0.749505859407419,
0.779249089832163, 0.71974944410843, 0.715777992826006, 0.685045115352089
), meanNDVIEr_int_f = c(0.703614512017928, 0.701963337684803,
0.488628353756438, 0.631309466083632, 0.781589421376217, 0.625916155640988,
0.799663418920722, 0.78910564747191, 0.710962969930836, 0.715644011856453
), NDVI_N = c(0.17221248, 0.644239685, 0.57222623, 0.558666635,
0.51654034, 0.42053949, 0.396706695, 0.641767447, 0.641008268,
0.662841949), NDVI_W = c(0.08182944, 0.69112807, 0.637699375,
0.629429605, 0.658829525, 0.60621678, 0.57186129, 0.72636742,
0.724193596, 0.738424976), NDVI_E = c(0.17135712, 0.659222803,
0.58665977, 0.573081253, 0.533498035, 0.437643585, 0.412841468,
0.652057206, 0.651854988, 0.670345511), NDVI_U = c(0.40520304,
0.578414833, 0.455746833, 0.428289893, 0.208847548, 0, 0, 0.475193691,
0.478691084, 0.505043773)), row.names = c(NA, 10L), class = "data.frame")
I've been plotting meanNDVIN_int against NDVI_N using this code:
# Scatterplot of meanNDVIN_int against NDVI_N with every point drawn in red.
# Each layer must be joined with `+`; without it after geom_point(), the theme
# and axis labels form a separate expression and never reach the plot.
ggplot(CORt_r100_join_fspec_2NDVIday,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int), colour="red") +
theme_bw()+
ylab("meanNDVIN_int")+
xlab("NDVI_N")
Now I want to color each point differently (no matter the color) based on their year, 2015, 2016, and 2017.
I've used the scale_color_manual function to introduce the dates but no success so far.
Any help will be much appreciated.
Here is an alternative where you substring the first 4 characters from Date in color
df
ggplot(df,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int, color=substring(Date,1,4))) +
labs(color="Year")+
theme_bw()+
ylab("meanNDVIN_int")+
xlab("NDVI_N")
I created a year variable with lubridate and stored it as a factor for discrete colouring. You were just missing moving color inside the aes() to color it by year.
# Add year Variable;
CORt_r100_stack_join_fspec <- CORt_r100_stack_join_fspec %>% mutate(
year = as.factor(lubridate::year(Date))
)
# Plot;
ggplot(CORt_r100_stack_join_fspec,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int, color = year)) +
theme_bw() +
ylab("meanNDVIN_int")+
xlab("NDVI_N")
Note: The name of the data you provided is not the same as the one in your plot call, so I changed CORt_r100_join_fspec_2NDVIday to CORt_r100_stack_join_fspec to make the plot and mutate work properly.
I tried hours to figure out how I can make my loess line work. The problem is I do not know much (lets say near nothing). I only have to use R for one course in university.
I created a fake table the real table is for download here
I have to make a timeline plot that worked surprisingly well. But now I have to add two loess lines with different spans. My Problem is I don't know how the command really works. I mean I know it should be something like loess(..~.., data=..). The step where I'm stuck is marked with "WHAT BELONGS HERE" in the given code below.
# Fake monthly data: eight months and the total for each month.
# row.names now describes all 8 rows; it was c(NA, 4L), which contradicts the
# 8-element columns and produces a malformed data frame. The first column name
# is also corrected from "Monts" to "Months".
table <- structure(list(
  Months = c("1980-06", "1980-07", "1980-08", "1980-09",
             "1980-10", "1980-11", "1980-12", "1981-01"),
  Total = c(75000, 70000, 60000, 73000, 72000, 71000, 76000, 71000)),
  .Names = c("Months", "Total of Killed Pigs"),
  row.names = c(NA, 8L), class = "data.frame")
ts.obj <- ts(table$`Total of Killed Pigs`, start = c(1980, 1), frequency = 2)
plot(ts.obj)
trend1 <- loess(# **WHAT BELONGS HERE?**, data = table, span =1)
predict1 <- predict(trend1)
lines(predict1, col ="blue")
That is my original code:
obj <- read.csv(file="PATH/monthly-total-number-of-pigs-sla.csv", header=TRUE, sep=",")
ts.obj <- ts(obj$Monthly.total.number.of.pigs.slaughtered.in.Victoria..Jan.1980...August.1995, start = c(1980, 1), frequency = 12)
plot(ts.obj)
trend1 <- loess (WHAT BELONGS HERE?, data = obj, span =1)
predict1 <- predict (trend1)
lines(predict1, col="blue")
We can do away with the data argument as the time series is univariate (just one variable).
The formula ts.obj ~ index(ts.obj) can be read as
value as a function of time
as ts.obj will give you the values, and index(ts.obj) will give you the time index for those values, and the tilde ~ specifies that the first is a function of, or dependent on, the other.
library(zoo) # for index()
plot(ts.obj)
trend1 <- loess(ts.obj ~ index(ts.obj), span=1)
trend2 <- loess(ts.obj ~ index(ts.obj), span=2)
trend3 <- loess(ts.obj ~ index(ts.obj), span=3)
pred <- sapply(list(trend1, trend2, trend3), predict)
matlines(index(ts.obj), pred, lty=1, col=c("blue", "red", "orange"))
zoo isn't strictly required. If you replace index(ts.obj) with as.numeric(time(ts.obj)) you should be fine, I think.
In case you were wanting to go with ggplot2:
library(ggplot2)
library(dplyr)
table <- structure(list(
Months = c("1980-06", "1980-07", "1980-08", "1980-09",
"1980-10", "1980-11", "1980-12", "1981-01"),
Total = c(75000, 70000, 60000, 73000, 72000, 71000, 76000, 71000)),
.Names = c("Months", "Total"),
row.names = c(NA, 8L), class = "data.frame")
Change to proper dates:
table <- table %>% mutate(Months = as.Date(paste0(Months,"-01")))
Plot:
ggplot(table, aes(x=Months, y=Total)) +
geom_line() +
geom_smooth(span=1, se= FALSE, color ="red") +
geom_smooth(span=2, se= FALSE, color ="green") +
geom_smooth(span=3, se= FALSE) +
theme_minimal()
I have a dataframe with dates. Here are the first 3 rows with dput:
# First three rows of the cross-validation data, as produced by dput().
# The class vector previously contained the editor placeholder
# "`enter code here`tbl_df", which stopped the object being recognised as a
# tibble; it is restored to "tbl_df".
df.cv <- structure(list(
  ds = structure(c(1448064000, 1448150400, 1448236800),
                 class = c("POSIXct", "POSIXt"), tzone = "UTC"),
  y = c(10.4885204292416, 10.456538985014, 10.4264986311659),
  yhat = c(10.4851491194439, 10.282089547027, 10.4354960430083),
  yhat_lower = c(10.4169914076864, 10.2162549984153, 10.368531352493),
  yhat_upper = c(10.5506038959764, 10.3556867861042, 10.5093092789713),
  cutoff = structure(c(1447977600, 1447977600, 1447977600),
                     class = c("POSIXct", "POSIXt"), tzone = "UTC")),
  .Names = c("ds", "y", "yhat", "yhat_lower", "yhat_upper", "cutoff"),
  row.names = c(NA, -3L),
  class = c("tbl_df", "tbl", "data.frame"))
I'm trying to plot the data with ggplot + geom_line from similar day/month combinations in one plot. So, for example, I want the y-value of 2016-01-01 to appear on the same x-value as 2017-01-01. I found a way to do this, but it seems to be a very complex workaround:
library(tidyverse)
library(lubridate)
p <- df.cv %>%
mutate(jaar = as.factor(year(ds))) %>%
mutate(x = as_date(as.POSIXct(
ifelse(jaar==2016, ds + years(1), ds),
origin = "1970-01-01")))
ggplot(p %>% filter(jaar!=2015), aes(x=x, group=jaar, color=jaar)) +
geom_line(aes(y=y))
It works, but as you can see I first have to extract the year, then use an ifelse to add one year to only the 2016 dates, convert with POSIXct because ifelse strips the class, convert back into POSIXct while supplying an origin, and finally remove the timestamp with as_date.
Isn't there a simpler, more elegant way to do this?
Use year<- to replace the year with any fixed leap year:
p <- df.cv %>%
mutate(jaar = as.factor(year(ds)),
x = `year<-`(as_date(ds), 2000))
ggplot(p, aes(x = x, y = y, color = jaar)) +
geom_line()
I have ts data that I draw with quarterly legend:
z <- as.zoo(my_data)
breaks <- seq(min(time(z)), max(time(z)), .25);
autoplot(z, geom="line",ylim=c(0,75)) + scale_x_yearqtr(breaks = breaks, format = "%yQ%q")
I'd like to draw a vertical line at a predetermined position (let's say at 1975-08-01).
The problem is, when I add "+ geom_vline", I get a really weird collapsed chart. Obviously, I have no true idea what I'm doing but I've tried this:
+ geom_vline(xintercept=as.Date("1975-08-01"))
+ geom_vline(xintercept=as.numeric(as.Date("1975-08-01")))
and as offsets (not sure how it goes):
+ geom_vline(xintercept=as.numeric(z[c(10,11)]))
+ geom_vline(xintercept=as.numeric(z[10]))
+ geom_vline(xintercept=3)
This is what happens (without geom_vline it is OK):
How can I put an offset ("draw vline at datapoint X") or data ("1975-08-01")?
What am I doing wrong?
Adding some data.
dput(z)
structure(c(NA, NA, NA, 56.0775, 58.53, 58.17, 61.5025, 57.71,
56.5075, 53.9375, 47.345, 48.6975, 53.15, 60.3125, 60.2, 65.1025,
63.445, 57.86, 62.1225, 62.19, 64.075, 71.7725, 69.565, 63.4575000000001,
59.2175, 53.8525, 53.4175, 50.1475, 50.9, 50.0675, 52.6925, 59.9325,
59.8625, 61.8375, 57.655, 50.23, 47.8775, 39.5475, 40.1375, 43.2075,
44.885, 48.115), index = structure(c(1974, 1974.08333333333,
1974.16666666667, 1974.25, 1974.33333333333, 1974.41666666667,
1974.5, 1974.58333333333, 1974.66666666667, 1974.75, 1974.83333333333,
1974.91666666667, 1975, 1975.08333333333, 1975.16666666667, 1975.25,
1975.33333333333, 1975.41666666667, 1975.5, 1975.58333333333,
1975.66666666667, 1975.75, 1975.83333333333, 1975.91666666667,
1976, 1976.08333333333, 1976.16666666667, 1976.25, 1976.33333333333,
1976.41666666667, 1976.5, 1976.58333333333, 1976.66666666667,
1976.75, 1976.83333333333, 1976.91666666667, 1977, 1977.08333333333,
1977.16666666667, 1977.25, 1977.33333333333, 1977.41666666667
), class = "yearmon"), frequency = 12, class = c("zooreg", "zoo"))
The index of z has class "yearmon":
class(index(z))
## [1] "yearmon"
so the xintercept= should be specified consistently, i.e. also as a "yearmon" object:
p <- autoplot(z, ylim=c(0,75)) +
scale_x_yearqtr(breaks = breaks, format = "%yQ%q")
p + geom_vline(xintercept = as.yearmon("1975-08"))
Any other valid specification of a "yearmon" object would work as well, e.g.
p + geom_vline(xintercept = as.yearmon(1975 + (8-1) / 12))
p + geom_vline(xintercept = as.yearmon(as.Date("1975-08-01")))
As bVa pointed out, I used dput to see the format of index.
As dates are stored as decimal years (year + (month - 1) / 12), the solution is to use a simple decimal value: 1975 + 7/12 ≈ 1975.583 for Aug 1975 (note that 1975.67 would correspond to Sep 1975).
# The "yearmon" index stores dates as year + (month - 1) / 12, so August 1975
# is 1975 + 7/12 (about 1975.583); 1975.67 would land on September 1975.
geom_vline(xintercept = 1975 + (8 - 1) / 12)
This is a follow up post to Draw min, max function in R
I would like to draw multiple functions with different parameters in one plot. But the group argument of ggplot does not seem to work.
Data:
# data preparation/ load
feTargetPv <- structure(list(ModelYear = 2012:2016,
ComplianceCategory = c("pv","pv", "pv", "pv", "pv"),
fetargetfix = c(30.7, 31.4, 32.1, 33.3, 34.7),
a = c("35.95", "36.8", "37.75", "39.24", "41.09"),
b = c("27.95", "28.46", "29.03", "29.9", "30.96"),
c = c("0.0005308", "0.0005308", "0.0005308", "0.0005308", "0.0005308"),
d = c("0.006057", "0.00541", "0.004725", "0.003719", "0.002573")),
.Names = c("ModelYear", "ComplianceCategory", "fetargetfix", "a", "b", "c", "d"),
row.names = c(47L, 49L, 51L, 53L, 55L), class = "data.frame")
Function:
my <- c(2012,2013, 2014)
# Evaluate the target curve for model year `my` at the values in `x`.
#
# Coefficients a, b, c and d are read from the global `feTargetPv` (stored as
# character, hence the numeric conversion). The linear term c * x + d is
# clamped to the interval [1/a, 1/b] and the reciprocal is returned.
eqs <- function(x, my) {
  # Look up one coefficient column for the requested model year.
  coef_for <- function(column) {
    as.numeric(feTargetPv[which(feTargetPv$ModelYear == my), column])
  }
  linear <- coef_for("c") * x + coef_for("d")
  lower <- 1 / coef_for("a")
  upper <- 1 / coef_for("b")
  1 / pmin(pmax(linear, lower), upper)
}
Ggplot:
ggplot(data.frame(x=seq(from = 30, to = 75, by = 1)), aes(x), group = my, col = my) +
stat_function(fun=eqs, args=my) + xlab("x") + ylab("y")
Error message
Warning message:
Computation failed in `stat_function()`:
unused arguments (2013, 2014)
How do I have to provide the my parameters to the stat_function()?
Rather than using stat_function, you could calculate the values per year via your function outside of ggplot and then use geom_line.
You could start by making a dataset of your x values for each year, using your my as your ModelYear variable.
dat = expand.grid(x=seq(from = 30, to = 75, by = 1), ModelYear = my)
Then merge your x values with the dataset that contains the function parameter values. By default this keeps only info for the first dataset, dat.
dat2 = merge(dat, feTargetPv, by = "ModelYear")
Now your function can take either the vectors of x, a, b, c, d or the dataset and the x variable.
# Evaluate the target curve row by row.
#
# data  data set holding the coefficient columns "a", "b", "c" and "d"
#       (converted with as.numeric, so character columns are accepted).
# x     numeric vector of x values, aligned with the rows of `data`.
#
# The linear term c * x + d is clamped to [1/a, 1/b] and its reciprocal is
# returned.
eqs <- function(data, x) {
  # Pull one coefficient column out of `data` as numeric.
  as_num <- function(column) as.numeric(data[, column])
  linear <- as_num("c") * x + as_num("d")
  lower <- 1 / as_num("a")
  upper <- 1 / as_num("b")
  1 / pmin(pmax(linear, lower), upper)
}
Add the values from the function as a column to the dataset. I call this new variable y.
dat2$y = eqs(dat2, dat2$x)
Now the plotting is straightforward, using y values to map the y position along with geom_line.
ggplot(dat2, aes(x, y, color = factor(ModelYear))) +
geom_line()