Dealing with uncommon date structures in ggplot2 - r

I have a data set and it contains the following variable for date.
dat$Leads_MONTH
[1] "10-Jan" "10-Feb" "10-Mar" "10-Apr" "10-May" "10-Jun" "10-Jul" "10-Aug" "10-Sep" "10-Oct" "10-Nov" "10-Dec" "11-Jan" "11-Feb" "11-Mar" "11-Apr"
[17] "11-May" "11-Jun" "11-Jul" "11-Aug" "11-Sep" "11-Oct" "11-Nov" "11-Dec" "12-Jan" "12-Feb" "12-Mar" "12-Apr" "12-May" "12-Jun" "12-Jul" "12-Aug"
[33] "12-Sep" "12-Oct" "12-Nov" "12-Dec" "13-Jan" "13-Feb" "13-Mar" "13-Apr" "13-May" "13-Jun" "13-Jul"
I want to plot this data on the x axis using ggplot2 but am having some issues with this task. Is there a way to get ggplot2 to format Leads_MONTH as a date format and then plot it using ggplot2.
ggplot(dat, aes(Leads_MONTH, LEADSforester)) +
geom_bar(stat="identity", fill="#336699") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
The above code produces a plot, but the dates on the x axis are not in the right order.
I tried to set the variable as a date but am not having any luck.
> dat$Leads_MONTH <- as.Date(dat$Leads_MONTH)
Error in charToDate(x) :
character string is not in a standard unambiguous format

For the bar plot, you can manually convert Leads_MONTH to factor and specify the levels.
# Example data: month labels in "yy-Mon" form, listed in chronological order,
# paired with random values to plot.
dat <- data.frame(
  Leads_MONTH = c(
    "10-Jan", "10-Feb", "10-Mar", "10-Apr", "10-May", "10-Jun", "10-Jul", "10-Aug", "10-Sep", "10-Oct", "10-Nov", "10-Dec", "11-Jan", "11-Feb", "11-Mar", "11-Apr",
    "11-May", "11-Jun", "11-Jul", "11-Aug", "11-Sep", "11-Oct", "11-Nov", "11-Dec", "12-Jan", "12-Feb", "12-Mar", "12-Apr", "12-May", "12-Jun", "12-Jul", "12-Aug",
    "12-Sep", "12-Oct", "12-Nov", "12-Dec", "13-Jan", "13-Feb", "13-Mar", "13-Apr", "13-May", "13-Jun", "13-Jul"
  ),
  LEADSforester = runif(43)
)
library(ggplot2)
# Convert Leads_MONTH to a factor whose levels follow the order in which the
# labels first appear in the data, so the x axis keeps that order instead of
# sorting alphabetically. unique() keeps this working when a label occurs more
# than once, because factor() rejects duplicated levels.
dat$Leads_MONTH <- factor(dat$Leads_MONTH, levels = unique(dat$Leads_MONTH))
ggplot(dat, aes(Leads_MONTH, LEADSforester)) +
  geom_bar(stat = "identity", fill = "#336699") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
If you want to convert into Date class, you need to add day and specify the argument format
as.Date(paste0(dat$Leads_MONTH, '-1'), format = '%y-%b-%d')

Try:
library(zoo)
as.yearmon(dat$Leads_MONTH, format="%y-%b")
To convert them to normal date objects wrap that in as.Date

Related

transform "mFilter" object (list of Time-Series) to plot with ggplot2

I'm working with the hpfilter from the mFilter package and I can't seem to find a simple way to convert the list of Time-Series objects by hpfilter to a format I can use with ggplot2. I realize I can take it all apart and put it back together, but I imagine there's some simple way I have overlooked? I tried the code suggested in the SO discussion R list to data frame. However I couldn't find a way to convert the list of Time-Series objects to a data.frame in any simple way. The final goal is to reproduce the default plot produced by the mFilter object (see below)
Here's some example code
# install.packages(c("mFilter"), dependencies = TRUE)
library(mFilter)
data(unemp)
unemp.hp <- hpfilter(unemp, type=c("lambda"), freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
plot(unemp.hp)
Hit <Return> to see next plot:
Also, why am I asked to " Hit <Return>" to see the plot?
The plot function calls plot.mFilter which has parameter ask=interactive() and it is set as TRUE for interactive sessions,
you could disable this by ask=FALSE in call for plot
plot(unemp.hp,ask=FALSE)
Data:
library(mFilter)
library(ggplot2)
library(gridExtra)
# library(zoo)
data(unemp)
unemp.hp <- hpfilter(unemp, type=c("lambda"), freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
plot(unemp.hp,ask=FALSE)
To check for slots of object unemp.hp
names(unemp.hp)
# [1] "cycle" "trend" "fmatrix" "title" "xname" "call" "type" "lambda" "method"
#[10] "x"
The relevant objects are x (the main unemp series) , trend and cycle. All three objects are of class ts, we first convert them to
data.frame using custom function and plot using ggplot and gridExtra (for grid.arrange)
objectList = list(unemp.hp$x,unemp.hp$trend,unemp.hp$cycle)
names(objectList) = c("unemp","trend","cycle")
sapply(objectList,class)
#unemp trend cycle
# "ts" "ts" "ts"
Conversion from ts to data.frame:
# Turn the x-th time series stored in the global `objectList` into a
# two-column data.frame: `date` (taken from the series' time index) and a
# value column named after that list element.
fn_ts_to_DF <- function(x) {
  series <- objectList[[x]]
  seriesName <- names(objectList)[x]
  out <- data.frame(
    date = zoo::as.Date(time(series)),
    tseries = as.matrix(series)
  )
  # Rename the placeholder value column to the series' own name
  names(out)[2] <- seriesName
  out
}
DFList=lapply(seq_along(objectList),fn_ts_to_DF)
names(DFList) = c("unemp","trend","cycle")
seriesTrend = merge(DFList$unemp,DFList$trend,by="date")
cycleSeries = DFList$cycle
Plots:
# Reshape seriesTrend to long form (one row per date and series) with base R,
# so the plot does not rely on a reshaping package that was never attached.
valueCols <- setdiff(names(seriesTrend), "date")
seriesLong <- data.frame(
  date = rep(seriesTrend$date, times = length(valueCols)),
  # Keep the series in their original column order for the legend
  variable = factor(rep(valueCols, each = nrow(seriesTrend)), levels = valueCols),
  value = unlist(seriesTrend[valueCols], use.names = FALSE)
)
# Top panel: original series and trend as coloured lines
gSeries <- ggplot(seriesLong, aes(x = date, y = value, color = variable)) +
  geom_line() +
  ggtitle("Hodrick-Prescott Filter for unemp") +
  theme(legend.title = element_blank(), legend.justification = c(0.1, 0.8), legend.position = c(0, 1),
        legend.direction = "horizontal", legend.background = element_rect(fill = "transparent", size = .5, linetype = "dotted"))
# Bottom panel: cyclical component on its own
gCycle <- ggplot(cycleSeries, aes(x = date, y = cycle)) +
  geom_line(color = "#619CFF") +
  ggtitle("Cyclical component (deviations from trend)")
# Stack the two panels vertically
gComb <- grid.arrange(gSeries, gCycle, nrow = 2)
I tried to use the prior answer, but it did not work for me.
I was getting the trend and cycle from a GDP quarterly series.
This data was a time series, so I did this, and worked for me:
# Collect the series and its trend and cycle components from gdp_ln
# (presumably the hpfilter() result -- confirm) under the column names wanted
# in the data frame. The container is not called `list`, so base::list()
# stays unmasked.
components <- list(gdp = gdp_ln$x, trend = gdp_ln$trend, cycle = gdp_ln$cycle)
# as.numeric() strips the time-series attributes, leaving plain numeric
# columns; lapply() always returns a list, so the result shape is predictable.
gdp <- data.frame(lapply(components, as.numeric))
Data:
> dput(gdp_ln)
structure(c(16.0275785360442, 16.0477176062761, 16.0718936895007,
16.0899963371452, 16.0875707712141, 16.0981391378223, 16.0988601288276,
16.1110815092797, 16.1244321329861, 16.1384685077996, 16.1451472350838,
16.148178781735, 16.161163569502, 16.1418894206861, 16.1634877625667,
16.1965372621761, 16.2216815829736, 16.2387677536829, 16.249412380526,
16.2690521777631, 16.2812185880068, 16.2951024427095, 16.2964024092233,
16.3127733881018, 16.3233290487177, 16.3369922768377, 16.3486515031696,
16.3489275708763, 16.3451264371757, 16.3524856433069, 16.3666338513045,
16.3801691039135, 16.3959993202765, 16.4135937981601, 16.4321203154987,
16.4488104165345, 16.4344524213544, 16.4302554348621, 16.4240722287677,
16.425087582257, 16.4350803035092, 16.4507216431126, 16.4670532627455,
16.4985227751756, 16.5094864456079, 16.5352746165004, 16.5504689966469,
16.5594976247513, 16.5754312535087, 16.592641573353, 16.6003340665324,
16.6063100774853, 16.6163655606058, 16.6370227688187, 16.6564363783854,
16.6577160570216, 16.6543595214556, 16.6773721241902, 16.6911082706925,
16.6935398489076, 16.6956102943815, 16.6798673418354, 16.6772670544553,
16.6678707780266, 16.6606889172344, 16.6678398460835, 16.6668473810049,
16.676020524389, 16.6775934319312, 16.6882821147755, 16.6957985899994,
16.7032334217472, 16.6926036544774, 16.7027214366522, 16.7103625977254,
16.7105344224572, 16.7042504851486, 16.7063913529457, 16.7100598555556,
16.6960591147037, 16.686477079594, 16.5740423808036, 16.6181175035946
), .Tsp = c(2000, 2020.5, 4), class = "ts")

ggplot2 axis as time with 1 hour error [duplicate]

This question already has answers here:
Time series plot gets offset by 2 hours if scale_x_datetime is used
(2 answers)
Closed 6 years ago.
I have a question on formatting the x axis as time.
This is a sample of my data:
dput(x)
structure(list(Sample = c("BK01", "BK02", "BK03", "BK04", "BK05",
"BK06", "BK07", "BK08", "BK09", "BK10", "BK11", "BK12", "BK13",
"BK14", "BK15", "BK16", "BK17", "BK18", "BK19", "BK20", "BK21",
"BK22", "BK23", "BK24", "BK25", "BK26", "BK27", "BK28", "BK29",
"BK30", "BK31", "BK32", "BK33"), Breath.d13C = c(-25.62, -27.45,
-26.87, -25.21, -26.01, -24.33, -24.45, -23.73, -25.05, -26.11,
-27, -26.28, -24.62, -26.96, -24.55, -24.52, -21.24, -26.18,
-24.82, -26.12, -27.28, -26.5, -24.46, -22.83, -27.28, -25.55,
-27.12, -24.46, -23.07, -28.35, NA, -25.98, -26.64), Chms = structure(c(1470047400,
1470048300, 1470048300, 1470049200, 1470050100, 1470050100, 1470040200,
1470041100, 1470040200, 1470041100, 1470065400, 1470063600, 1470063600,
1470064500, 1470061800, 1470045600, 1470045600, 1470046500, 1470047400,
1470066300, 1470060000, 1470058200, 1470057300, 1470047400, 1470042000,
1470042000, 1470041100, 1470041100, 1470040200, 1470043800, NA,
1470060000, 1470039300), class = c("POSIXct", "POSIXt"), tzone = "")), class = "data.frame", row.names = c(NA,
-33L), .Names = c("Sample", "Breath.d13C", "Chms"))
I want to use ggplot2 to build a graph of Breath.d13C vs Chms (Collection Time).
library(ggplot2)
ggplot(x, aes(x=Chms,y=Breath.d13C)) +
geom_point() +
scale_y_continuous(name=expression(delta^13*C["Breath"]*" "("\u2030")),
limits=c(-30,-10),
breaks=seq(-30,-10,5),
labels=fmt_decimals(1)) +
scale_x_datetime(name="Collection Time",
labels = date_format("%H:00",tz="UTC"),
date_breaks = "1 hour") +
my_theme
This code gives me a plot (image not shown). However, the times on the x axis are off by an hour.
I can see this by checking the Chms column or by using the normal R plots
with this code:
plot(x$Chms,x$Breath.d13C,cex=0.8)
The two plots use the same data set, so I have no idea what's causing the error on ggplot2. I'd like to keep using it, though. Any ideas on what am I doing wrong?
Thank you in advance
You need to specify the time zone in scale_x_datetime.
The function date_format() defaults to tz = "UTC", so your labels are converted to UTC. To label the axis in another time zone (I used "Europe/London" to get your desired output), set it explicitly in your ggplot code: labels = date_format("%H:%M", tz = "Europe/London")
But first, in order to run your code, I also had to define what you call fmt_decimals in your code, so I used this function given by @joran:
# Build an axis-label formatter that shows a fixed number of decimals.
#
# decimals: number of digits to print after the decimal point (default 0).
# Returns a function that takes a numeric vector of break values and returns
# a character vector of labels, e.g. -30 becomes "-30.0" for decimals = 1.
fmt_dcimals <- function(decimals = 0) {
  function(x) {
    # Fixed notation keeps trailing zeros, which as.character(round(x, d))
    # would drop ("-30" instead of "-30.0").
    labels <- formatC(as.numeric(x), format = "f", digits = decimals)
    # Keep missing breaks missing instead of printing the text "NA"
    labels[is.na(x)] <- NA_character_
    labels
  }
}
So your code looks like this:
ggplot(x, aes(x=Chms,y=Breath.d13C)) +
geom_point() +
scale_y_continuous(name=expression(delta^13*C["Breath"]*" "("\u2030")),
limits=c(-30,-10),
breaks=seq(-30,-10,5),
labels=fmt_dcimals(1)) +
scale_x_datetime(name="Collection Time",
labels = date_format("%H:%M", tz = "Europe/London"),
date_breaks = "1 hour")
And output:
The problem lies in the time zone you selected, i.e. UTC. You should use your current time zone instead. The corrected code is below:
library(ggplot2)
library(scales)  # provides date_format()
# Plot Breath.d13C against collection time, labelling the x axis in the
# session's current time zone instead of UTC.
ggplot(x, aes(x = Chms, y = Breath.d13C)) +
  geom_point() +
  scale_y_continuous(name = expression(delta^13*C["Breath"]*" "("\u2030")),
                     limits = c(-30, -10),
                     breaks = seq(-30, -10, 5)) +
  scale_x_datetime(name = "Collection Time",
                   # tz = "" means "current time zone"; the date part comes
                   # from the data rather than being typed into the format.
                   labels = date_format("%Y-%m-%d %H:00", tz = ""),
                   date_breaks = "1 hour")
See the plot below.

Suppressing Warnings in scale_x_datetime

This is not a duplicate since none of the methods in that putative duplicate apply here. None of them lead to the warning going away.
In fact I got an answer here from Konrad below - use suppressMessages. In the link that is asserted as a possible duplicate, they suggest suppressWarnings, which does not work.
After finally figuring out how to get R to use my timezone on the ggplot date axis correctly (found scale_x_datetime in a post here, before it was using my local timezone even though the data had the timezone set already), but it now complains with a warning:
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
This is annoying because I have to do this a lot, and I don't want to get into the habit of ignoring all warnings. How can I turn this off? I have obviously tried suppressWarnings (with and without print) and options(warn=-1).
R-Version is 3.1.3
ggplot2_1.0.1
scales_0.2.4
library(lubridate,quietly=T,warn.conflicts=T)
library(ggplot2,quietly=T,warn.conflicts=T)
library(scales,quietly=T,warn.conflicts=T)
sclip.time <- ymd_hms("2014-06-16 00:00:00",tz="US/Pacific")
eclip.time <- ymd_hms("2014-06-17 23:59:59",tz="US/Pacific")
sdata.time <- ymd_hms("2014-06-16 00:00:00",tz="US/Pacific")
edata.time <- ymd_hms("2014-06-17 23:59:59",tz="US/Pacific")
xdata <- seq(sdata.time,edata.time,length.out=100)
xfrac <- seq(0,4*3.1416,length.out=100)
ydata <- pmax(0.25,sin(xfrac))
ydata <- sin(xfrac)
ddf <- data.frame(x=xdata,y=ydata)
# Label factory for date-time axes: returns a function that renders a
# date-time vector as text using the given strftime-style `format`,
# expressed in time zone `tz`.
date_format_tz <- function(format = "%Y-%m-%d", tz = "UTC") {
  function(x) {
    format(x, format = format, tz = tz)
  }
}
options(warn=-1)
suppressWarnings(
ggplot(ddf) +
geom_line(aes(x,y),col="blue") +
geom_vline(xintercept=as.numeric(sclip.time),color="darkred") +
geom_vline(xintercept=as.numeric(eclip.time),color="darkgreen") +
xlim(sclip.time,edata.time) +
scale_x_datetime( breaks = date_breaks("1 day"),
labels = date_format_tz("%Y-%m-%d %H:%M", tz="US/Pacific"))
)
You have to use the combination of suppressMessages and print as in the snippet below:
suppressMessages(print(
ggplot(ddf) +
geom_line(aes(x,y),col="blue") +
geom_vline(xintercept=as.numeric(sclip.time),color="darkred") +
geom_vline(xintercept=as.numeric(eclip.time),color="darkgreen") +
xlim(sclip.time,edata.time) +
scale_x_datetime( breaks = date_breaks("1 day"),
labels = date_format_tz("%Y-%m-%d %H:%M", tz="US/Pacific"))
))
A way you might get what you want is to use the "try" function, with the option silent=T :
try(silent=T, [R-script])
It is generally a bad idea to do it that way, because you become blind to the errors that might occur, but if you are really certain of what you're doing..
Actually, the message does point to a problem with your following code snippet:
... + xlim(sclip.time,edata.time) +
scale_x_datetime( breaks = date_breaks("1 day"),
labels = date_format_tz("%Y-%m-%d %H:%M", tz="US/Pacific"))
Already the first command will add a scale, and the second command will replace that scale. So the message tells you that the first command has no effect.
You should combine the two and add the limits to scale_x_datetime:
... +
scale_x_datetime(breaks = date_breaks("1 day"),
labels = date_format_tz("%Y-%m-%d %H:%M", tz="US/Pacific"),
limits = c(sclip.time,edata.time))

GTrendsR + ggplot2?

I want to generate a plot of interest over time using GTrendsR and ggplot2
The plot I want (generated with google trends) is this:
Any help will be much appreciated.
Thanks!
This is the best I was able to get:
library(ggplot2)
library(devtools)
library(GTrendsR)
usr = "my.email"
psw = "my.password"
ch = gConnect(usr, psw)
location = "all"
query = "MOOCs"
MOOCs_trends = gTrends(ch, geo = location, query = query)
MOOCs<-MOOCs_trends[[1]]
MOOCs$moocs<-as.numeric(as.character(MOOCs$moocs))
MOOCs$Week <- as.character(MOOCs$Week)
MOOCs$start <- as.Date(MOOCs$Week)
ggplot(MOOCs[MOOCs$moocs!=0,], aes(start, moocs)) +
geom_line(colour = "blue") +
ylab("Trends") + xlab("") + theme_bw()
I think that to match the graph generated by google I would need to aggregate the data to months instead of weeks... not sure how to do that yet
The object returned by gtrendsR is a list, of which the trend element is the data.frame that you would want to plot.
usr = "my.email"
psw = "my.password"
gconnect(usr, psw)
MOOCs_trends = gtrends('MOOCs')
MOOCsDF <- MOOCs_trends$trend
ggplot(data = MOOCsDF) + geom_line(aes(x=start, y=moocs))
This gives:
Now if you want to aggregate by month, I would suggest using the floor_date function from the lubridate package, in combination with dplyr (note that I am using the chain operator %>% which dplyr re-exports from the magrittr package).
library(ggplot2)
library(lubridate)  # floor_date()
library(dplyr)      # group_by(), summarise(), %>%
usr <- "my.email"
psw <- "my.password"
gconnect(usr, psw)
MOOCs_trends <- gtrends('MOOCs')
# The trend element of the returned list is the data frame to aggregate;
# the list itself has no `start` column.
MOOCsDF <- MOOCs_trends$trend
# Snap every weekly start date to the first day of its month
MOOCsDF$start <- floor_date(MOOCsDF$start, unit = 'month')
# Sum the weekly values within each month and plot the monthly series
MOOCsDF %>%
  group_by(start) %>%
  summarise(moocs = sum(moocs)) %>%
  ggplot() + geom_line(aes(x = start, y = moocs))
This gives:
Note 1: The query MOOCs was changed to moocs, by gtrendsR, this is reflected in the y variable that you're plotting.
Note 2: some of the cases of functions have changed (e.g. gtrendsR not GTrendsR), I am using current versions.
This will get you most of the way there. The plot doesn't look quite right, but that's more of a function of the data being a bit different. Here's the necessary conversions to numeric and to dates.
MOOCs <- MOOCs_trends[[1]]
library(ggplot2)
library(plyr)
## Convert to string
MOOCs$Week <- as.character(MOOCs$Week)
# Convert via character first: as.numeric() applied directly to a factor
# returns the internal level codes, not the printed values.
MOOCs$moocs <- as.numeric(as.character(MOOCs$moocs))
# Split the string on " - " and keep the second piece of each entry
MOOCs$start <- unlist(llply(strsplit(MOOCs$Week, " - "), function(x) x[2]))
# POSIXct is the date-time class meant for data-frame columns; POSIXlt is a
# list underneath and does not behave well as a column.
MOOCs$start <- as.POSIXct(MOOCs$start)
ggplot(MOOCs, aes(x = start, y = moocs)) + geom_point() + geom_path()
Google might do some smoothing, but this will plot the data you have.

creating heatmap with R with eye-tracker data

I have a table composed of the following data
frame,X,Y
which is the resulting data from several eye tracking analysis.
Now I would like to create a Heatmap using R, like the following
I tried several scripts found online, but none of them gave me that result.
How can I do this?
Here some sample data
Ignore the first two columns
task,visualization,frame,X,Y
1,b,1,383,221
1,b,1,632,356
1,b,1,947,663
1,b,1,546,206
1,b,1,488,272
1,b,1,578,752
1,b,1,415,261
1,b,1,693,158
1,b,1,684,528
1,b,1,592,67
1,b,1,393,180
1,b,1,1033,709
1,b,1,1080,739
1,b,1,711,523
1,b,1,1246,49
1,b,1,742,69
1,b,1,601,370
1,b,10,902,684
1,b,10,517,241
1,b,10,583,86
1,b,10,582,754
1,b,10,426,257
1,b,10,575,229
1,b,10,697,150
1,b,10,379,520
1,b,10,390,286
1,b,10,618,396
1,b,10,710,143
1,b,10,383,188
1,b,10,1026,713
1,b,10,1078,625
1,b,10,713,521
You can get this type of plot quite easily using stat_bin2d from ggplot2:
library(ggplot2)
ggplot(dat, aes(x = X, y = Y)) + stat_bin2d(bins = 10)
This does simple binning; as @RomanLustrik suggested, you could also perform some kind of kernel smoothing. This can also be done using ggplot2:
ggplot(dat, aes(x = X, y = Y)) +
stat_density2d(geom = "tile", aes(fill = ..density..), contour = FALSE) +
geom_point()
Note that dat is the example data you gave. To get your data into a data.frame:
# Parse the inline CSV sample into a data.frame. Passing the string through
# the `text` argument lets read.table() create and close its own connection,
# so no open textConnection is left behind.
dat <- read.table(text = "task,visualization,frame,X,Y
1,b,1,383,221
1,b,1,632,356
1,b,1,947,663
1,b,1,546,206
1,b,1,488,272
1,b,1,578,752
1,b,1,415,261
1,b,1,693,158
1,b,1,684,528
1,b,1,592,67
1,b,1,393,180
1,b,1,1033,709
1,b,1,1080,739
1,b,1,711,523
1,b,1,1246,49
1,b,1,742,69
1,b,1,601,370
1,b,10,902,684
1,b,10,517,241
1,b,10,583,86
1,b,10,582,754
1,b,10,426,257
1,b,10,575,229
1,b,10,697,150
1,b,10,379,520
1,b,10,390,286
1,b,10,618,396
1,b,10,710,143
1,b,10,383,188
1,b,10,1026,713
1,b,10,1078,625
1,b,10,713,521", header = TRUE, sep = ",")

Resources