ggplot2 with time series (week and year) overlapping x labels [duplicate] - r

I'm having a very, very tough time getting the x-axis to look correct for my graphs.
Here is my data (generated via dput()):
df <- structure(list(Month = structure(1:12, .Label = c("2011-07-31", "2011-08-31", "2011-09-30", "2011-10-31", "2011-11-30", "2011-12-31", "2012-01-31", "2012-02-29", "2012-03-31", "2012-04-30", "2012-05-31", "2012-06-30"), class = "factor"), AvgVisits = c(6.98655104580674,7.66045407330464, 7.69761337479304, 7.54387561322994, 7.24483848458728, 6.32001400498928, 6.66794871794872, 7.207780853854, 7.60281201431308, 6.70113837397123, 6.57634103019538, 6.75321935568936)), .Names = c("Month","AvgVisits"), row.names = c(NA, -12L), class = "data.frame")
Here is the chart I am trying to graph:
ggplot(df, aes(x = Month, y = AvgVisits)) +
geom_bar() +
theme_bw() +
labs(x = "Month", y = "Average Visits per User")
That chart works fine - but, if I want to adjust the formatting of the date, I believe I should add this:
scale_x_date(labels = date_format("%m-%Y"))
I'm trying to make it so the date labels are 'MMM-YYYY'
ggplot(df, aes(x = Month, y = AvgVisits)) +
geom_bar() +
theme_bw() +
labs(x = "Month", y = "Average Visits per User") +
scale_x_date(labels = date_format("%m-%Y"))
When I plot that, I continue to get this error:
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Despite hours of research on formatting of geom_line and geom_bar, I can't fix it. Can anyone explain what I'm doing wrong?
Edit: As a follow-up thought: Can you use date as a factor, or should you use as.Date on a date column?

To show months as Jan 2017 Feb 2017 etc:
scale_x_date(date_breaks = "1 month", date_labels = "%b %Y")
Angle the dates if they take up too much space:
theme(axis.text.x=element_text(angle=60, hjust=1))

Can you use date as a factor?
Yes, but you probably shouldn't.
...or should you use as.Date on a date column?
Yes.
Which leads us to this:
library(scales)
# Month is stored as a factor of "YYYY-MM-DD" strings; convert it to Date so
# that a date scale can be applied to the x-axis.
df$Month <- as.Date(df$Month)
ggplot(df, aes(x = Month, y = AvgVisits)) +
  # stat = "identity" uses AvgVisits directly as the bar height instead of
  # binning/counting rows (which is what triggered the stat_bin message).
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(x = "Month", y = "Average Visits per User") +
  # %b is the abbreviated month name, which yields the requested "MMM-YYYY"
  # labels (e.g. "Jul-2011" in an English locale); %m would print the month
  # number ("07-2011") instead.
  scale_x_date(labels = date_format("%b-%Y"))
in which I've added stat = "identity" to your geom_bar call.
In addition, the message about the binwidth wasn't an error. An error will actually say "Error" in it, and similarly a warning will always say "Warning" in it. Otherwise it's just a message.

Related

How to plot time in the x-axis? [duplicate]

I have tried to read through stackoverflow, blogs, books etc but have been unable to find the answer on plotting time in the x-axis in the following format(HH:MM:SS.000) in R and another quantity on the y-axis. I have the following dataset:
Time EcNo
12:54:09.000 -14.47
12:54:10.000 -17.96
12:54:11.000 -15.97
12:54:12.000 -14.61
12:54:13.000 -12.68
12:54:14.000 -10.73
12:54:15.000 -10.54
12:54:16.000 -11.62
12:54:17.000 -12.49
12:54:18.000 -11.12
How would I plot EcNo on Yaxis vs Time(x axis) in the format HH:MM:SS.000 as shown above.
I honestly would appreciate some help.
many thanks
You may also try ggplot:
library(ggplot2)
# Parse with %OS instead of %S: %S stops at whole seconds and silently drops
# the fractional part of "HH:MM:SS.000", so millisecond values would be lost
# before plotting. No date is given, so strptime() fills in the current date.
df$time <- as.POSIXct(strptime(df$Time, format = "%H:%M:%OS"))
# Automatic scale selection
ggplot(data = df, aes(x = time, y = EcNo)) + geom_point()
scale_x_datetime is a ggplot2 function, but the helper functions date_breaks and date_format used below come from the scales package:
library(scales)
# All three variants share the same scatter layer; only the x-axis breaks and
# label format differ, so build the common part once.
ecno_points <- ggplot(df, aes(time, EcNo)) + geom_point()

# One tick per second, labelled with whole seconds.
ecno_points +
  scale_x_datetime(labels = date_format("%S"), breaks = date_breaks("1 sec"))

# One tick per second, labelled with seconds to three decimal places.
ecno_points +
  scale_x_datetime(labels = date_format("%OS3"), breaks = date_breaks("1 sec"))

# One tick every four seconds, labelled as minutes:seconds.
ecno_points +
  scale_x_datetime(labels = date_format("%M:%S"), breaks = date_breaks("4 sec"))
# Base-graphics alternative: plot EcNo against the parsed times with the default
# x-axis suppressed (xaxt = "n"), then draw a custom axis with one tick per
# observation, labelled as HH:MM:SS.
# NOTE(review): `dta` is not defined in this snippet; presumably it is the
# Time/EcNo data frame from the question. Confirm before running.
plot(strptime(dta$Time, format="%H:%M:%S"), dta$EcNo, xaxt="n")
axis(1, at=as.numeric(strptime(dta$Time, format="%H:%M:%S")),
labels=strftime( strptime(dta$Time, format="%H:%M:%S"),format="%H:%M:%S"))
# Sample data from the question: clock times with millisecond precision and
# the measured EcNo value for each.
df <- data.frame(
  Time = c(
    "12:54:09.000", "12:54:10.000", "12:54:11.000", "12:54:12.000",
    "12:54:13.000", "12:54:14.000", "12:54:15.000", "12:54:16.000",
    "12:54:17.000", "12:54:18.000"
  ),
  EcNo = c(
    -14.47, -17.96, -15.97, -14.61, -12.68,
    -10.73, -10.54, -11.62, -12.49, -11.12
  )
)

# Print three fractional-second digits while the axis is drawn; options()
# returns the previous settings so they can be restored afterwards.
op <- options(digits.secs = 3)

# Parse once and reuse; %OS keeps the fractional seconds that %S would drop.
time_stamps <- as.POSIXct(df$Time, format = "%H:%M:%OS")

# Suppress the default x-axis (xaxt = "n") and draw a time axis whose labels
# include the fractional seconds. Explicit axis titles keep the plot readable
# now that the x values come from a helper variable.
plot(time_stamps, df$EcNo, xaxt = "n", xlab = "Time", ylab = "EcNo")
axis.POSIXct(1, time_stamps, format = "%H:%M:%OS")

# Restore the caller's option so digits.secs does not leak into the rest of
# the session.
options(op)

How to order geom_segment ggplot with colour

I am new to ggplot library. And trying to draw the plot using the following data.frame:
library(tidyverse)
df <-tribble(~event, ~startdate,~enddate,~loc,
"A",as.POSIXct("1984/02/10"),as.POSIXct("1987/06/10"),"1",
"B",as.POSIXct("1984/02/11"),as.POSIXct("1990/02/12"),"2",
"A",as.POSIXct("1992/05/15"),as.POSIXct("1999/06/15"),"3",
"C",as.POSIXct("2003/08/29"),as.POSIXct("2015/08/29"),"4",
"B",as.POSIXct("2002/04/11"),as.POSIXct("2012/04/12"),"5",
"E",as.POSIXct("2000/02/10"),as.POSIXct("2005/02/15"),"6")
max_date = max(df$startdate,df$enddate)
Using the following code snippet:
ggplot(NULL)+
geom_segment(data = df,aes(x=loc, xend =loc,y = startdate, yend = enddate,colour=event),size = 5,alpha=0.6) +
geom_label(aes(label=df$event,x = df$loc,y=max_date), size=2) +
#geom_point(data=final_df,aes(x=newspaper,y=date),color="black") + Point from other data frame
coord_flip() + xlab("LoC") + ylab("Year")
I am able to produce the following chart:
How can I order the above chart by colour, i.e. by the event field (in other words, how can I group by the event field so that it displays all events A first, then events B, C, etc.)? I have tried to use scale_x_continuous and reorder from the tidyverse package, but it didn't work. How can I display more "Year" labels on the x-axis? I tried to use scale_x_date (mentioned here: R: ggplot display all dates on x axis), but it needs as.Date and ggplot geom_segment needs as.POSIXct format. Please feel free to correct me!
Any help would be great! Thank you!
Two options. I've also reversed your x and y so you don't have to use coord_flip() and made several other small modifications including the x-axis labels (you were looking for scale_y_datetime since you flipped the axes and the "dates" were actually in POSIXct). Also, one difference with Duck's answer is my scales = "free" in facet_grid. You might decide your labels and your "loc" variable may not make sense given these new graphs anyway.
library(tibble)
library(ggplot2)

# Event intervals: one row per occurrence, with its start/end timestamps and a
# location id.
df <- tribble(
  ~event, ~startdate,               ~enddate,                 ~loc,
  "A",    as.POSIXct("1984/02/10"), as.POSIXct("1987/06/10"), "1",
  "B",    as.POSIXct("1984/02/11"), as.POSIXct("1990/02/12"), "2",
  "A",    as.POSIXct("1992/05/15"), as.POSIXct("1999/06/15"), "3",
  "C",    as.POSIXct("2003/08/29"), as.POSIXct("2015/08/29"), "4",
  "B",    as.POSIXct("2002/04/11"), as.POSIXct("2012/04/12"), "5",
  "E",    as.POSIXct("2000/02/10"), as.POSIXct("2005/02/15"), "6"
)

# Latest timestamp in the data; every label is placed at this x position.
max_date <- max(df$startdate, df$enddate)

# Option 1: one horizontal lane per event, so occurrences of the same event
# share a row. Time is mapped to x directly, so coord_flip() is not needed.
ggplot(df) +
  geom_segment(
    aes(x = startdate, xend = enddate, y = event, yend = event, colour = event),
    size = 5,
    alpha = 0.6
  ) +
  geom_label(aes(x = max_date, y = event, label = event), size = 2) +
  labs(x = "Year", y = "LoC") +
  scale_x_datetime(date_breaks = "year", date_labels = "%Y") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

# Option 2: keep one lane per loc, but group the lanes into one facet row per
# event; scales = "free" lets each facet show only its own lanes.
ggplot(df) +
  geom_segment(
    aes(x = startdate, xend = enddate, y = loc, yend = loc, colour = event),
    size = 5,
    alpha = 0.6
  ) +
  geom_label(aes(x = max_date, y = loc, label = event), size = 2) +
  labs(x = "Year", y = "LoC") +
  scale_x_datetime(date_breaks = "year", date_labels = "%Y") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  facet_grid(rows = vars(event), scales = "free")
Created on 2020-10-18 by the reprex package (v0.3.0)
Consider this as an option. As mentioned by @ArthurYip, setting reorder could affect the sense of your plot. You could avoid the labels by using facet_grid() in the following way:
library(ggplot2)
# Plot: one vertical segment per loc spanning start to end date, flipped so
# that time runs horizontally, with one facet row per event.
ggplot(df) +
  geom_segment(
    aes(x = loc, xend = loc, y = startdate, yend = enddate, colour = event),
    size = 5,
    alpha = 0.6
  ) +
  coord_flip() +
  labs(x = "LoC", y = "Year") +
  facet_grid(event ~ ., switch = "x")
Output:

How to plot the graph using all the dates in x axis? [duplicate]

I'm having a very, very tough time getting the x-axis to look correct for my graphs.
Here is my data (generated via dput()):
df <- structure(list(Month = structure(1:12, .Label = c("2011-07-31", "2011-08-31", "2011-09-30", "2011-10-31", "2011-11-30", "2011-12-31", "2012-01-31", "2012-02-29", "2012-03-31", "2012-04-30", "2012-05-31", "2012-06-30"), class = "factor"), AvgVisits = c(6.98655104580674,7.66045407330464, 7.69761337479304, 7.54387561322994, 7.24483848458728, 6.32001400498928, 6.66794871794872, 7.207780853854, 7.60281201431308, 6.70113837397123, 6.57634103019538, 6.75321935568936)), .Names = c("Month","AvgVisits"), row.names = c(NA, -12L), class = "data.frame")
Here is the chart I am trying to graph:
ggplot(df, aes(x = Month, y = AvgVisits)) +
geom_bar() +
theme_bw() +
labs(x = "Month", y = "Average Visits per User")
That chart works fine - but, if I want to adjust the formatting of the date, I believe I should add this:
scale_x_date(labels = date_format("%m-%Y"))
I'm trying to make it so the date labels are 'MMM-YYYY'
ggplot(df, aes(x = Month, y = AvgVisits)) +
geom_bar() +
theme_bw() +
labs(x = "Month", y = "Average Visits per User") +
scale_x_date(labels = date_format("%m-%Y"))
When I plot that, I continue to get this error:
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Despite hours of research on formatting of geom_line and geom_bar, I can't fix it. Can anyone explain what I'm doing wrong?
Edit: As a follow-up thought: Can you use date as a factor, or should you use as.Date on a date column?
To show months as Jan 2017 Feb 2017 etc:
scale_x_date(date_breaks = "1 month", date_labels = "%b %Y")
Angle the dates if they take up too much space:
theme(axis.text.x=element_text(angle=60, hjust=1))
Can you use date as a factor?
Yes, but you probably shouldn't.
...or should you use as.Date on a date column?
Yes.
Which leads us to this:
library(scales)
# Month is stored as a factor of "YYYY-MM-DD" strings; convert it to Date so
# that a date scale can be applied to the x-axis.
df$Month <- as.Date(df$Month)
ggplot(df, aes(x = Month, y = AvgVisits)) +
  # stat = "identity" uses AvgVisits directly as the bar height instead of
  # binning/counting rows (which is what triggered the stat_bin message).
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(x = "Month", y = "Average Visits per User") +
  # %b is the abbreviated month name, which yields the requested "MMM-YYYY"
  # labels (e.g. "Jul-2011" in an English locale); %m would print the month
  # number ("07-2011") instead.
  scale_x_date(labels = date_format("%b-%Y"))
in which I've added stat = "identity" to your geom_bar call.
In addition, the message about the binwidth wasn't an error. An error will actually say "Error" in it, and similarly a warning will always say "Warning" in it. Otherwise it's just a message.

date_minor_breaks in ggplot2

I am a beginner in ggplot2. I am unable to use date_minor_breaks to show quarterly "ticks" on x-axis.
Here's my code:
x<-c(seq(1:12))
time<-c("2010Q1","2010Q2","2010Q3","2010Q4","2011Q1","2011Q2", "2011Q3","2011Q4","2012Q1","2012Q2","2012Q3","2012Q4")
z<-data.frame(type = x,time = time)
z$time = as.yearqtr(z$time)
z$time = as.Date(z$time)
ggplot(data = z, aes(x=time,y=type)) +
geom_point() +
scale_x_date(date_labels = "%Y",date_minor_breaks = "3 months",name = "Year") +
theme_tufte() +
theme(legend.position = "none")
I researched this topic on SO Formatting dates with scale_x_date in ggplot2 and on https://github.com/hadley/ggplot2/issues/542, and found that there were some issues reported on this topic. However, I didn't quite follow the conversation about changes to ggplot2 because it's been only 6 days since I started using ggplot2.
Here's the graph I got (it doesn't have any ticks)...
Here's a sample graph with "tick marks" generated from Excel. Please ignore values because my point of creating this Excel chart is to demonstrate what I am looking for--i.e. "quarterly ticks". I'd appreciate your help.
You may have to make major breaks every three months and then pad your labels with blanks to give the illusion of major (labeled) and minor (unlabeled) ticks. See this answer for another example.
First manually make the breaks for the tick marks at every quarter.
# One tick position per quarter, spanning the first to the last observed date.
time_span <- range(z$time)
breaks_qtr <- seq(time_span[1], time_span[2], by = "3 months")
Then make the year labels and pad these labels with three blanks after each number.
# One label per calendar year covered by the data.
labels_year <- format(seq(min(z$time), max(z$time), by = "1 year"), "%Y")
# Interleave each year with three empty strings: rbind() builds a 4-row matrix
# (year, "", "", "") per column, and as.vector() reads it column by column, so
# only the first quarter of each year carries a visible label.
labs <- as.vector(rbind(labels_year, "", "", ""))
Now use the breaks and the labels with the labels and breaks arguments in scale_x_date. Notice that I'm not using date_labels and date_breaks for this.
# Scatter plot with a tick at every quarter; only the first tick of each year
# is labelled because the remaining entries of `labs` are empty strings.
ggplot(z, aes(time, type)) +
  geom_point() +
  scale_x_date(name = "Year", breaks = breaks_qtr, labels = labs) +
  theme_tufte() +
  theme(legend.position = "none")
You should also define your (major) date breaks:
# Axis limits from 2009Q4 to 2013Q2, one quarter either side of the 2010-2012 data.
x_limits <- c(
  as.Date(as.yearqtr("2009Q4")),
  as.Date(as.yearqtr("2013Q2"))
)

# Major breaks (labelled with the year) every year, minor breaks every quarter.
ggplot(z, aes(time, type)) +
  geom_point() +
  scale_x_date(
    name = "Year",
    date_breaks = "1 year",
    date_minor_breaks = "3 months",
    date_labels = "%Y",
    limits = x_limits,
    expand = c(0, 0)
  ) +
  theme(legend.position = "none")
And some other "fancy" stuff to align the minor ticks with the major ticks (I guess there are better ways to do this, but this works).

How to make scale_x_date week start with Sunday

I'm creating a weekly time series chart, and the week should start on Sunday. When I specify scale_x_date(breaks = date_breaks('1 week')), the grid lines and labels start on Monday, so the result looks slightly off. How can I force the scale_x_date weeks to start on Sunday?
This is an example of my code:
library(ggplot2)
library(scales)
data.set <- structure(list(week.start = structure(c(15732, 15739,
15746, 15753, 15760, 15767, 15774, 15781,
15788, 15795, 15802, 15809 ), class =
"Date"), overtime.avg = c(2.8,
2.85666666666667, 2.18333333333333,
2.44666666666667, 2.04833333333333,
2.45833333333333, 2.12833333333333,
1.81666666666667, 1.82166666666667,
1.54333333333333, 2.09166666666667,
0.970833333333333)), .Names =
c("week.start", "overtime.avg"), row.names
= 29733:29744, class = "data.frame")
ggplot(data = data.set,
aes(x = week.start,
y = overtime.avg)) +
geom_line() +
geom_point() +
scale_x_date(breaks = date_breaks("1 week"),
labels = date_format(format = "%Y-%m-%d"))
One way would be to use function seq() and provide your own break points starting with first Sunday (used minimal value of week.start) and set by="week".
# Weekly breaks anchored on the first observed week.start value instead of the
# default week boundaries, so the ticks fall on the data's own week starts.
week_breaks <- seq(
  min(data.set$week.start),
  max(data.set$week.start),
  by = "week"
)

ggplot(data.set, aes(week.start, overtime.avg)) +
  geom_line() +
  geom_point() +
  scale_x_date(
    labels = date_format(format = "%Y-%m-%d"),
    breaks = week_breaks
  )

Resources