R ggplot2 x-axis not properly show - r

My question is in the following.
here is my raw data:
type,com,year,month,value
A,CH,2016,1,0
A,CH,2016,2,0
A,CH,2016,3,0
A,CH,2016,4,0
A,CH,2016,5,0
A,CH,2016,6,0
A,CH,2016,7,0
A,CH,2016,8,0
A,CH,2016,9,453128
A,CH,2016,10,868256
A,CH,2016,11,1015080
A,CH,2016,12,650912
B,CH,2016,1,0
B,CH,2016,2,0
B,CH,2016,3,0
B,CH,2016,4,0
B,CH,2016,5,0
B,CH,2016,6,61273
B,CH,2016,7,27711
B,CH,2016,8,161780
B,CH,2016,9,48889
B,CH,2016,10,72805
B,CH,2016,11,131466
B,CH,2016,12,73756
C,CH,2016,1,0
C,CH,2016,2,0
C,CH,2016,3,0
C,CH,2016,4,0
C,CH,2016,5,0
C,CH,2016,6,0
C,CH,2016,7,0
C,CH,2016,8,2200
C,CH,2016,9,111384
C,CH,2016,10,28758
C,CH,2016,11,21161
C,CH,2016,12,0
I use it to plot a line graph with ggplot2.
The code is:
library(ggplot2)
# Read the raw data and split it into one data frame per `type`.
test <- read.csv("test.csv", header = TRUE)
test_list <- split(test, test$type)
# Line plot of the type "A" subset, one line per `com`.
Rplot <- ggplot(test_list$A, aes(x = month, y = value, col = com)) +
  geom_line()
Rplot
My question:
How can I make the x-axis display the months as 1, 2, 3, ..., 12?
And how can I show year and month together on the x-axis (Jan2016, Feb2016, ...) or (2016/1, 2016/2, ...)?
Any help is much appreciated.

Create date_label column as Date class in the dataframe using ymd() from lubridate package.
library( 'lubridate' )
# Add a Date column (first day of each month) to every data frame in the list.
test_list <- lapply(test_list, function(x) {
  # Paste the numeric month directly ("2016-9-1") instead of going through
  # month.abb: parsing month names with ymd() depends on the session locale.
  x$date_label <- ymd(paste(x$year, x$month, 1L, sep = "-"))
  x
})
Manipulate x axis using scale_x_date()
# 1. date_labels = "%Y/%m" - 2016/01
# 2. date_labels = "%m" - 01
# 3. date_labels = "%b %Y" - Jan 2016
library( 'ggplot2' )
# Plot 1: x axis labelled with the two-digit month, one break per month.
# `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
ggplot(
  data = test_list$A,
  aes(x = date_label, y = value, color = com, group = com)
) +
  geom_line(linewidth = 3) +
  scale_x_date(
    name = "month",
    date_labels = "%m",
    date_breaks = "1 month"
  )

# Plot 2: x axis labelled "Jan 2016", ..., with the labels rotated 45 degrees.
ggplot(
  data = test_list$A,
  aes(x = date_label, y = value, group = com, color = com)
) +
  geom_line(linewidth = 3) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(
    name = "month_year",
    date_labels = "%b %Y",
    date_breaks = "1 month"
  )

For the first case, you can convert month to be a factor. The following code would eliminate decimals in the month.
test$month <- as.factor(test$month)
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=month, y=value, col=com, group = 1))+geom_line()
Rplot
To show the month together with the year you need a new column. I will create one using the dplyr package and convert it to a factor.
library(dplyr)
# Build a "Jan2016"-style label from month and year. Going through
# as.character() recovers the month number whether `month` is numeric or a
# factor.
test <- test %>%
  mutate(exactDate = paste0(month.abb[as.numeric(as.character(month))], year))
# Convert to a factor whose levels are in chronological order (the default
# would be alphabetical). The levels are derived from the data, so the code
# is not tied to a hard-coded list of 2016 labels.
date_order <- order(test$year, as.numeric(as.character(test$month)))
test$exactDate <- factor(
  test$exactDate,
  levels = unique(test$exactDate[date_order])
)
# Plot similarly
test_list <- split(test, test$type)
Rplot <- ggplot(
  test_list$A,
  aes(x = exactDate, y = value, col = com, group = 1)
) +
  geom_line()
# Rotate axis labels by 90 degrees
Rplot + theme(axis.text.x = element_text(angle = 90, hjust = 1))

Related

how to skip every other date label on x-axis in ggplot R

consider plot below ( data is from the useful link : How to create custom date labels using ggplot with scale_x_date )
start_date <- as.Date('2020-08-08')
end_date <- as.Date('2021-05-10')
# tibble with test data
df <- tibble( dates = as.Date(as.character(seq(start_date, end_date, by = 4) ) ),
yval = as.numeric(strftime(dates, format = "%d")),
# following two lines just to give some color to the plot
month = strftime(dates, format = "%m") ) %>%
group_by( month )
df
ggplot(df, aes( x=dates, y=yval, color = month ) ) +
geom_point() +
scale_x_date(date_labels = "%b %d", breaks = "1 month")
what I want is NOT to have the 2nd, the 4th , the 6th,... labels on the xaxis.
I tried "2 month" for breaks but it does the opposite and removes the 1st , 3d, 5th, ... labels.
any ideas how to do this ( preferably without manually specifying the labels one by one )
I tried to get this passing a function to breaks but that didn't work, so I made the sequence directly using start_date and end_date:
ggplot(df, aes(x = dates, y = yval, color = month)) +
  geom_point() +
  scale_x_date(
    date_labels = "%b %d",
    # Explicit breaks every second month, starting at the first day of the
    # month that contains start_date.
    breaks = seq(
      from = lubridate::floor_date(start_date, "month"),
      to = end_date,
      by = "2 months"
    )
  )
Another option is setting limits in scale_x_date where you set the month before your first date because your first date is later and then it will start breaking from the first date instead of the second date. Here is a reproducible example:
library(tibble)
library(dplyr)
start_date <- as.Date('2020-08-08')
end_date <- as.Date('2021-05-10')
# tibble with test data
df <- tibble( dates = as.Date(as.character(seq(start_date, end_date, by = 4) ) ),
yval = as.numeric(strftime(dates, format = "%d")),
# following two lines just to give some color to the plot
month = strftime(dates, format = "%m") ) %>%
group_by( month )
library(ggplot2)
ggplot(df, aes( x=dates, y=yval, color = month ) ) +
geom_point() +
scale_x_date(date_labels = "%b %d", breaks = "2 month", limits = c(as.Date("2020-07-01"), NA))
Created on 2022-08-23 with reprex v2.0.2

Reverse datetime axis with custom breaks in ggplot2

Reversing the date order is not yet supported in ggplot2, as stated in this GitHub issue. A reverse datetime scale can be created by manually defining a trans function, as shown in this answer.
Would it be possible to manually define 1. the breaks and 2. the labels of the datetime axis, just like with the date_breaks and date_labels arguments of the scale_x_date/scale_y_date functions?
Reprex
library(ggplot2)
library(scales)
sample_data = data.frame(
report_date = seq(as.Date("2021-01-01"), as.Date("2021-12-31"), by = "day"),
report_var = seq(1, 365)
)
# Compose two scales transformations into a single one: values are passed
# through `b` first and then through `a`.
#
# a, b:    anything accepted by as.trans() (e.g. a transformation name).
# breaks:  breaks function handed to trans_new(); defaults to the one of `b`.
# format:  format function handed to trans_new(); defaults to the one of `b`.
#
# NOTE: the defaults `b$breaks` / `b$format` are evaluated lazily, i.e. not
# before `breaks` / `format` are first used in the trans_new() call below. By
# then `b` has already been replaced by its transformation object, which is
# what makes `b$breaks` valid. Do not force these arguments earlier.
c_trans <- function(a, b, breaks = b$breaks, format = b$format) {
a <- as.trans(a)
b <- as.trans(b)
# Name of the combined transformation, e.g. "<a name>-<b name>".
name <- paste(a$name, b$name, sep = "-")
# Forward direction: apply b, then a.
trans <- function(x) a$trans(b$trans(x))
# Inverse direction: undo a, then undo b.
inv <- function(x) b$inverse(a$inverse(x))
trans_new(name, trans, inverse = inv, breaks = breaks, format = format)
}
rev_date <- c_trans("reverse", "date")
ggplot(sample_data, aes(x = report_var, y = report_date)) +
geom_point() +
scale_y_continuous(trans = rev_date)
Created on 2021-06-22 by the reprex package (v2.0.0)
Breaks
Manually define a numeric vector in Date class. Add it to the breaks argument of scale_y_continuous function.
scale_y_continuous(
trans = rev_date,
breaks = seq.Date(as.Date("2021-01-01"), as.Date("2021-12-31"), by = "2 weeks")
)
Labels
Create a lambda function that takes the Date input and format the time. Add it to the labels argument of the scale_y_continuous function.
scale_y_continuous(
trans = rev_date,
breaks = seq.Date(as.Date("2021-01-01"), as.Date("2021-12-31"), by = "2 weeks"),
labels = ~ strftime(., "%b %d")
)

How can I work with stat_density and a timeseries (Posixct on x axis)?

Based on this example:
#example from https://ggplot2.tidyverse.org/reference/geom_tile.html
cars <- ggplot(mtcars, aes(mpg,factor(cyl)))
cars + stat_density(aes(fill = after_stat(density)), geom = "raster", position = "identity")
I wanted to create a plot with the density plotted vertically per hour of my dataset. The original dataset is very long. I also want to display the single data points and a mean as a line.
Here is a simplified basic version of the code:
#reproducable example for density plot
library(reshape2)
library(ggplot2)
library(scales)
startdate <- as.POSIXct("2020-01-01 01:00", tz="UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz="UTC")
#dataframe
df <- data.frame(x = seq.POSIXt(startdate, enddate, "hour"),
y1 = c(1,2,3,4,5),
y2 = c(2,4,6,8,10),
y3 = c(3,6,9,12,15))
df$mean <- rowMeans(df[,-1])
df_melt <- melt(df, id.vars = 1, measure.vars = c(2,3,4))
#plot
g1 <- ggplot(data = df_melt, aes(factor(x), value)) +
stat_density(aes(fill = after_stat(ndensity)),
geom = "raster", position = "identity", orientation = "y") +
geom_point()
g1
This works, but the original dataset covers so many hours that the x-axis labels become unreadable. I also want to control the date format of the labels and the limits of the plot. Before working with stat_density, I used to do that with scale_x_datetime. But for the density plot I have to use factor(x) instead of the original x, which is POSIXct. So the following scaling produces an error, because x is a factor and not a date-time:
#scale x datetime (does not work)
g1 <- g1 + scale_x_datetime(labels = date_format("%b/%d", tz="UTC"),
limits = c(startdate, enddate),
breaks = function(x)
seq.POSIXt(from = startdate, to = enddate, by = "2 days"),
date_minor_breaks = "12 hours",
expand = c(0,0))
g1
I managed to scale_x_discrete but this makes it hard to determine the label format and limits with the bigger dataset:
#scale x discrete
g1 <- g1 + scale_x_discrete(limits = c(as.character(df$x)),
breaks = as.character(df$x)[c(2,4)])
g1
The next problem with factors is then that I cannot add the mean of every hour as geom_line as every factor consists of 1 observation only.
#plot mean
g1 + geom_point(data = df, aes(factor(x), mean), col = "red")
g1 + geom_line(data = df, aes(factor(x), mean), col = "red")
So, is there a way to produce the desired plot with density per hour, overplotted points and overplotted mean line? And I want to edit the x labels and limits as comfortably as possible. Maybe there is a way to use x instead of factor(x)...
I think the solution might be as simple as dropping the as.factor() and setting an explicit group in the density. Does the following work for your real case?
library(reshape2)
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3
startdate <- as.POSIXct("2020-01-01 01:00", tz="UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz="UTC")
#dataframe
df <- data.frame(x = seq.POSIXt(startdate, enddate, "hour"),
y1 = c(1,2,3,4,5),
y2 = c(2,4,6,8,10),
y3 = c(3,6,9,12,15))
df$mean <- rowMeans(df[,-1])
df_melt <- melt(df, id.vars = 1, measure.vars = c(2,3,4))
#plot
ggplot(data = df_melt, aes(x, value)) +
stat_density(aes(fill = after_stat(ndensity),
group = x),
geom = "raster", position = "identity", orientation = "y") +
geom_point()
Created on 2021-01-29 by the reprex package (v0.3.0)

exclude weekends from x axis in heatmap

I have coded a heatmap using ggplot tiles, and it has sequential days on the x axis. The problem I am trying to solve is to remove weekends from the heatmap and show only weekdays. I have found that one solution would be to transform the dates into factors, but if I do that, how can I format the labels in scale_x_discrete in %d%m date format? Is there a way to keep the dates in date format instead of turning them into factors?
Below is an example:
# Generate `n` random strings, each consisting of `length` upper-case letters
# sampled with replacement from LETTERS.
#
# n:      number of strings to generate (default 5).
# length: number of characters per string (default 3).
# Returns a character vector of length `n` (character(0) when n is 0).
randomString <- function(n = 5, length = 3) {
  # seq_len() keeps n = 0 safe; 1:n would iterate over c(1, 0).
  vapply(
    seq_len(n),
    function(i) paste(sample(LETTERS, length, replace = TRUE), collapse = ""),
    character(1)
  )
}
randomString()
data.frame(client=randomString(),day=rep(seq.Date(Sys.Date()-10,length.out=10,by="1 day"),2)) %>% mutate(sales=round(rnorm(20,492,300),1)) %>% mutate(scale=cut(sales,breaks=c(0,100,200,300,max(sales)),labels = c("0-100","100-200","200-300","+300"))) %>% ggplot(.,aes(x=day,y=client,fill=scale)) + geom_tile() + scale_x_date(date_breaks = "1 day")
Thanks in advance
You can exclude data from weekends using the is.weekend function from chron
The weekend dates themselves can be excluded from an x-axis using the bdscale package
library(chron)
library(bdscale)
library(scales)
library(ggplot2)
library(dplyr)
# Simulated sales data: 20 rows covering 10 consecutive days (each day twice),
# binned into sales ranges, with weekend rows removed.
# data.frame() is required here: as.data.frame() expects an existing object as
# its first argument and cannot build a frame from named columns.
df <- data.frame(
  client = randomString(),
  day = rep(seq.Date(Sys.Date() - 10, length.out = 10, by = "1 day"), 2)
) %>%
  mutate(sales = round(rnorm(20, 492, 300), 1)) %>%
  mutate(
    scale = cut(
      sales,
      # Inf as the upper bound keeps the breaks strictly increasing even when
      # no simulated value exceeds 300.
      breaks = c(0, 100, 200, 300, Inf),
      labels = c("0-100", "100-200", "200-300", "+300")
    )
  ) %>%
  filter(!is.weekend(day))
ggplot(df, aes(x = day, y = client, color = scale, fill = scale)) +
geom_tile() +
# scale_x_date(date_breaks = "1 day") +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_bd(business.dates = sort(df$day), max.major.breaks = 30, labels=scales::date_format('%d %b'))
Removing data from weekends can also be done using lubridate and the wday function as
filter(!wday(day) %in% c(1,7))
Sun/Sat are stored as 1 and 7 respectively. - Credit to #AHart

Plot time series data in r for one year month wise

I need help plotting time series data in R. Y is the temperature and X is the date (currently in the format %d-%m-%y), and I need a plot like:
month-wise temperature data along with a box plot
I have tried:
univariare-temperature
# Monthly time series starting in 2011, plotted without the default x axis.
temperature <- ts(r$temp, frequency = 12, start = 2011)
plot(temperature, xaxt = "n")
# Start/end of the series in time units, used to place the custom axis ticks.
tsp <- attributes(temperature)$tsp
dates <- seq(as.Date("2011-01-01"), by = "month", along.with = temperature)
axis(
  1,
  at = seq(tsp[1], tsp[2], along.with = temperature),
  labels = format(dates, "%m-%y")
)
ggplot
gg2=ggplot(r,aes(mnth,temp)) + geom_line()
windows()
print(gg2)
but the format is coming incorrect.
Any help will be really appreciated!
Thanks
Devi
library(ggplot2)
# Simulated temperature data
temp <- runif(75)
x <- 365
# Preallocate the result instead of growing it inside the loop.
temp.avg <- numeric(x)
# Day indices at which the simulated series switches regime.
first_cut <- round(.33 * x)
second_cut <- round(.66 * x)
for (i in seq_len(x)) {
  # One bootstrap mean per day; every regime below is based on it.
  draw <- mean(sample(temp, 15, replace = TRUE))
  if (i <= first_cut) {
    temp.avg[i] <- draw
  } else if (i <= second_cut) {
    temp.avg[i] <- abs(log(draw))
  } else {
    temp.avg[i] <- draw * (i / x) + .15
  }
}
# Generate sequence of days in Date format "%d-%m-%y"
from <- as.Date("01-1-11 12:00:00 EDT", "%d-%m-%y")
to <- as.Date("31-12-11 12:00:00 EDT", "%d-%m-%y")
times <- seq.Date(from, to, 1)
# Put dates and temperatures into data frame
Temperature_data <- data.frame(date = times, temp = temp.avg)
# Plot in ggplot
ggplot(Temperature_data, aes(date, temp)) +
geom_line() +
ylim(c(0, 1)) +
xlab("") +
ylab("Temperature") +
scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Temperature fluctuations in 2011")

Resources