Formatting time on a Y axis in ggplot2 - r

I have the following code:
# Read the hourly hydrometric CSV straight from the URL.
file <- "http://dd.weather.gc.ca/hydrometric/csv/SK/hourly/SK_hourly_hydrometric.csv"
skdat <- read.csv(file, header = TRUE, sep = ",", dec = ".",
                  stringsAsFactors = FALSE)
# Replace the file's header with short column names.
colnames(skdat) <- c("ID", "Date", "WaterLevel", "Grade1", "Symbol1",
                     "QAQC-1", "DischargeDebit", "Grade2", "Symbol2",
                     "QAQC-2")
# Keep the rows of a single station.
subds <- subset(skdat, ID == "05AH050")
# Timestamp stored as a plain number (seconds since the epoch).
subds$datetime1 <- as.numeric(as.POSIXct(subds$Date))
# Inspect the new column of `subds` (not the base function `data`).
class(subds$datetime1)
subds[1:10, ]
# Map the WaterLevel column itself; a quoted name would plot a constant string.
ggplot(aes(x = datetime1, y = WaterLevel), data = subds) + geom_line()
Is there a way I can show just the time in 2 hr intervals on the Y axis?

It is unclear what you mean by "time in 2 hr intervals on the Y axis", because time is on the x-axis. Here is an example that changes the breaks to 2 hours on the x-axis. The datetime1 column needs to be of class POSIXct.
library(ggplot2)
library(scales)
# The datetime scale needs a POSIXct column, not a bare number.
subds$datetime1 <- as.POSIXct(subds$Date)
ggplot(subds, aes(x = datetime1, y = WaterLevel)) +
  geom_line() +
  # One break every two hours, labelled as hour:minute. `date_labels` formats
  # in the scale's own time zone; scales::date_format() defaults to UTC and
  # can therefore show labels shifted relative to the data.
  scale_x_datetime(
    breaks = date_breaks("2 hours"),
    date_labels = "%H:%M"
  )

Related

Trying to plot multiple indexed prices of cryptocurrencies with different dates

I'm trying to create a nice graph of indexed prices for a few currencies so I can track relative performance from origin for different projects and price-levels.
Below is my dummy code. I've tried a lot of things but this is as far as I got...
R plot of the original code: prices of HEX and BTC
I wish to add other currencies as I go along.
In the end it is just a data frame with multiple columns that all need to start on the same point, the timestamp is irrelevant and I could plot only the series or shift them all to start on the same location.
This is what I'm trying to achieve:
Indexed prices of projects starting at same origin
# Dummy data that recreates my problem - two frames with different starting dates and an indexed value of the closing price.
n1 <- 366
dat1 <- data.frame(timestamp=seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
index.btc=seq(from = 1, to = n1, by=1, replace=TRUE)
)
dat2 <- data.frame(timestamp=seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
index.hex=seq(from = 1, to = n1, by=1, replace=TRUE)
)
# Merging data
jointdataset2 <- merge(dat1, dat2, by = 'timestamp', all = TRUE)
# Creating plottable data with melt function
jointdataset_plot <- melt(jointdataset2 , id.vars = 'timestamp', variable.name = 'project')
# plot on same grid, each series colored differently --
# good if the series have same scale (they have but different starting date)
ggplot(jointdataset_plot, aes(timestamp,value)) +
geom_line(aes(colour = project)) +
scale_y_log10()
# Can also plot like this
ggplot() + geom_line(data = dat1, aes(timestamp,index.btc),
color = "blue",
size = 1) +
geom_line(data = dat2, aes(timestamp,index.hex),
color = "red",
size = 1) +
labs(x = "Time",
y = "Indexed Price",
title ="Indexed historical price (daily close index)",
subtitle = "Candlesticks - data by nomics.com") +
scale_x_date(date_labels = "%Y (%b)", date_breaks = "1 year", date_minor_breaks = "1 month") +
scale_y_log10() +
theme_bw()
If I remove the timestamps, and remove N/As from one of the data frames, would I then be able to create an ID column in both frames (starting at 1, same counter) and merging them both at ID counter 1 so origins align?
Your sample data overlaps, so I've changed dat2:
library(dplyr)
library(tidyr)
library(ggplot2)

n1 <- 366
n2 <- 500
# Two daily series of equal length (n1 rows each) with different start dates.
# seq() takes no `replace` argument, so it is not passed here.
dat1 <- data.frame(
  timestamp = seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
  index.btc = seq(from = 1, to = n1, by = 1)
)
dat2 <- data.frame(
  timestamp = seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
  index.hex = seq(from = 1, to = n2, length.out = n1)
)

# Stack both series in long format, drop the gaps left by the join, and
# re-express every timestamp as days elapsed since that series' own first
# observation, so that all series start at 0.
plotdata <- full_join(dat1, dat2, by = "timestamp") %>%
  pivot_longer(-timestamp, names_to = "index", values_to = "price") %>%
  filter(!is.na(price)) %>%
  group_by(index) %>%
  mutate(timestamp = as.integer(timestamp - min(timestamp))) %>%
  ungroup()

# `timestamp` is already an integer day offset at this point.
ggplot(plotdata, aes(x = timestamp, y = price, color = index)) +
  geom_line() +
  labs(
    x = "Time (Days)",
    y = "Indexed Price",
    title = "Indexed historical price (daily close index)",
    subtitle = "Candlesticks - data by nomics.com"
  ) +
  scale_y_log10() +
  theme_bw()
n1 <- 366
dat1 <- data.frame(timestamp=seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
index.btc=cumsum(sample(-2:10, n1, replace=TRUE))
)
dat2 <- data.frame(timestamp=seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
index.hex=cumsum(sample(-2:10, n1, replace=TRUE))
)
dat1$timestamp<- seq(length(dat1$timestamp))
dat2$timestamp<- seq(length(dat2$timestamp))
# Merging data
jointdataset2 <- merge(dat1, dat2, by = 'timestamp', all = TRUE)
# Creating plottable data with melt function
jointdataset_plot <- melt(jointdataset2 , id.vars = 'timestamp', variable.name = 'project')
# plot on same grid, each series colored differently --
# good if the series have same scale (they have but different starting date)
ggplot(jointdataset_plot, aes(timestamp,value)) +
geom_line(aes(colour = project)) +
scale_y_log10()
# Can also plot like this
ggplot() + geom_line(data = dat1, aes(timestamp,index.btc),
color = "blue",
size = 1) +
geom_line(data = dat2, aes(timestamp,index.hex),
color = "red",
size = 1) +
labs(x = "Time",
y = "Indexed Price",
title ="Indexed historical price (daily close index)",
subtitle = "Candlesticks - data by nomics.com") +
scale_x_continuous() +
scale_y_log10() +
theme_bw()

R ggplot How to plot a bar chart with different colours inside depicting 3 differents columns from the dataframe?

I am from a province that has 3 different areas.
I have a data frame with the daily deaths from COVID-19 in the whole province. My idea is to plot the data by week or by month, i.e. the sum over each 7 or 30 days. But I want to use 3 colours to distinguish the 3 different areas.
So this is my code. I can plot the total column. The 3 areas are called: alicante, valencia, castellon.
I don't know how to do it!
library(ggplot2)
library(scales)
log <- read.csv('https://dadesobertes.gva.es/es/datastore/dump/69c32771-3d18-4654-8c3c-cb423fcfa652?bom=True',stringsAsFactors = F,encoding = 'UTF-8')
colnames(log) <- c("code", "Date", "total", "hombres", "mujeres", "alicante", "castellon", "valencia", "dvinaros", "dcastellon", "dlaplana", "dsangunto", "dmalvarrosa", "dvilanova", "dlafe", "drequena", "dvalenciageneral", "dpeset", "dlaribera", "dgandia", "ddenia", "dxativa", "dalcoy", "dlamarina", "dsanjuan", "delda", "dalicantegeneral", "delchegeneral", "dorihuela", "dtorrevieja", "dmanises", "delchecrevillente" )
#log$Date <- as.Date(log$Date,
log$Date <- as.Date(log$Date,
"%Y-%m-%dT%H:%M:%S") # tabulate all the options here
# create variables of the week and month of each observation:
log$Mes <- as.Date(cut(log$Date,
breaks = "month"))
log$Week <- as.Date(cut(log$Date,
breaks = "week",
start.on.monday = FALSE)) # changes weekly break point to Sunday
# graph by month:
ggplot(data = log,
aes(Week, total, fill="Defunciones semanales")) +
stat_summary(fun.y = sum, # adds up all observations for the month
geom = "bar") +
labs(fill = "Color", y = "") +
#geom_text(aes(y = total,label = total), vjust=0, hjust= 0,size=4) +
labs(title = "Defunciones semanales en la Comunidad Valenciana hasta el 17 de Enero",
subtitle = "Fuente:dadesobertes.gva.es/es/dataset/covid-19-series-personas-fallecidas. ") +
scale_x_date(
#labels = date_format( "%B"),
labels = date_format( "%d-%m"),
limits=c(as.Date("2020-03-01"), as.Date("2021-02-01")),
breaks = "1 week") + # custom x-axis labels
theme(axis.text.x=element_text(angle=60, hjust=1))
This is more an issue of data wrangling than of plotting. To achieve your desired result, reshape your data to long format using e.g. tidyr::pivot_longer. Additionally, set position to "stack" in stat_summary to stack the bars for the areas.
library(ggplot2)
library(scales)
library(tidyr)
library(dplyr)

# Read the published CSV and replace its header with short column names.
log <- read.csv(
  "https://dadesobertes.gva.es/es/datastore/dump/69c32771-3d18-4654-8c3c-cb423fcfa652?bom=True",
  stringsAsFactors = FALSE, encoding = "UTF-8"
)
colnames(log) <- c("code", "Date", "total", "hombres", "mujeres", "alicante", "castellon", "valencia", "dvinaros", "dcastellon", "dlaplana", "dsangunto", "dmalvarrosa", "dvilanova", "dlafe", "drequena", "dvalenciageneral", "dpeset", "dlaribera", "dgandia", "ddenia", "dxativa", "dalcoy", "dlamarina", "dsanjuan", "delda", "dalicantegeneral", "delchegeneral", "dorihuela", "dtorrevieja", "dmanises", "delchecrevillente")

# Parse the timestamp strings into Date values.
log$Date <- as.Date(log$Date, format = "%Y-%m-%dT%H:%M:%S")

# Bucket every observation into the month and the week it belongs to.
log$Mes <- as.Date(cut(log$Date, breaks = "month"))
log$Week <- as.Date(cut(log$Date,
  breaks = "week",
  start.on.monday = FALSE # weeks start on Sunday
))

# Keep the three area columns plus the identifiers, then reshape to long
# format: one row per (day, area) with the count in `value`.
log_area <- log %>%
  select(code, Date, alicante, castellon, valencia, Mes, Week) %>%
  pivot_longer(-c(code, Date, Mes, Week), names_to = "area")

# Graph by week: one stacked bar per week, one colour per area.
ggplot(
  data = log_area,
  aes(Week, value, fill = area)
) +
  stat_summary(
    fun = sum, # adds up all observations of the week (`fun.y` is deprecated)
    geom = "bar",
    position = "stack"
  ) +
  labs(
    fill = "Color", y = "",
    title = "Defunciones semanales en la Comunidad Valenciana hasta el 17 de Enero",
    subtitle = "Fuente:dadesobertes.gva.es/es/dataset/covid-19-series-personas-fallecidas. "
  ) +
  scale_x_date(
    labels = date_format("%d-%m"),
    limits = c(as.Date("2020-03-01"), as.Date("2021-02-01")),
    breaks = "1 week"
  ) + # custom x-axis labels
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
#> Warning: Removed 87 rows containing non-finite values (stat_summary).
#> Warning: Removed 3 rows containing missing values (geom_bar).
Created on 2021-01-30 by the reprex package (v1.0.0)

How can I work with stat_density and a timeseries (Posixct on x axis)?

Based on this example:
#example from https://ggplot2.tidyverse.org/reference/geom_tile.html
cars <- ggplot(mtcars, aes(mpg,factor(cyl)))
cars + stat_density(aes(fill = after_stat(density)), geom = "raster", position = "identity")
I wanted to create a plot with the density plotted vertically per hour of my dataset. The original dataset is very long. I also want to display the single data points and a mean as a line.
Here is a simplified basic version of the code:
#reproducable example for density plot
library(reshape2)
library(ggplot2)
library(scales)
startdate <- as.POSIXct("2020-01-01 01:00", tz="UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz="UTC")
#dataframe
df <- data.frame(x = seq.POSIXt(startdate, enddate, "hour"),
y1 = c(1,2,3,4,5),
y2 = c(2,4,6,8,10),
y3 = c(3,6,9,12,15))
df$mean <- rowMeans(df[,-1])
df_melt <- melt(df, id.vars = 1, measure.vars = c(2,3,4))
#plot
g1 <- ggplot(data = df_melt, aes(factor(x), value)) +
stat_density(aes(fill = after_stat(ndensity)),
geom = "raster", position = "identity", orientation = "y") +
geom_point()
g1
This works, but the original dataset has so many hours that the x-axis labels become unreadable. I also want to control the date format of the labels and the limits of the plot. Before working with stat_density, I used to do that with scale_x_datetime. But for the density plot I have to use factor(x) instead of the original x, which is POSIXct. So the following scaling produces an error, because x is a factor and not a date:
#scale x datetime (does not work)
g1 <- g1 + scale_x_datetime(labels = date_format("%b/%d", tz="UTC"),
limits = c(startdate, enddate),
breaks = function(x)
seq.POSIXt(from = startdate, to = enddate, by = "2 days"),
date_minor_breaks = "12 hours",
expand = c(0,0))
g1
I managed to scale_x_discrete but this makes it hard to determine the label format and limits with the bigger dataset:
#scale x discrete
g1 <- g1 + scale_x_discrete(limits = c(as.character(df$x)),
breaks = as.character(df$x)[c(2,4)])
g1
The next problem with factors is then that I cannot add the mean of every hour as geom_line as every factor consists of 1 observation only.
#plot mean
g1 + geom_point(data = df, aes(factor(x), mean), col = "red")
g1 + geom_line(data = df, aes(factor(x), mean), col = "red")
So, is there a way to produce the desired plot with density per hour, overplotted points and overplotted mean line? And I want to edit the x labels and limits as comfortably as possible. Maybe there is a way to use x instead of factor(x)...
I think the solution might be as simple as dropping the as.factor() and setting an explicit group in the density. Does the following work for your real case?
library(reshape2)
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3
# First and last timestamp of the example series (UTC).
startdate <- as.POSIXct("2020-01-01 01:00", tz="UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz="UTC")
# Wide data frame: one row per hour, three measurement columns y1..y3.
df <- data.frame(x = seq.POSIXt(startdate, enddate, "hour"),
y1 = c(1,2,3,4,5),
y2 = c(2,4,6,8,10),
y3 = c(3,6,9,12,15))
# Row-wise mean of the measurement columns (column 1, the timestamp, excluded).
df$mean <- rowMeans(df[,-1])
# Long format: columns 2-4 are stacked into variable/value pairs keyed by x.
df_melt <- melt(df, id.vars = 1, measure.vars = c(2,3,4))
# Keep x as POSIXct (no factor()) and group the density by timestamp instead,
# so stat_density computes one density per hour.
ggplot(data = df_melt, aes(x, value)) +
stat_density(aes(fill = after_stat(ndensity),
group = x),
geom = "raster", position = "identity", orientation = "y") +
geom_point()
Created on 2021-01-29 by the reprex package (v0.3.0)

R ggplot2 x-axis not properly show

My question is in the following.
here is my raw data:
type,com,year,month,value
A,CH,2016,1,0
A,CH,2016,2,0
A,CH,2016,3,0
A,CH,2016,4,0
A,CH,2016,5,0
A,CH,2016,6,0
A,CH,2016,7,0
A,CH,2016,8,0
A,CH,2016,9,453128
A,CH,2016,10,868256
A,CH,2016,11,1015080
A,CH,2016,12,650912
B,CH,2016,1,0
B,CH,2016,2,0
B,CH,2016,3,0
B,CH,2016,4,0
B,CH,2016,5,0
B,CH,2016,6,61273
B,CH,2016,7,27711
B,CH,2016,8,161780
B,CH,2016,9,48889
B,CH,2016,10,72805
B,CH,2016,11,131466
B,CH,2016,12,73756
C,CH,2016,1,0
C,CH,2016,2,0
C,CH,2016,3,0
C,CH,2016,4,0
C,CH,2016,5,0
C,CH,2016,6,0
C,CH,2016,7,0
C,CH,2016,8,2200
C,CH,2016,9,111384
C,CH,2016,10,28758
C,CH,2016,11,21161
C,CH,2016,12,0
I use it to plot a line graph with ggplot2.
The code is:
test <- read.csv("test.csv", header = T)
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=month, y=value, col=com))+geom_line()
Rplot
My question:
How can I make the x-axis display the months (1, 2, 3, ..., 12)?
And how can I show year and month together on the x-axis, e.g. (Jan2016, Feb2016, ...) or (2016/1, 2016/2, ...)?
Much appreciated.
Create date_label column as Date class in the dataframe using ymd() from lubridate package.
library("lubridate")
# Add a Date column (first day of the month) to every per-type data frame.
test_list <- lapply(test_list, function(x) {
  # Build a numeric ISO "YYYY-MM-01" string from the year and month columns, so
  # parsing does not rely on matching English month abbreviations against the
  # session locale.
  x$date_label <- ymd(sprintf("%d-%02d-01", x$year, x$month))
  x
})
Manipulate x axis using scale_x_date()
# 1. date_labels = "%Y/%m" - 2016/01
# 2. date_labels = "%m" - 01
# 3. date_labels = "%b %Y" - Jan 2016
library( 'ggplot2' )
ggplot( data = test_list$A, aes( x = date_label, y = value, color = com, group = com ) ) +
geom_line( size = 3 ) +
scale_x_date(name = "month",
date_labels = "%m",
date_breaks = "1 month" )
ggplot( data = test_list$A, aes( x = date_label, y = value, group = com, color = com ) ) +
geom_line( size = 3 ) +
theme( axis.text.x = element_text( angle = 45, hjust = 1 )) +
scale_x_date(name = "month_year",
date_labels = "%b %Y",
date_breaks = "1 month" )
For the first case, you can convert month to be a factor. The following code would eliminate decimals in the month.
test$month <- as.factor(test$month)
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=month, y=value, col=com, group = 1))+geom_line()
Rplot
To get numeric months you have to have a new column. I will create a new one using dplyr package and change that.
library(dplyr)
# Build a "Jan2016"-style label from the month and year columns. The round trip
# through character works whether `month` is numeric or a factor.
test <- test %>%
  mutate(exactDate = paste0(month.abb[as.numeric(as.character(month))], year))
# Convert to a factor whose levels are in chronological (not alphabetical)
# order. Deriving the levels from the data avoids typing out every month-year
# combination and keeps working when more years are added.
chronological <- order(test$year, as.numeric(as.character(test$month)))
test$exactDate <- factor(test$exactDate,
                         levels = unique(test$exactDate[chronological]))
# Plot similarly
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A,
                aes(x = exactDate, y = value, col = com, group = 1)) +
  geom_line()
# Rotate axis labels by 90 degrees
Rplot + theme(axis.text.x = element_text(angle = 90, hjust = 1))

R - create legend and separate month plots with panel layers in ggplot

My goal is to create a separate 4-panel plot (right y-axis) for each of the 12 months (top x-axis) for the given years (top x-axis) in the data set. I also want to create a legend with the names of the points (V5 and V6) overlaid in 2 of the panels (V2 and V3). So instead of 1 large plot, there should be 12 plots.
After the reproducible example is my current plot.
Can you offer assistance on how to reach my goal?
Thank you.
library(ggplot2)
library(data.table)
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000
Time <- seq(from = startdate, by = interval/2, to = enddate)
set.seed(1)
timeframe <- data.frame(Time, V1 = abs(rnorm(length(Time))), V2 =
abs(rnorm(length(Time))), V3 = abs(rnorm(length(Time))), V4 =
abs(rnorm(length(Time))), V5 = abs(rnorm(length(Time))), V6 =
abs(rnorm(length(Time))))
timeframe <- setDT(timeframe)
The following Month and Year functions are derived from the waterYear function in smwrBase.
#' Extract the calendar month from a date-time vector
#'
#' @param x A value that `as.POSIXlt()` can convert, e.g. a `POSIXct` vector.
#' @param numeric If `TRUE`, return the month numbers (1-12) as a plain
#'   vector; otherwise return them as an ordered factor.
#' @return The month of each element of `x`, as a numeric vector or an
#'   ordered factor.
Month <- function(x, numeric = FALSE) {
  # POSIXlt stores months 0-based, hence the + 1L.
  mn <- as.POSIXlt(x)$mon + 1L
  if (numeric) {
    return(mn)
  }
  ordered(mn)
}
#' Extract the calendar year from a date-time vector
#'
#' @param x A value that `as.POSIXlt()` can convert, e.g. a `POSIXct` vector.
#' @param numeric If `TRUE`, return the four-digit years as a plain vector;
#'   otherwise return them as an ordered factor.
#' @return The year of each element of `x`, as a numeric vector or an ordered
#'   factor.
Year <- function(x, numeric = FALSE) {
  # POSIXlt stores years as an offset from 1900.
  yr <- as.POSIXlt(x)$year + 1900L
  if (numeric) {
    return(yr)
  }
  ordered(yr)
}
# month
mn <- Month(timeframe$Time, numeric = TRUE)
# year
yr <- Year(timeframe$Time, numeric = TRUE)
The following plot method is derived from Add a geom layer for a single panel in a faceted plot
timeframe <- data.table(timeframe, mn, yr)
setnames(timeframe, 8:9, c("Month", "Year"))
setkey(timeframe, Time)
df1 <- setDF(timeframe[, list(Time, V1, Month, Year)])
df2 <- setDF(timeframe[, list(Time, V2, Month, Year)])
df3 <- setDF(timeframe[, list(Time, V3, Month, Year)])
df4 <- setDF(timeframe[, list(Time, V4, Month, Year)])
df5 <- setDF(timeframe[, list(Time, V5, Month, Year)])
df6 <- setDF(timeframe[, list(Time, V6, Month, Year)])
names(df1) <- c("Time", "value", "Month", "Year")
names(df2) <- c("Time", "value", "Month", "Year")
names(df3) <- c("Time", "value", "Month", "Year")
names(df4) <- c("Time", "value", "Month", "Year")
names(df5) <- c("Time", "value", "Month", "Year")
names(df6) <- c("Time", "value", "Month", "Year")
df1$panel <- "V1"
df2$panel <- "V2"
df3$panel <- "V3"
df4$panel <- "V4"
df5$panel <- "V2"
df6$panel <- "V3"
dff <- rbind(df1, df2, df3, df4)
p <- ggplot(data = dff, mapping = aes(x = Time, y = value))
p <- p + facet_grid(panel ~ Month + Year, scale = "free")
p <- p + layer(data = df1, geom = "line")
p <- p + layer(data = df2, geom = "line")
p <- p + layer(data = df5, geom = "point", colour = "green")
p <- p + layer(data = df3, geom = "line")
p <- p + layer(data = df6, geom = "point", colour = "red")
p <- p + layer(data = df4, geom = "line") +
scale_fill_manual(values=c("green", "red"), name="Legend",
labels=c("v5", "v6")) # this last part is my attempt at creating the legend
p
Assistance with the facet_grid came from http://docs.ggplot2.org/current/facet_grid.html
ggplot is generally much happier with the data in a long format. Thus, start by reshaping your data. Then it's rather straightforward to use one data set for the lines and one for the points, and map a variable to the color aesthetics for the points.
# melt data from wide to long format
library(reshape2)
df <- melt(timeframe, id.vars = "Time")
# create year and month variables
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%m")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
# plot
ggplot() +
geom_line(data = d1, aes(x = Time, y = value)) +
geom_point(data = d2, aes(x = Time, y = value, color = variable)) +
facet_grid(variable ~ month + year, scale = "free") +
scale_color_manual(values = c("green", "red"), name = "Legend",
labels = c("V5", "V6"))
This is the complete answer to my question above, most of it relying on Henrik's answer. Thank you Henrik.
library(ggplot2)
library(reshape2)
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000
Time <- seq(from = startdate, by = interval/2, to = enddate)
set.seed(1)
timeframe <- data.frame(Time, V1 = abs(rnorm(length(Time))), V2 =
abs(rnorm(length(Time))), V3 = abs(rnorm(length(Time))), V4 =
abs(rnorm(length(Time))), V5 = abs(rnorm(length(Time))), V6 =
abs(rnorm(length(Time))))
df <- melt(timeframe, id.vars = "Time")
# create year and month variables
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%b")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
Source for the code below:
Selecting and plotting months in ggplot2
# Draw one faceted plot per month label found in the data.
for (u in unique(df$month)) {
  # Restrict the line data (d1) and the point data (d2) to month `u`.
  # The `month` column already holds format(Time, "%b").
  p <- ggplot() +
    geom_line(data = d1[d1$month == u, ], aes(x = Time, y = value)) +
    geom_point(data = d2[d2$month == u, ],
               aes(x = Time, y = value, color = variable)) +
    facet_grid(variable ~ month + year, scales = "free") +
    # Every `+` must end its line: a line that starts with `+` is parsed as a
    # new statement, so the colour scale would not be added to `p`.
    scale_color_manual(values = c("green", "red"), name = "Legend",
                       labels = c("V5", "V6"))
  print(p)
}

Resources