I have the following code:
# Path of the CSV file to read (left empty in this excerpt).
file = ""
# Read the comma-separated file with a header row; strings stay character.
skdat <- read.csv(file, head=T, sep=",", dec=".", stringsAsFactors = F)
# Assign column names.
# NOTE(review): the name vector below is cut off in this excerpt (the c( call
# is never closed), so the remaining names cannot be determined from here.
colnames(skdat) <- c("ID", "Date", "WaterLevel", "Grade1", "Symbol1",
"QAQC-1", "DischargeDebit", "Grade2", "Symbol2",
# Keep only the rows whose ID equals '05AH050'.
subds <- subset(skdat, ID=='05AH050')
# Parse Date as POSIXct and then convert it to a plain number.
subds$datetime1 <- as.numeric(as.POSIXct(subds$Date))
# Show the first ten rows.
subds[1:10, ]
# Line plot with datetime1 on x. Note that y is the quoted string "WaterLevel",
# i.e. a constant, not the WaterLevel column.
ggplot(aes(x = datetime1, y = "WaterLevel"), data = subds) + geom_line()
Is there a way I can show just the time in 2 hr intervals on the Y axis?

It is unclear what you mean by "time in 2 hr intervals on the Y axis", because time is on the x-axis. Here is an example that sets the breaks to every 2 hours on the x-axis. For this, datetime1 needs to be of class POSIXct (not numeric), and y must be the unquoted column name WaterLevel.
# Keep datetime1 as POSIXct so ggplot2 treats the x-axis as a date-time scale.
subds$datetime1 <- as.POSIXct(subds$Date)
# Plot the water level over time with a break every 2 hours, labelled HH:MM.
# date_breaks / date_labels are arguments of scale_x_datetime() itself, so the
# scales package does not have to be attached for this to run.
ggplot(data = subds, mapping = aes(x = datetime1, y = WaterLevel)) +
  geom_line() +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")


Trying to plot multiple indexed prices of cryptocurrencies with different dates

I'm trying to create a nice graph of indexed prices for a few currencies so I can track relative performance from origin for different projects and price-levels.
Below is my dummy code. I've tried a lot of things but this is as far as I got...
R plot of the original code: prices of HEX and BTC
I wish to add other currencies as I go along.
In the end it is just a data frame with multiple columns that all need to start on the same point, the timestamp is irrelevant and I could plot only the series or shift them all to start on the same location.
This is what I'm trying to achieve:
Indexed prices of projects starting at same origin
# Dummy data that recreates my problem - two frames with different starting
# dates and an indexed value of the closing price.
n1 <- 366
dat1 <- data.frame(
  timestamp = seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
  # seq() has no `replace` argument, so it is not passed here.
  index.btc = seq(from = 1, to = n1, by = 1)
)
dat2 <- data.frame(
  timestamp = seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
  index.hex = seq(from = 1, to = n1, by = 1)
)

# Merging data: full outer join on the date, so days present in only one
# frame get NA in the other frame's column.
jointdataset2 <- merge(dat1, dat2, by = "timestamp", all = TRUE)

# Creating plottable data with melt function (wide -> long). The column holding
# the former column names is called "project", which the plot below maps to
# colour.
jointdataset_plot <- melt(
  jointdataset2,
  id.vars = "timestamp",
  variable.name = "project"
)

# plot on same grid, each series colored differently --
# good if the series have same scale (they have but different starting date)
ggplot(jointdataset_plot, aes(timestamp, value)) +
  geom_line(aes(colour = project))

# Can also plot like this
ggplot() +
  geom_line(
    data = dat1, aes(timestamp, index.btc),
    color = "blue",
    size = 1
  ) +
  geom_line(
    data = dat2, aes(timestamp, index.hex),
    color = "red",
    size = 1
  ) +
  labs(
    x = "Time",
    y = "Indexed Price",
    title = "Indexed historical price (daily close index)",
    subtitle = "Candlesticks - data by"
  ) +
  scale_x_date(
    date_labels = "%Y (%b)",
    date_breaks = "1 year",
    date_minor_breaks = "1 month"
  ) +
  scale_y_log10()
If I remove the timestamps, and remove N/As from one of the data frames, would I then be able to create an ID column in both frames (starting at 1, same counter) and merging them both at ID counter 1 so origins align?
Your sample data overlaps, so I've changed dat2:
n1 <- 366
n2 <- 500
dat1 <- data.frame(
  timestamp = seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
  index.btc = seq(from = 1, to = n1, by = 1)
)
dat2 <- data.frame(
  timestamp = seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
  index.hex = seq(from = 1, to = n2, length.out = n1)
)

# Join both series on the date, reshape to long format, drop the days on which
# a series has no price, and re-express each series' time axis as the number
# of days since that series' own first observation, so all origins align at 0.
plotdata <- full_join(dat1, dat2, by = "timestamp") %>%
  pivot_longer(-timestamp, names_to = "index", values_to = "price") %>%
  filter(!is.na(price)) %>%
  group_by(index) %>%
  mutate(timestamp = as.integer(timestamp - min(timestamp))) %>%
  ungroup()

# timestamp is already an integer day count here, so it is mapped directly.
ggplot(plotdata, aes(x = timestamp, y = price, color = index)) +
  geom_line() +
  labs(
    x = "Time (Days)",
    y = "Indexed Price",
    title = "Indexed historical price (daily close index)",
    subtitle = "Candlesticks - data by"
  ) +
  scale_y_log10()
n1 <- 366
# Two random-walk style index series covering different date ranges.
dat1 <- data.frame(
  timestamp = seq.Date(as.Date("2012-12-26"), as.Date("2013-12-26"), "day"),
  index.btc = cumsum(sample(-2:10, n1, replace = TRUE))
)
dat2 <- data.frame(
  timestamp = seq.Date(as.Date("2013-12-26"), as.Date("2014-12-26"), "day"),
  index.hex = cumsum(sample(-2:10, n1, replace = TRUE))
)

# Replace the dates by a running counter (1, 2, 3, ...) so that both series
# start at the same origin. seq_along() is safe even for zero-length input.
dat1$timestamp <- seq_along(dat1$timestamp)
dat2$timestamp <- seq_along(dat2$timestamp)

# Merging data (now on the shared counter instead of the date)
jointdataset2 <- merge(dat1, dat2, by = "timestamp", all = TRUE)

# Creating plottable data with melt function; the former column names go into
# a column called "project", which is mapped to colour below.
jointdataset_plot <- melt(
  jointdataset2,
  id.vars = "timestamp",
  variable.name = "project"
)

# plot on same grid, each series colored differently --
# good if the series have same scale (they have but different starting date)
ggplot(jointdataset_plot, aes(timestamp, value)) +
  geom_line(aes(colour = project))

# Can also plot like this
ggplot() +
  geom_line(
    data = dat1, aes(timestamp, index.btc),
    color = "blue",
    size = 1
  ) +
  geom_line(
    data = dat2, aes(timestamp, index.hex),
    color = "red",
    size = 1
  ) +
  labs(
    x = "Time",
    y = "Indexed Price",
    title = "Indexed historical price (daily close index)",
    subtitle = "Candlesticks - data by"
  ) +
  scale_x_continuous() +
  scale_y_log10()

R ggplot How to plot a bar chart with different colours inside depicting 3 differents columns from the dataframe?

I am from a province which has 3 different areas.
I have a data frame with every day and the COVID-19 deaths for the whole province. My idea is to plot the data by week or by month, i.e. the sum over each 7 or 30 days. But I want to use 3 colours to distinguish the 3 different areas.
So this is my code. I can plot the total column. The 3 areas are called: alicante, valencia , castellon.
I don't know how to do it!
# Read the raw data; strings stay character.
log <- read.csv("", stringsAsFactors = FALSE, encoding = "UTF-8")
colnames(log) <- c("code", "Date", "total", "hombres", "mujeres", "alicante", "castellon", "valencia", "dvinaros", "dcastellon", "dlaplana", "dsangunto", "dmalvarrosa", "dvilanova", "dlafe", "drequena", "dvalenciageneral", "dpeset", "dlaribera", "dgandia", "ddenia", "dxativa", "dalcoy", "dlamarina", "dsanjuan", "delda", "dalicantegeneral", "delchegeneral", "dorihuela", "dtorrevieja", "dmanises", "delchecrevillente")
# Parse the timestamp strings into Date (the time-of-day part is dropped).
log$Date <- as.Date(log$Date, "%Y-%m-%dT%H:%M:%S")
# create variables of the week and month of each observation:
log$Mes <- as.Date(cut(log$Date, breaks = "month"))
log$Week <- as.Date(cut(
  log$Date,
  breaks = "week",
  start.on.monday = FALSE # changes weekly break point to Sunday
))
# graph by week:
ggplot(
  data = log,
  aes(Week, total, fill = "Defunciones semanales")
) +
  stat_summary(
    fun = sum, # adds up all observations of the week (`fun.y` is deprecated)
    geom = "bar"
  ) +
  labs(fill = "Color", y = "") +
  # geom_text(aes(y = total, label = total), vjust = 0, hjust = 0, size = 4) +
  labs(
    title = "Defunciones semanales en la Comunidad Valenciana hasta el 17 de Enero",
    subtitle = " "
  ) +
  scale_x_date(
    # labels = date_format("%B"),
    labels = date_format("%d-%m"),
    limits = c(as.Date("2020-03-01"), as.Date("2021-02-01")),
    breaks = "1 week"
  ) + # custom x-axis labels
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
This is more an issue of data wrangling than of plotting. To achieve your desired result, reshape your data to long format using e.g. tidyr::pivot_longer. Additionally, set position to "stack" in stat_summary to stack the bars for the areas.
log <- read.csv("", stringsAsFactors = FALSE, encoding = "UTF-8")
colnames(log) <- c("code", "Date", "total", "hombres", "mujeres", "alicante", "castellon", "valencia", "dvinaros", "dcastellon", "dlaplana", "dsangunto", "dmalvarrosa", "dvilanova", "dlafe", "drequena", "dvalenciageneral", "dpeset", "dlaribera", "dgandia", "ddenia", "dxativa", "dalcoy", "dlamarina", "dsanjuan", "delda", "dalicantegeneral", "delchegeneral", "dorihuela", "dtorrevieja", "dmanises", "delchecrevillente")
# Parse the timestamp strings into Date (the time-of-day part is dropped).
log$Date <- as.Date(log$Date, "%Y-%m-%dT%H:%M:%S")
# Month and week (starting on Sunday) each observation belongs to.
log$Mes <- as.Date(cut(log$Date, breaks = "month"))
log$Week <- as.Date(cut(
  log$Date,
  breaks = "week",
  start.on.monday = FALSE # changes weekly break point to Sunday
))
# select desired or needed variables and reshape to long format:
# columns 1:2 are code and Date, columns 6:8 are the three areas.
log_area <- select(log, 1:2, 6:8, Mes, Week) %>%
  pivot_longer(-c(code, Date, Mes, Week), names_to = "area")
# graph by week, one stacked bar segment per area:
ggplot(
  data = log_area,
  aes(Week, value, fill = area)
) +
  stat_summary(
    fun = sum, # adds up all observations of the week (`fun.y` is deprecated)
    geom = "bar",
    position = "stack"
  ) +
  labs(
    fill = "Color", y = "",
    title = "Defunciones semanales en la Comunidad Valenciana hasta el 17 de Enero",
    subtitle = " "
  ) +
  scale_x_date(
    labels = date_format("%d-%m"),
    limits = c(as.Date("2020-03-01"), as.Date("2021-02-01")),
    breaks = "1 week"
  ) + # custom x-axis labels
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
#> Warning: Removed 87 rows containing non-finite values (stat_summary).
#> Warning: Removed 3 rows containing missing values (geom_bar).
Created on 2021-01-30 by the reprex package (v1.0.0)

How can I work with stat_density and a timeseries (Posixct on x axis)?

Based on this example:
#example from
# Base plot: mpg on x, number of cylinders (as a discrete factor) on y.
cars <- ggplot(data = mtcars, mapping = aes(x = mpg, y = factor(cyl)))
# Draw the density of each group as a raster strip, filled by the computed
# density and left unstacked.
cars +
  stat_density(
    mapping = aes(fill = after_stat(density)),
    geom = "raster",
    position = "identity"
  )
I wanted to create a plot with the density plotted vertically per hour of my dataset. The original dataset is very long. I also want to display the single data points and a mean as a line.
Here is a simplified basic version of the code:
# reproducible example for density plot
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")
# One row per hour with three measurement columns.
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)
# Row-wise mean over every column except the first (x).
df$mean <- rowMeans(df[, -1])
# Long format: column 1 is the id, columns 2-4 are the measured values.
df_melt <- melt(df, id.vars = 1, measure.vars = c(2, 3, 4))
# Vertical density strip per hour; x is converted to a factor here.
g1 <- ggplot(data = df_melt, aes(factor(x), value)) +
  stat_density(
    aes(fill = after_stat(ndensity)),
    geom = "raster", position = "identity", orientation = "y"
  )
This works, but the original dataset has so many hours that the labelling of the x-axis is not nice. I also want to control the date format of the labels and the limits of the plot. Before working with stat_density, I used to do that with scale_x_datetime. But for the density plot I have to use factor(x) instead of the original x, which is POSIXct. So the following scaling produces an error, obviously, because x is a factor and not a date:
# scale x datetime (does not work)
g1 <- g1 +
  scale_x_datetime(
    labels = date_format("%b/%d", tz = "UTC"),
    limits = c(startdate, enddate),
    # Fixed break positions: every 2 days between the two limits.
    breaks = function(x) {
      seq.POSIXt(from = startdate, to = enddate, by = "2 days")
    },
    date_minor_breaks = "12 hours",
    expand = c(0, 0)
  )
I managed to scale_x_discrete but this makes it hard to determine the label format and limits with the bigger dataset:
# scale x discrete: every timestamp (as text) is a level on the axis, but only
# the 2nd and 4th get a break/label.
g1 <- g1 +
  scale_x_discrete(
    limits = as.character(df$x),
    breaks = as.character(df$x)[c(2, 4)]
  )
The next problem with factors is then that I cannot add the mean of every hour as geom_line as every factor consists of 1 observation only.
# plot mean: once as points, once as a line, both on the discrete x
g1 +
  geom_point(data = df, mapping = aes(x = factor(x), y = mean), colour = "red")
g1 +
  geom_line(data = df, mapping = aes(x = factor(x), y = mean), colour = "red")
So, is there a way to produce the desired plot with density per hour, overplotted points and overplotted mean line? And I want to edit the x labels and limits as comfortably as possible. Maybe there is a way to use x instead of factor(x)...
I think the solution might be as simple as dropping the factor() and setting an explicit group in the density layer. Does the following work for your real case?
#> Warning: package 'scales' was built under R version 4.0.3
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")
# One row per hour with three measurement columns.
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)
# Row-wise mean over every column except the first (x).
df$mean <- rowMeans(df[, -1])
# Long format: column 1 is the id, columns 2-4 are the measured values.
df_melt <- melt(df, id.vars = 1, measure.vars = c(2, 3, 4))
# x stays POSIXct; the explicit `group = x` makes stat_density compute one
# density per timestamp instead of relying on a factor.
ggplot(data = df_melt, aes(x, value)) +
  stat_density(
    aes(fill = after_stat(ndensity), group = x),
    geom = "raster", position = "identity", orientation = "y"
  )
Created on 2021-01-29 by the reprex package (v0.3.0)

R ggplot2 x-axis not properly show

My question is in the following.
here is my raw data:
I use it to plot a line graph with ggplot2.
The code is:
# Read the data; TRUE is spelled out because T is an ordinary, reassignable
# variable.
test <- read.csv("test.csv", header = TRUE)
# One data frame per value of `type`.
test_list <- split(test, test$type)
# Line plot for type A: value against month, one colour per `com`.
Rplot <- ggplot(test_list$A, aes(x = month, y = value, col = com)) +
  geom_line()
My question:
How can I make the x-axis display the months as 1, 2, 3, ..., 12?
And how can I show year and month together on the x-axis (Jan2016, Feb2016, ... or 2016/1, 2016/2, ...)?
Any help is much appreciated.
Create date_label column as Date class in the dataframe using ymd() from lubridate package.
library("lubridate")
# Add a Date column (the first day of each month) to every data frame in the
# list. The callback must return the modified data frame, otherwise lapply()
# would collect only the assigned column values.
test_list <- lapply(test_list, function(x) {
  # Numeric year-month-day, e.g. "2016-1-1", parses independently of the
  # locale's month names.
  x$date_label <- ymd(paste(x$year, x$month, 1, sep = "-"))
  x
})
Manipulate x axis using scale_x_date()
# Label formats that can be passed as `date_labels`:
# 1. date_labels = "%Y/%m" - 2016/01
# 2. date_labels = "%m" - 01
# 3. date_labels = "%b %Y" - Jan 2016
library("ggplot2")

# Months only on the x-axis, one break per month.
ggplot(
  data = test_list$A,
  mapping = aes(x = date_label, y = value, color = com, group = com)
) +
  geom_line(size = 3) +
  scale_x_date(
    name = "month",
    date_labels = "%m",
    date_breaks = "1 month"
  )

# Abbreviated month plus year on the x-axis, labels rotated by 45 degrees.
ggplot(
  data = test_list$A,
  mapping = aes(x = date_label, y = value, group = com, color = com)
) +
  geom_line(size = 3) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(
    name = "month_year",
    date_labels = "%b %Y",
    date_breaks = "1 month"
  )
For the first case, you can convert month to be a factor. The following code would eliminate decimals in the month.
# Treat month as a discrete variable so the axis shows whole months only.
test[["month"]] <- as.factor(test[["month"]])
# Re-split by type so the list elements carry the factor column.
test_list <- split(x = test, f = test[["type"]])
# group = 1 puts all points into a single group for geom_line().
Rplot <- ggplot(
  data = test_list[["A"]],
  mapping = aes(x = month, y = value, col = com, group = 1)
) +
  geom_line()
To show month and year together you need a new column. I will create one using the dplyr package and plot that instead.
# make a new column by joining the month abbreviation and the year.
# month is a factor at this point, so it is converted factor -> character ->
# number before it is used as an index into month.abb ("Jan", "Feb", ...).
test <- test %>%
  mutate(exactDate = paste0(month.abb[as.numeric(as.character(month))], year))
# convert to factor with the levels in calendar order (Jan2016 ... Dec2016);
# without explicit levels they would be sorted alphabetically.
test$exactDate <- factor(test$exactDate, levels = paste0(month.abb, "2016"))
# Plot similarly
test_list <- split(test, test$type)
Rplot <- ggplot(
  test_list$A,
  aes(x = exactDate, y = value, col = com, group = 1)
) +
  geom_line()
# Rotate axis labels by 90 degrees
Rplot + theme(axis.text.x = element_text(angle = 90, hjust = 1))

R - create legend and separate month plots with panel layers in ggplot

My goal is to create a separate 4-panel plot (right y-axis) for each of the 12 months (top x-axis) for the given years (top x-axis) in the data set. I also want to create a legend with the names of the points (V5 and V6) overlaid in 2 of the panels (V2 and V3). So instead of 1 large plot, there should be 12 plots.
After the reproducible example is my current plot.
Can you offer assistance on how to reach my goal?
Thank you.
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000 # seconds (15 days)
# One timestamp every interval / 2 seconds between the two dates.
Time <- seq(from = startdate, by = interval / 2, to = enddate)
# Six columns of non-negative random values, one row per timestamp.
timeframe <- data.frame(
  Time,
  V1 = abs(rnorm(length(Time))),
  V2 = abs(rnorm(length(Time))),
  V3 = abs(rnorm(length(Time))),
  V4 = abs(rnorm(length(Time))),
  V5 = abs(rnorm(length(Time))),
  V6 = abs(rnorm(length(Time)))
)
# Convert to a data.table.
timeframe <- setDT(timeframe)
The following Month and Year functions are derived from the waterYear function in smwrBase.
# Extract the calendar month (1-12) from a date-time vector.
#
# x:       anything as.POSIXlt() accepts (e.g. POSIXct, Date).
# numeric: if TRUE, return the month as an integer vector; otherwise return an
#          ordered factor.
# NOTE(review): the original body is truncated in this excerpt; the
# non-numeric branch follows the waterYear() pattern the text says this
# function is derived from - confirm against the original source.
Month <- function(x, numeric = FALSE) {
  x <- as.POSIXlt(x)
  # POSIXlt stores months as 0-11.
  mn <- x$mon + 1L
  if (numeric) {
    return(mn)
  }
  ordered(mn)
}
# Extract the calendar year from a date-time vector.
#
# x:       anything as.POSIXlt() accepts (e.g. POSIXct, Date).
# numeric: if TRUE, return the year as an integer vector; otherwise return an
#          ordered factor.
# NOTE(review): the original body is truncated in this excerpt; the
# non-numeric branch follows the waterYear() pattern the text says this
# function is derived from - confirm against the original source.
Year <- function(x, numeric = FALSE) {
  x <- as.POSIXlt(x)
  # POSIXlt stores years as an offset from 1900.
  yr <- x$year + 1900L
  if (numeric) {
    return(yr)
  }
  ordered(yr)
}
# month of every timestamp, requested in numeric form
mn <- Month(timeframe$Time, numeric = TRUE)
# year of every timestamp, requested in numeric form
yr <- Year(timeframe$Time, numeric = TRUE)
The following plot method is derived from Add a geom layer for a single panel in a faceted plot
# Append the month and year vectors as two extra columns.
timeframe <- data.table(timeframe, mn, yr)
# Rename columns 8 and 9 (the two just added) to "Month" and "Year".
setnames(timeframe, 8:9, c("Month", "Year"))
# Key the table by Time.
setkey(timeframe, Time)
# One data frame per variable, each with Time, the variable, Month and Year.
df1 <- setDF(timeframe[, list(Time, V1, Month, Year)])
df2 <- setDF(timeframe[, list(Time, V2, Month, Year)])
df3 <- setDF(timeframe[, list(Time, V3, Month, Year)])
df4 <- setDF(timeframe[, list(Time, V4, Month, Year)])
df5 <- setDF(timeframe[, list(Time, V5, Month, Year)])
df6 <- setDF(timeframe[, list(Time, V6, Month, Year)])
# Give every frame the same column names so they can share one set of
# aesthetics (x = Time, y = value).
names(df1) <- c("Time", "value", "Month", "Year")
names(df2) <- c("Time", "value", "Month", "Year")
names(df3) <- c("Time", "value", "Month", "Year")
names(df4) <- c("Time", "value", "Month", "Year")
names(df5) <- c("Time", "value", "Month", "Year")
names(df6) <- c("Time", "value", "Month", "Year")
# Facet row each frame is drawn in. df5 and df6 (built from V5 and V6) are
# labelled "V2" and "V3", so they share the panels of df2 and df3.
df1$panel <- "V1"
df2$panel <- "V2"
df3$panel <- "V3"
df4$panel <- "V4"
df5$panel <- "V2"
df6$panel <- "V3"
# Stack the four frames df1-df4 as the base data of the plot.
dff <- rbind(df1, df2, df3, df4)
p <- ggplot(data = dff, mapping = aes(x = Time, y = value))
# Rows: panel; columns: every Month/Year combination.
p <- p + facet_grid(panel ~ Month + Year, scale = "free")
# One layer per frame: lines for df1-df4, points for df5 and df6.
p <- p + layer(data = df1, geom = "line")
p <- p + layer(data = df2, geom = "line")
p <- p + layer(data = df5, geom = "point", colour = "green")
p <- p + layer(data = df3, geom = "line")
p <- p + layer(data = df6, geom = "point", colour = "red")
p <- p + layer(data = df4, geom = "line") +
scale_fill_manual(values=c("green", "red"), name="Legend",
labels=c("v5", "v6")) # this last part is my attempt at creating the legend
Assistance with the facet_grid came from
ggplot is generally much happier with the data in a long format. Thus, start by reshaping your data. Then it's rather straightforward to use one data set for the lines and one for the points, and map a variable to the color aesthetics for the points.
# melt data from wide to long format
df <- melt(timeframe, id.vars = "Time")
# create year and month variables
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%m")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
# plot: lines for V1-V4, coloured points for the former V5/V6 in the V2/V3
# panels. The facet_grid() argument is spelled out as `scales` instead of
# relying on partial matching of `scale`.
ggplot() +
  geom_line(data = d1, aes(x = Time, y = value)) +
  geom_point(data = d2, aes(x = Time, y = value, color = variable)) +
  facet_grid(variable ~ month + year, scales = "free") +
  scale_color_manual(
    values = c("green", "red"), name = "Legend",
    labels = c("V5", "V6")
  )
This is the complete answer to my question above, most of it relying on Henrik's answer. Thank you Henrik.
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000 # seconds (15 days)
# One timestamp every interval / 2 seconds between the two dates.
Time <- seq(from = startdate, by = interval / 2, to = enddate)
# Six columns of non-negative random values, one row per timestamp.
timeframe <- data.frame(
  Time,
  V1 = abs(rnorm(length(Time))),
  V2 = abs(rnorm(length(Time))),
  V3 = abs(rnorm(length(Time))),
  V4 = abs(rnorm(length(Time))),
  V5 = abs(rnorm(length(Time))),
  V6 = abs(rnorm(length(Time)))
)
# melt data from wide to long format
df <- melt(timeframe, id.vars = "Time")
# create year and month variables (month as abbreviated name, e.g. "Sep")
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%b")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
Source for the code below:
Selecting and plotting months in ggplot2
# separate plot for each month
for (u in unique(df$month)) {
  # Every `+` sits at the end of a line: a line that starts with `+` would be
  # parsed as a new statement and silently drop the rest of the chain.
  p <- ggplot() +
    geom_line(
      data = d1[format(d1$Time, "%b") == u, ],
      aes(x = Time, y = value)
    ) +
    geom_point(
      data = d2[format(d2$Time, "%b") == u, ],
      aes(x = Time, y = value, color = variable)
    ) +
    facet_grid(variable ~ month + year, scales = "free") +
    scale_color_manual(
      values = c("green", "red"), name = "Legend",
      labels = c("V5", "V6")
    )
  # Plots are not auto-printed inside a loop, so draw each one explicitly.
  print(p)
}
