Plot timelines without date - r

I have a carbon dioxide sensor that captures the concentration during working time.
Now I'd like to create a line plot that shows one line for each working day over time (from 8 am to 6 pm).
Some sample data:
# Hourly CO2 readings (ppm) from 08:00 to 18:00 on two consecutive working days.
co2data <- data.frame(
  # 11 hourly stamps per day; the time-of-day pattern is recycled across both dates.
  dateTime = paste(
    rep(c("2021-08-18", "2021-08-19"), each = 11),
    sprintf("%02d:00:00", 8:18)
  ),
  ppm = c(
    400, 450, 456, 560, 670, 690, 810, 900, 600, 650, 700,    # 2021-08-18
    410, 470, 480, 590, 700, 710, 810, 900, 1010, 1000, 1100  # 2021-08-19
  )
)
Now I can plot the concentration over time, but I don't know how to plot times only on the x-axis (no dates).
# Parse the timestamp strings into POSIXct so ggplot treats the x-axis as date-time.
co2data <- mutate(co2data, dateTime = as.POSIXct(dateTime))
# A single line across all timestamps; the x-axis carries the full date-time,
# not just the time of day.
ggplot(data = co2data, mapping = aes(x = dateTime, y = ppm)) +
  geom_line() +
  labs(x = "Time", y = "CO2-concentration ppm", title = "CO2-Concentration")
How can I plot one line for each day?

With the help of the data.table package (or lubridate) you can extract temporal information from date/time strings.
# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(ggplot2)

# Convert in place to a data.table so columns can be added by reference with `:=`.
setDT(co2data)

# Hour of day taken from the time part only; the date is dropped so that
# every day is drawn over the same x-axis.
co2data[, time := hour(as.ITime(dateTime))]

# Day of the year as a factor, so each day becomes its own discrete colour
# (and therefore its own line).
co2data[, yday := as.factor(yday(as.IDate(dateTime)))]

ggplot(co2data, aes(x = time, y = ppm, col = yday)) +
  geom_line() +
  labs(title = "CO2-Concentration", y = "CO2-concentration ppm", x = "Time") +
  theme_bw()

Related

How to get records related to highest sessioncount in a given day

I have an App Insights table like the one below. For each day, I need to select the processDate (and its related fields) that has the highest aggregate sessionCount on that day (the yellow highlighted rows are the expected result).
// Sample App Insights rows; several records can share the same processDate.
let da = datatable(id:int, processDate:datetime, message:string,col:string, sessionCount:int)
[
1,"2021-03-03 12:00:00", "a","aa",2,
1,"2021-03-03 12:00:00", "a","aa2",8,
1,"2021-03-03 09:00:00", "g","gg",20,
1,"2021-03-03 09:00:00", "g","g1",3,
1,"2021-03-03 15:00:00", "b","bb",9,
1,"2021-03-03 15:00:00", "b","bb1",1,
2,"2021-03-07 21:00:00", "f","ff",6,
2,"2021-03-07 21:00:00", "f","ff",2,
2,"2021-03-07 21:00:00", "abc","faf",21,
2,"2021-03-07 22:00:00", "abc","fav",25,
2,"2021-03-07 22:00:00", "z","zz",9
];
// Attempt so far: total sessionCount for each distinct processDate timestamp.
// This yields one row per timestamp, not yet one row per day.
da
| summarize maxsessionCountperRun = sum(sessionCount) by processDate
;
The expected output is produced by the query below:
// Sample rows; several records can share the same processDate.
let da = datatable(id:int, processDate:datetime, message:string,col:string, sessionCount:int)
[
1,"2021-03-03 12:00:00", "a","aa",2,
1,"2021-03-03 12:00:00", "a","aa2",8,
1,"2021-03-03 09:00:00", "g","gg",20,
1,"2021-03-03 09:00:00", "g","g1",3,
1,"2021-03-03 15:00:00", "b","bb",9,
1,"2021-03-03 15:00:00", "b","bb1",1,
2,"2021-03-07 21:00:00", "f","ff",6,
2,"2021-03-07 21:00:00", "f","ff",2,
2,"2021-03-07 21:00:00", "abc","faf",21,
2,"2021-03-07 22:00:00", "abc","fav",25,
2,"2021-03-07 22:00:00", "z","zz",9
];
da
// Step 1: total sessionCount per distinct processDate (result column: sum_sessionCount).
| summarize sum(sessionCount) by processDate
// Step 2: within each 1-day bin, keep the processDate whose total is the largest.
| summarize arg_max(sum_sessionCount, processDate) by processDate_day = bin(processDate,1d)
// Step 3: drop the helper day column, leaving the winning total and its processDate.
| project-away processDate_day
Result:
| sum_sessionCount | processDate          |
|------------------|----------------------|
| 23               | 2021-03-03T09:00:00Z |
| 34               | 2021-03-07T22:00:00Z |
Fiddle

How to remove an error Discrete value in the chart

My code:
# Eight hourly observations at one fixed lng/lat, each with an intensity value.
df1 <- data.frame(
time = as.POSIXct(c("2021-05-04 01:00:00", "2021-05-04 02:00:00",
"2021-05-04 03:00:00", "2021-05-04 04:00:00", "2021-05-04 05:00:00",
"2021-05-04 06:00:00", "2021-05-04 07:00:00", "2021-05-04 08:00:00")),
lng = c(15.363, 15.363, 15.363, 15.363 , 15.363, 15.363, 15.363, 15.363),
lat = c(51.232, 51.232, 51.232, 51.232, 51.232, 51.232, 51.232, 51.232),
name = c('aaa', 'bbb', 'ccc', 'ddd', 'eee', 'fff', 'ggg', 'hhh'),
intensity = c(0, 7, 20, 200, 500, 500, 600, 800))
# Bin edges: 9 evenly spaced values from 0 to 1200, i.e. 8 bins of width 150
# (not the 100-wide ranges described in the question text).
przedzialy <- seq(0, 1200, length.out=9)
# Bin each intensity into a factor column. With cut()'s defaults the intervals
# are open on the left, (lo, hi], so intensity == 0 falls outside the first bin
# and becomes NA.
df1_new <- df1 %>%
mutate(zakresy = cut(intensity, breaks = przedzialy))
# Ten-colour palette, ordered from green through yellow to red.
paleta_rgb <- c("#41786E", "#5AA03C", "#BED200", "#FFFA78", "#FCDC00",
"#F5BE00", "#F0A04B", "#E68246", "#E15F32", "#D2412D")
# intensity
# NOTE(review): `zakresy` is a factor (output of cut()), while
# scale_color_gradientn() is a continuous colour scale; this mismatch is what
# raises "Discrete value supplied to continuous scale".
ggplot() +
geom_point(data = df1_new, aes(lng, lat, color = zakresy), alpha = 0.5, show.legend = TRUE, size = 2) +
scale_color_gradientn(colors = paleta_rgb) +
coord_map() +
theme_void()
I am getting this error:
Discrete value supplied to continuous scale
Additionally, I don't know how to give an intensity of exactly 0 its own range (currently it ends up as NA). I would like the scale to use the following ranges:
equal to 0
(0; 100>
(100; 200>
...
(1100; 1200>
Thanks in advance for your help.

How to change the x-values displayed in ggplot and ggplotly in R

I have a time series X with related timestamps, and I want to produce a graph of the hourly values. The x-axis should show the hour rather than the full timestamp, but the plot should still be generated with the timestamp as the x variable.
I've already tried plotting X against the timestamps (hours 0-23) using the scale_x_datetime function. The problem arises when trying to get the x-axis labels to show 1-24: once the series crosses midnight, you end up with duplicated x-values.
library(plotly)
library(lubridate)
library(scales)
library(ggplot2)

# Hourly timestamps from 2019-07-30 23:00 to 2019-08-01 01:00 (crossing
# midnight twice), parsed as GMT.
Timestamp <- as.POSIXct(
  c(
    "2019-07-30 23:00:00", "2019-07-31 00:00:00", "2019-07-31 01:00:00",
    "2019-07-31 02:00:00", "2019-07-31 03:00:00", "2019-07-31 04:00:00",
    "2019-07-31 05:00:00", "2019-07-31 06:00:00", "2019-07-31 07:00:00",
    "2019-07-31 08:00:00", "2019-07-31 09:00:00", "2019-07-31 10:00:00",
    "2019-07-31 11:00:00", "2019-07-31 12:00:00", "2019-07-31 13:00:00",
    "2019-07-31 14:00:00", "2019-07-31 15:00:00", "2019-07-31 16:00:00",
    "2019-07-31 17:00:00", "2019-07-31 18:00:00", "2019-07-31 19:00:00",
    "2019-07-31 20:00:00", "2019-07-31 21:00:00", "2019-07-31 22:00:00",
    "2019-07-31 23:00:00", "2019-08-01 00:00:00", "2019-08-01 01:00:00"
  ),
  tz = "GMT"
)

# Measured values, one per timestamp.
col <- c(
  110, 100, 105, 100, 105, 100, 110, 100, 110, 100, 110, 100, 110, 100,
  110, 100, 110, 100, 110, 100, 110, 100, 110, 105, 110, 105, 110
)

# Desired axis label for each timestamp: hour of day counted 1-24.
hour <- c(
  23, 24,
  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
  1
)

data <- data.frame(Timestamp, col, hour)
data$Timestamp <- as.POSIXct(data$Timestamp)

# One break per hour, labelled with the two-digit hour ("00"-"23").
ggplot(data = data, mapping = aes(x = Timestamp)) +
  geom_line(mapping = aes(y = col)) +
  scale_x_datetime(
    labels = date_format("%H"),
    date_breaks = "1 hours",
    date_minor_breaks = "1 hour"
  )
The desired output is a ggplot/ggplotly with the hours displayed as the x-axis (1-24 and not 0-23).
Future work would also include a time series with 1-minute resolution in the plot.
Edited: added working code. I want the x-axis to run from 1 to 24 and start over at 1 after 24.
I found the answer myself: it is an extension of the scale_x_datetime call, using a custom labels function.
library(plotly)
library(lubridate)
library(scales)
library(ggplot2)

# Hourly timestamps from 2019-07-30 23:00 to 2019-08-01 01:00, parsed as GMT.
Timestamp <- as.POSIXct(
  c(
    "2019-07-30 23:00:00", "2019-07-31 00:00:00", "2019-07-31 01:00:00",
    "2019-07-31 02:00:00", "2019-07-31 03:00:00", "2019-07-31 04:00:00",
    "2019-07-31 05:00:00", "2019-07-31 06:00:00", "2019-07-31 07:00:00",
    "2019-07-31 08:00:00", "2019-07-31 09:00:00", "2019-07-31 10:00:00",
    "2019-07-31 11:00:00", "2019-07-31 12:00:00", "2019-07-31 13:00:00",
    "2019-07-31 14:00:00", "2019-07-31 15:00:00", "2019-07-31 16:00:00",
    "2019-07-31 17:00:00", "2019-07-31 18:00:00", "2019-07-31 19:00:00",
    "2019-07-31 20:00:00", "2019-07-31 21:00:00", "2019-07-31 22:00:00",
    "2019-07-31 23:00:00", "2019-08-01 00:00:00", "2019-08-01 01:00:00"
  ),
  tz = "GMT"
)

# Measured values, one per timestamp.
col <- c(
  110, 100, 105, 100, 105, 100, 110, 100, 110, 100, 110, 100, 110, 100,
  110, 100, 110, 100, 110, 100, 110, 100, 110, 105, 110, 105, 110
)

# Desired axis label for each timestamp (1-24); kept in the data for reference,
# the plot derives its labels from Timestamp instead.
hour <- c(
  23, 24,
  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
  1
)

# Timestamp is already POSIXct here, so no further conversion is needed.
data <- data.frame(Timestamp, col, hour)

# Axis labeller: hour of day shown as 1-24 instead of 0-23 (midnight -> 24).
# The hour is extracted once per call. `lubridate::` is spelled out because the
# data vector above is also named `hour`.
label_hour_1_to_24 <- function(x) {
  h <- lubridate::hour(as.POSIXct(x, origin = "1970-01-01"))
  ifelse(h == 0, 24, h)
}

ggplot(data = data, aes(x = Timestamp)) +
  geom_line(aes(y = col)) +
  scale_x_datetime(
    date_breaks = "1 hours",
    date_minor_breaks = "1 hour",
    labels = label_hour_1_to_24
  )

ggplot - adding vertical lines

Below is the data.table I am working with. I want to plot vertical lines wherever the longSignal column is 1.
# Daily EURUSD closing prices with a 0/1 entry signal. The table is bound to
# `RSI_data` because every plotting snippet that follows refers to it by that name.
RSI_data <- data.frame(
  index = c(
    "2011-09-09 17:00:00", "2011-09-12 17:00:00", "2011-09-13 17:00:00",
    "2011-09-14 17:00:00", "2011-09-15 17:00:00", "2011-09-16 17:00:00",
    "2011-09-19 17:00:00", "2011-09-20 17:00:00", "2011-09-21 17:00:00",
    "2011-09-22 17:00:00", "2011-09-23 17:00:00", "2011-09-26 17:00:00",
    "2011-09-27 17:00:00", "2011-09-28 17:00:00", "2011-09-29 17:00:00",
    "2011-09-30 17:00:00", "2011-10-03 17:00:00", "2011-10-04 17:00:00",
    "2011-10-05 17:00:00", "2011-10-06 17:00:00", "2011-10-07 17:00:00"
  ),
  EURUSD.Close = c(
    1.36534, 1.367895, 1.36783, 1.37546, 1.38764, 1.38005, 1.36849,
    1.37009, 1.35722, 1.346385, 1.35002, 1.353255, 1.35825, 1.35425,
    1.359705, 1.33876, 1.31759, 1.33489, 1.33482, 1.34374, 1.33771
  ),
  # 1 marks the rows where a vertical line should be drawn (rows 9 and 14).
  longSignal = c(
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
  )
)
Here is the code that I was trying to work with in ggplot
# Attempt: price line plus a vertical marker at every row where longSignal == 1.
ggplot(RSI_data, aes(index, EURUSD.Close)) +
geom_line() +
# NOTE(review): the aes() call only closes at the end of the last line, so
# xintercept, size and colour are all passed as aesthetic mappings rather than
# as fixed values.
geom_vline(aes(xintercept = as.numeric(RSI_data$index[which(RSI_data$longSignal == 1)]),
size = 2, colour = "red"))
I have been getting an error. Could anyone tell me how I can do this correctly?
Thanks in advance!
You can try this:
# Turn `index` into a date-time so the x-axis is continuous instead of
# categorical; geom_line() then needs no explicit group.
RSI_data$index <- as.POSIXct(as.character(RSI_data$index))

ggplot(data = RSI_data, mapping = aes(x = index, y = EURUSD.Close)) +
  geom_line() +
  # Feed the vline layer only the rows that carry a signal.
  geom_vline(
    mapping = aes(xintercept = index),
    data = RSI_data[which(RSI_data$longSignal == 1), ],
    colour = "red",
    size = 2
  )
If you put the xintercept term outside aes(), it works. You may then get another error: "geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?". The group=1 argument is there to prevent that error.
# xintercept is supplied as a fixed value (outside aes()): one position per row
# with longSignal == 1. group = 1 makes geom_line() join all points in one path.
ggplot(data = RSI_data, mapping = aes(x = index, y = EURUSD.Close)) +
  geom_line(group = 1) +
  geom_vline(
    xintercept = as.numeric(with(RSI_data, index[which(longSignal == 1)])),
    colour = "red",
    size = 2
  )

Duplicate dates in xts object lead to wrong indexing

I have data with users and duplicated dates e.g. users accessing a web site.
Example:
# library() fails immediately if a package is missing; require() would only
# return FALSE and let the xts() call further down fail instead.
library(zoo)
library(xts)

# Sample access log: one row per visit with the visiting user's id.
# Two of the visits (20:00 and 22:00 on 2014-08-14) fall on the same date.
test <- data.frame(
  timestamp = c(
    "2013-03-06 01:00:00", "2014-07-06 21:00:00", "2014-07-31 23:00:00",
    "2014-08-09 17:00:00", "2014-08-14 20:00:00", "2014-08-14 22:00:00",
    "2014-08-16 15:00:00", "2014-08-19 02:00:00", "2014-12-28 18:00:00",
    "2015-01-17 17:00:00"
  ),
  user = c(1, 2, 2, 3, 3, 3, 3, 3, 4, 4),
  row.names = c(
    "220667", "331481", "422653", "629430", "378111",
    "646137", "558638", "151641", "599370", "482750"
  ),
  # Keep timestamps as character on R versions older than 4.0 as well.
  stringsAsFactors = FALSE
)
If I create an xts object and then access it with its own index I get different data. What am I doing wrong here ?
# Order the rows by calendar date only: as.Date() drops the time of day, so the
# two 2014-08-14 timestamps end up with the same index value (a duplicated index).
testXts <- xts(x=test,order.by = as.Date(test$timestamp))
# Subset the object by its own index; this is the call that returns the
# unexpected rows described in the question.
testXts[index(testXts)]#Different (wrong) data. Why ?

Resources