geom_area plot with min and max values - r

I have been trying to plot min and max values of temperature. I actually wanted to plot using geom_area. My data can be downloaded from here.
library(dplyr)
library(ggplot2)
dat <- read.csv("energydata_complete.csv", stringsAsFactors = FALSE)
#renaming attributes meaningfully
#names(dat)[] <- 'temp_kitchen'
dat <- dat %>%
dplyr::rename('temp_kitchen'=T1,'temp_living'=T2,'temp_laundry'=T3,
'temp_office'=T4,'temp_bath'=T5,'temp_build'=T6,'temp_iron'=T7,
'temp_teen'=T8,'temp_parent'=T9,'hum_kitchen'=RH_1,'hum_living'=RH_2,
'hum_laundry'=RH_3,'hum_office'=RH_4,'hum_bath'=RH_5,'hum_build'=RH_6,
'hum_iron'=RH_7,'hum_teen'=RH_8,'hum_parent'=RH_9)
# Parse the timestamp first: months() only has methods for Date/POSIXt
# objects, so it cannot be applied to the raw character column read from
# the CSV. as.POSIXct() parses character input directly when given a
# format, so a separate strptime() step is not needed.
dat$date <- as.POSIXct(dat$date, format = "%Y-%m-%d %H:%M:%S")
# Month name of each observation, stored as a factor.
dat$month <- as.factor(months(dat$date))
I have created another dataframe with month and min and max temperature values of each room.
temparature <- dat %>% group_by(month) %>% dplyr::summarise(min_temp_kitch=min(temp_kitchen),
max_temp_kitch=max(temp_kitchen),
min_temp_living=min(temp_living),
max_temp_living=max(temp_living),
min_temp_laundry=min(temp_laundry),
max_temp_laundry=max(temp_laundry),
min_temp_iron=min(temp_iron),
max_temp_iron=max(temp_iron),
min_temp_office=min(temp_office),
max_temp_office=max(temp_office),
min_temp_bath=min(temp_bath),
max_temp_bath=max(temp_bath),
min_temp_parent=min(temp_parent),
max_temp_parent=max(temp_parent),
min_temp_teen=min(temp_teen),
max_temp_teen=max(temp_teen))
Now I am trying to plot min and max temperature values from this dataframe for each room.
Below code didn't give any plot.
ggplot() + geom_area(data = temparature,aes(x=month,y=min_temp_kitch), position = 'stack') +
geom_area(data = temparature,aes(x=month, y=max_temp_kitch), position = 'stack')
Tried to create with geom_ribbon as below.
ggplot(temparature) +
geom_ribbon(aes(x=month, ymin = min_temp_kitch, ymax = max_temp_kitch), color='blue', alpha = 0.5)
This has given
But I want a plot something similar to this with points for each value.
Can someone suggest how to do this please.

You don't need to convert your dates to a factor; instead, you need to reshape the temperature data frame into long format:
library(dplyr)
library(ggplot2)
library(lubridate)
dat <- read.csv("energydata_complete.csv", stringsAsFactors = FALSE)
dat <- dat %>%
rename('temp_kitchen'=T1,'temp_living'=T2,'temp_laundry'=T3,
'temp_office'=T4,'temp_bath'=T5,'temp_build'=T6,'temp_iron'=T7,
'temp_teen'=T8,'temp_parent'=T9,'hum_kitchen'=RH_1,'hum_living'=RH_2,
'hum_laundry'=RH_3,'hum_office'=RH_4,'hum_bath'=RH_5,'hum_build'=RH_6,
'hum_iron'=RH_7,'hum_teen'=RH_8,'hum_parent'=RH_9) %>%
mutate(month = floor_date(date(date), unit = 'months'))
temparature <- dat %>%
group_by(month) %>%
summarise(min_temp_kitch=min(temp_kitchen),
max_temp_kitch=max(temp_kitchen),
min_temp_living=min(temp_living),
max_temp_living=max(temp_living),
min_temp_laundry=min(temp_laundry),
max_temp_laundry=max(temp_laundry),
min_temp_iron=min(temp_iron),
max_temp_iron=max(temp_iron),
min_temp_office=min(temp_office),
max_temp_office=max(temp_office),
min_temp_bath=min(temp_bath),
max_temp_bath=max(temp_bath),
min_temp_parent=min(temp_parent),
max_temp_parent=max(temp_parent),
min_temp_teen=min(temp_teen),
max_temp_teen=max(temp_teen))
# Reshape to long format: every column except `month` is gathered into a
# name column (`temp_min_max`) and a value column (`Temp`).
temp2 <- tidyr::gather(temparature, temp_min_max, Temp, -month)

# Draw the kitchen minimum and maximum as overlaid (not stacked) areas.
ggplot() +
  geom_area(
    data = filter(temp2, temp_min_max %in% c('min_temp_kitch', 'max_temp_kitch')),
    mapping = aes(x = month, y = Temp, fill = temp_min_max, color = temp_min_max),
    position = 'identity'
  )

Related

How can I create multiple plots from same dataset in R?

Let me first share a dummy data, from which I want to prepare ggplot graphs.
library(tidyverse)
set.seed(1)
sample_size <- 1200
dates <- sample(seq(1,31),sample_size,replace = TRUE)
Monthss <- sample(seq(1,12),sample_size,replace = TRUE)
hrs <- sample(seq(1,23),sample_size,replace = TRUE)
minutes <- sample(seq(1,59),sample_size,replace = TRUE)
date_time_vector <- paste0(dates,"-",Monthss,"-",2022," ",hrs,":",minutes) |> lubridate::parse_date_time("dmy HM")
Conversion <- sample(c(TRUE,FALSE),sample_size, prob = c(0.25,0.75), replace = TRUE)
df <- data.frame(Date = date_time_vector, Conversion_Status = Conversion)
df <- df |> mutate(Leads = round(runif(sample_size, min = 0,max = 10),digits = 0))
df <- df[complete.cases(df), ]
The code above gives me a data.frame with columns Date, Leads and Conversion_Status. I want to prepare Monthly column chart of total leads per day. (For example, daily leads in January, daily leads in February, etc.) So, basically, I will need to split the data on the basis of Month, and prepare one chart for each month. How can I prepare such charts?
I have tried following way:
# Build a flipped (horizontal) column chart of `col2` against `col1`.
#
# df:    data frame to plot.
# col1:  unquoted column mapped to the x aesthetic.
# col2:  unquoted column mapped to the y aesthetic.
# title: plot title.
# Returns the ggplot object.
bar_function <- function(df, col1, col2, title) {
  chart <- ggplot2::ggplot(df, aes(x = {{ col1 }}, y = {{ col2 }}))
  chart <- chart + ggplot2::geom_col(fill = "steelblue")
  # White plot background and a horizontally centred title.
  chart <- chart +
    theme(
      plot.background = element_rect(fill = "white"),
      plot.title = element_text(hjust = 0.5)
    )
  chart <- chart + coord_flip()
  chart + ggplot2::labs(title = title)
}
mycharts <- df |> dplyr::nest_by(Month) |> dplyr::mutate(plot = bar_function(df,Date,Leads,"Daily Leads by Month"))
But it is giving me errors.
You can split according to month(year) and plot that.
library(ggplot2)
library(lubridate)
set.seed(1)
sample_size <- 1200
dates <- sample(seq(1,31),sample_size,replace = TRUE)
Monthss <- sample(seq(1,12),sample_size,replace = TRUE)
hrs <- sample(seq(1,23),sample_size,replace = TRUE)
minutes <- sample(seq(1,59),sample_size,replace = TRUE)
date_time_vector <- paste0(dates,"-",Monthss,"-",2022," ",hrs,":",minutes) |> lubridate::parse_date_time("dmy HM")
Conversion <- sample(c(TRUE,FALSE),sample_size, prob = c(0.25,0.75), replace = TRUE)
df <- data.frame(Date = date_time_vector, Conversion_Status = Conversion)
df$Leads <- round(runif(sample_size, min = 0,max = 10),digits = 0)
df <- df[complete.cases(df), ]
# Build a "month-year" key (e.g. "01-2022") for every row.
df$month_year <- strftime(df$Date, format = "%m-%Y")
# One data frame per month-year key.
df.split <- split(df, f = df$month_year)
# Preallocate the result list and give it the same names as the split data,
# so each plot can be looked up by its month-year key.
out <- vector("list", length(df.split))
names(out) <- names(df.split)
# Create one flipped column chart of Leads against Date per month.
for (i in seq_along(df.split)) {
out[[i]] <- ggplot(data = df.split[[i]], mapping = aes(x = Date, y = Leads)) +
geom_col(fill = "steelblue") +
theme(plot.background = element_rect(fill = "white")) +
theme(plot.title = element_text(hjust = 0.5))+
coord_flip() +
labs(title = "Daily leads by month")
}
To plot you can just print e.g. out[[1]].
If you want to change the desired columns dynamically, you can use aes_string for mapping. This can naturally be wrapped into sapply and there are probably other ways of approaching the problem. The for loop is pretty agnostic and I find that it's readable even by people who do not dabble in R (compared to say sapply).
There are some issues with your code. First, your dataset has no Month column, so you have to add one, for which I use lubridate::month. Second, you are passing the whole dataset df to your bar function instead of the data column that holds the split data in your nested df. Third, in the mutate step you have to wrap the result in list():
library(ggplot2)
# Spell out `warn.conflicts` instead of relying on partial matching of `warn`.
library(dplyr, warn.conflicts = FALSE)
# nest_by() creates one row per month and stores that month's rows in the
# list-column `data`. The plot is wrapped in list() so it can be stored in
# a list-column as well.
mycharts <- df |>
  nest_by(Month = lubridate::month(Date)) |>
  mutate(plot = list(bar_function(data, Date, Leads, "Daily Leads by Month")))
# Show the charts for the first and the fifth month.
mycharts$plot[[1]]
mycharts$plot[[5]]
I finally found an answer. I used following code:
# The example data has no Month column, so derive the month number from
# Date when it is missing; an existing Month column is left untouched.
if (!"Month" %in% names(df)) {
  df$Month <- lubridate::month(df$Date)
}
# One flipped column chart per month, titled with that month's value.
# The result is a list named by the values of Month.
lapply(split(df, df$Month),
       function(x)
         ggplot(x, aes(x = Date, y = Leads)) +
           geom_col(fill = "steelblue") + coord_flip() +
           ggtitle(x$Month[1]))
Thank you all for your support.

Order facet by semester and year

I am trying to order a ggplot graph using facet_grid, example:
Sorry, I know that my data makes no logical sense, but it is good enough to show my problem with the facets...
In my real data I don't have these values; I only use the block below to generate data for this example:
################ only to get data in my example ######################
set.seed(12345)
Date <- seq(as.Date("2010/1/1"), as.Date("2013/1/1"), "6 months" )
Y <- rnorm(n=length(Date), mean=100, sd=1)
df <- data.frame(Date, Y)
df$Year <- format(df$Date, "%Y")
df$Sem <- format(df$Date, "%b")
df$Sem <- gsub("ene.", "1S",df$Sem )
df$Sem <- gsub("jul.", "2S",df$Sem )
df$MonthYear <- format(df$Date, "%b-%Y")
############## o #################
Variables that are in my real data are:
Sem, Semester and Y
I extract the year to sort in my second try:
df=df %>%
mutate(extract_year= sub('.*(\\d{4}).*', '\\1', MonthYear))
df$Semester=paste0(df$Sem,df$extract_year)
#Here I tried to sort the facet but I failed:
df2 <- within(df,Semester <- ordered(Semester, levels = rev(sort(unique(Semester)))))
df2 %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I tried also with:
df2 %>%
arrange(extract_year) %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I would like to get this order:
1S2010/2S2010/1S2011/2S2011/1S2012/2S2012
Basically you have to set the order of the levels in your desired order. One option to achieve that would be to arrange the data in your desired order and make use of forcats::fct_inorder to set the order of the levels of your Semester variable.
library(ggplot2)
library(dplyr)
## Make Semester variable in the desired format
df <- mutate(df, Sem = recode(Sem, Jan = "1S", Jul = "2S"),
Semester = paste0(Sem, Year))
# Arrange and set order
df <- df %>%
arrange(gsub("^(\\dS)(\\d{4})$", "\\2\\1", Semester)) %>%
mutate(Semester = forcats::fct_inorder(Semester))
df %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)

Using spectrum function to find peaks with same intervals

I have to analyze data and find instances that have higher values repeating through same intervals. Example:
I am trying to use spectrum function but it gives me weird result.
Here is my example data, I inserted outliers every 1 hour into it.
library(dplyr)
library(lubridate)
library(ggplot2)
set.seed(900)
# Baseline series: one reading every 15 seconds (50002 timestamps) with
# values sampled from 1..10. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
data1 <-
  data.frame(
    datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*50001, "15 sec"),
    Value = sample(1:10, 50002, replace = TRUE),
    Instance = "A"
  )
# Inserted outliers: one reading per hour over the same time span
# (209 timestamps) with values sampled from 10..100.
data1.1 <- data.frame(
  datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*50001, "hour"),
  Value = sample(10:100, 209, replace = TRUE),
  Instance = "A"
)
data1 <- rbind(data1, data1.1) %>% group_by(datetime, Instance) %>% summarise(Value = max(Value)) %>% ungroup()
ggplot(data1, aes(x=datetime, y=Value, color = Instance)) +
geom_point()
spect <- spectrum(data1$Value, log="no", spans=c(5,5), plot=FALSE)
delta <- 1/4
specx <- spect$freq/delta
specy <- 2*spect$spec
plot(specx, specy, xlab="Period (minutes)", ylab="Spectral Density", type="l")
I expected to get spectral graph where spectral density will show me 60 minutes. But that's what I got:
How to find peaks repeating intervals (60 minutes in my example)?

ggplot using grouped date variables (such as year_month)

I feel like this should be an easy task for ggplot, tidyverse, lubridate, but I cannot seem to find an elegant solution.
GOAL: Create a bar graph of my data aggregated/summarized/grouped_by year and month.
#Libraries
library(tidyverse)
library(lubridate)
# Data
date <- sample(seq(as_date('2013-06-01'), as_date('2014-5-31'), by="day"), 10000, replace = TRUE)
value <- rnorm(10000)
df <- tibble(date, value)
# Summarise
df2 <- df %>%
mutate(year = year(date), month = month(date)) %>%
unite(year_month,year,month) %>%
group_by(year_month) %>%
summarise(avg = mean(value),
cnt = n())
# Plot
ggplot(df2) +
geom_bar(aes(x=year_month, y = avg), stat = 'identity')
When I create the year_month variable, it naturally becomes a character variable instead of a date variable. I have also tried grouping by year(date), month(date) but then I can't figure out how to use two variables as the x-axis in ggplot. Perhaps this could be solved by flooring the dates to the first day of the month...?
You were really close. The missing pieces are floor_date() and scale_x_date():
library(tidyverse)
library(lubridate)
# Simulated data: 10000 dates drawn with replacement from a one-year daily
# sequence, each paired with a standard-normal value.
date <- sample(
  seq(as_date('2013-06-01'), as_date('2014-5-31'), by = "day"),
  10000,
  replace = TRUE
)
value <- rnorm(10000)

# Floor every date to the first day of its month, then average per month.
df <- tibble(date, value) %>%
  mutate(month = floor_date(date, unit = "month")) %>%
  group_by(month) %>%
  summarize(avg = mean(value))

# One bar per month on a date axis with a break at every month and
# "%b %y" labels; the axis title is suppressed.
ggplot(df, aes(x = month, y = avg)) +
  geom_col() +
  scale_x_date(name = NULL, breaks = "month", date_labels = "%b %y")

Time series multiple plot for different group in R

I have a large data frame of several variables (around 50) with first column as date and second column id.
My data roughly look like this:
df <- data.frame(date = c("01-04-2001 00:00","01-04-2001 00:00","01-04-2001 00:00",
"01-05-2001 00:00","01-05-2001 00:00","01-05-2001 00:00",
"01-06-2001 00:00","01-06-2001 00:00","01-06-2001 00:00",
"01-07-2001 00:00","01-07-2001 00:00","01-07-2001 00:00"),
id = c(1,2,3,1,2,3,1,2,3,1,2,3), a = c(1,2,3,4,5,6,7,8,9,10,11,12),
b = c(2,2.5,3,3.2,4,4.6,5,5.6,8,8.9,10,10.6))
I want time series plots for all three ids separately in same graph of variables, a and b in different graphs.
I tried ggplot but it didn't work. Please help me
Do you mean something like this?
library(reshape)
library(lattice)
df2 <- melt(df, id.vars = c("date", "id"), measure.vars = c("a", "b"))
xyplot(value ~ date | variable, group = id, df2, t='l')
Addendum
# The following is from a comment by jbaums.
# It will create a single plot/file for each variable of df2
png('plots%02d.png')
xyplot(value ~ date | variable, group = id, df2, t='l', layout=c(1, 1),
scales=list(alternating=FALSE, tck=1:0))
dev.off()
You can also add relation='free' to scales so that y-axis limits are calculated separately for each plot.
Edit: After reading the comments, maybe you should try something like this:
library(tidyr)
df2 <- gather(df, variable, value, -date, -id)
vars <- unique(df2$variable)
library(ggplot2)
# Save one line chart per variable as "<variable>.png", with one line per
# id and the variable name as the y-axis label.
# seq_along() is used so that an empty `vars` yields zero iterations.
for (i in seq_along(vars)) {
  p <- ggplot() +
    geom_line(data = subset(df2, variable == vars[[i]]),
              aes(date, value, group = id, color = factor(id))) +
    ylab(as.character(vars[[i]]))
  # ggsave() is an ordinary function call, not a plot layer, so it must not
  # be added to the plot with `+`. Passing the plot explicitly also avoids
  # depending on whichever plot happened to be built last.
  ggsave(filename = paste0(vars[[i]], ".png"), plot = p)
}
This should save a PNG for each variable in your dataframe (and will change y label of every plot to variable name, as per your request)
Here's how to do it in ggplot, using the tidyr package to get it in the right format:
library(ggplot2)
library(tidyr)
library(dplyr)
df <- data.frame(date = c("01-04-2001 00:00","01-04-2001 00:00","01-04-2001 00:00",
"01-05-2001 00:00","01-05-2001 00:00","01-05-2001 00:00",
"01-06-2001 00:00","01-06-2001 00:00","01-06-2001 00:00",
"01-07-2001 00:00","01-07-2001 00:00","01-07-2001 00:00"),
id = c(1,2,3,1,2,3,1,2,3,1,2,3), a = c(1,2,3,4,5,6,7,8,9,10,11,12),
b = c(2,2.5,3,3.2,4,4.6,5,5.6,8,8.9,10,10.6))
Then using dplyr's group_by and do functions, we can save multiple plots.
# Reshape to long format, then save one line chart per variable as
# "<variable>.png" (one line per id).
df %>%
  gather(variable, value, -date, -id) %>%
  mutate(id = factor(id)) %>%
  group_by(variable) %>%
  do(
    # A *named* do() argument may return any object (it is stored in a
    # list-column), whereas an unnamed one must return a data frame.
    # ggsave() is called with the plot passed explicitly rather than added
    # to the plot with `+`. `.$variable[1]` is used because `.$variable`
    # is the whole column for the group, not a single value.
    saved_file = ggsave(
      filename = paste0(.$variable[1], ".png"),
      plot = ggplot(., aes(x = date, y = value, group = id, color = id)) +
        geom_line() +
        ggtitle(paste("variable =", .$variable[1]))
    )
  )

Resources