Related
I am trying to convert a character column containing month names into a numerical column containing numbers (1=January, etc.).
Example 1. This works:
df2 <- structure(list(Month = c("January", "January", "March", "March", "April")), class = "data.frame", row.names = c(NA, -5L))
# Works only by coincidence: inside each group `Month == month.name` lines the
# group's values up against the 12 month names, and for these small groups
# (1-2 rows) exactly one position matches, so which() returns a single index.
df2 %>%
group_by(Month) %>%
mutate(Month = which(Month == month.name)) -> df2
Example 2. This does not work (error: "Month must be size 31 or 1, not 2."):
df <- structure(list(Month = c("August", "August", "August", "August",
"August", "August", "August", "August", "August", "August", "August",
"August", "August", "August", "August", "August", "August", "August",
"August", "August", "August", "August", "August", "August", "August",
"August", "August", "August", "August", "August", "August", "December",
"December", "December", "December")), class = "data.frame", row.names = c(NA, -35L))
# Fails for the 31-row "August" group: `Month == month.name` recycles the 12
# month names along the 31 values, so "August" matches at positions 8 and 20
# and which() returns two indices -- neither 1 value nor 31 values.
# NOTE(review): match(Month, month.name) without group_by() is the usual
# vectorised lookup here -- confirm it gives the intended result.
df %>%
group_by(Month) %>%
mutate(Month = which(Month == month.name)) -> df
The code for converting is the same in both cases. Why doesn't it work in the second example? I can't get my head around it.
I am trying to plot temperature over time (in the form of a date), however, I am not sure how to.
See here my original table in Excel:
Or as R code:
dput(Average_temperature_period)
structure(list(Sample = c("ZS_IG_1", "AK_SN_1", "JP_IG_2", "AW_IG_1",
"SBB_SN_1", "AW_IG_2", "JvH_IG_3", "JvH_IG_2", "SBB_SN_4", "SBB_SN_3",
"SBB_SN_2", "EF_SN_1", "JP_IG_2", "JvH_IG_3", "EF_SN_1", "JvH_IG_2",
"AK_SN_1", "ZS_IG_1", "AW_IG_1", "SBB_SN_1", "AW_IG_2", "SBB_SN_4",
"SBB_SN_3", "SBB_SN_2"), Sampling_date = c("23/03/2022", "24/03/2022",
"25/03/2022", "25/03/2022", "25/03/2022", "25/03/2022", "29/03/2022",
"29/03/2022", "01/04/2022", "01/04/2022", "01/04/2022", "12/04/2022",
"25/04/2022", "26/04/2022", "28/04/2022", "29/04/2022", "03/05/2022",
"04/05/2022", "10/05/2022", "10/05/2022", "11/05/2022", "11/05/2022",
"12/05/2022", "12/05/2022"), Period = c("March", "March", "March",
"March", "March", "March", "March", "March", "March", "March",
"March", "March", "AprilMay", "AprilMay", "AprilMay", "AprilMay",
"AprilMay", "AprilMay", "AprilMay", "AprilMay", "AprilMay", "AprilMay",
"AprilMay", "AprilMay"), Average_temperature_field = c(7.137037037,
6.966666667, 10.55555556, 7.281481481, 6.874074074, 9.211111111,
9.662962963, 8.12962963, 6.707407407, 6.774074074, 7.162962963,
8.114814815, NA, 11.74814815, 13.51111111, 11.29259259, 15.4962963,
NA, 15.45925926, 17.14814815, 17.72592593, 15.84074074, 16.85555556,
19.78148148), Average_moisture_field = c(33.48518519, 47.35555556,
32.54814815, 34.01851852, 38.66666667, 31.71851852, 23.54814815,
26.83333333, 42.47777778, 29.45555556, 44.50740741, 40.27407407,
25.77407407, 18.91481481, 26.67777778, 16.27407407, 25.38518519,
19.9962963, 18.27777778, 16.14074074, 22.86666667, 23.48518519,
13.93703704, 20.92222222)), row.names = c(NA, 24L), class = "data.frame")
See here my code in R thus far:
##### Soil temperature graph
# Read the exported table and store the sampling dates as character strings.
Average_temperature_period <- read.csv("~/Desktop/First Internship/MicroResp/R/R script/Average_temperature_period.csv")
Average_temperature_period$Sampling_date <- as.character(Average_temperature_period$Sampling_date)
# Keep rows 1-24 and columns 1-5 only.
Average_temperature_period <- Average_temperature_period[c(1:24),c(1:5)]
# Change order x axis (past to present)
# NOTE(review): these levels use two-digit years ("23/03/22") while the dput
# output shown for this table has four-digit years ("23/03/2022"). Values that
# are not among the levels become NA, which would explain the NA on the x axis
# -- confirm against the CSV.
Average_temperature_period$Sampling_date <- factor(Average_temperature_period$Sampling_date, levels = c("23/03/22","24/03/22","25/03/22","29/03/22","01/04/22","12/04/22","25/04/22","26/04/22","28/04/22","29/04/22","03/05/22","04/05/22","10/05/22","11/05/22","12/05/22"))
# Plot average temperature against the date
ggplot(data=Average_temperature_period, aes(x=Sampling_date, y=Average_temperature_field)) +
# group = 1 puts all dates in one group, so a single linear fit is drawn
geom_smooth(method = "lm", se=FALSE, color="black", aes(group=1)) +
theme_classic() +
ylab("Average soil temperature (°C)") +
xlab("Sampling date")
The x axis keeps on showing 'NA' for the sampling date. Does anyone know why and how to fix it? I would like to have the x axis in order of date (past to present).
Update for the OP's new data and request:
add the line drop_na(Average_temperature_field) %>% so that samples without a temperature reading are dropped before the daily mean is computed:
library(tidyverse)
library(lubridate)
# Daily mean soil temperature over time, skipping samples without a reading.
df %>%
  # discard samples whose temperature is missing
  drop_na(Average_temperature_field) %>%
  # turn the "dd/mm/yyyy" strings into Date values
  mutate(Sampling_date = dmy(Sampling_date)) %>%
  # one mean per sampling day
  group_by(Sampling_date) %>%
  summarise(
    avg_temp_day = mean(Average_temperature_field, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = Sampling_date, y = avg_temp_day)) +
  geom_point() +
  geom_line() +
  # one tick per day, labelled like "23 Mar"
  scale_x_date(date_labels = "%d %b", date_breaks = "1 day") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)
  )
First answer:
Here is one way to do it. You sometimes have more than one temperature per day, so I used the daily mean:
library(tidyverse)
library(lubridate)
# Daily mean soil temperature over time.
df %>%
  # turn the "dd/mm/yyyy" strings into Date values
  mutate(Sampling_date = dmy(Sampling_date)) %>%
  # one mean per sampling day; missing readings are ignored by the mean
  group_by(Sampling_date) %>%
  summarise(
    avg_temp_day = mean(Average_temperature_field, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = Sampling_date, y = avg_temp_day)) +
  geom_point() +
  geom_line() +
  # one tick every second day, labelled like "23 Mar"
  scale_x_date(date_labels = "%d %b", date_breaks = "2 day") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)
  )
data:
df <- structure(list(Sample = c("ZS_IG_1", "AK_SN_1", "JP_IG_2", "AW_IG_1",
"SBB_SN_1", "AW_IG_2", "JvH_IG_3", "JvH_IG_2", "SBB_SN_4", "SBB_SN_3",
"SBB_SN_2", "EF_SN_1", "JP_IG_2", "JvH_IG_3", "EF_SN_1", "JvH_IG_2",
"AK_SN_1", "ZS_IG_1", "AW_IG_1", "SBB_SN_1", "AW_IG_2", "SBB_SN_4",
"SBB_SN_3", "SBB_SN_2"), Sampling_date = c("23/03/2022", "24/03/2022",
"25/03/2022", "25/03/2022", "25/03/2022", "25/03/2022", "29/03/2022",
"29/03/2022", "01/04/2022", "01/04/2022", "01/04/2022", "12/04/2022",
"25/04/2022", "26/04/2022", "28/04/2022", "29/04/2022", "03/05/2022",
"04/05/2022", "10/05/2022", "10/05/2022", "11/05/2022", "11/05/2022",
"12/05/2022", "12/05/2022"), Period = c("March", "March", "March",
"March", "March", "March", "March", "March", "March", "March",
"March", "March", "AprilMay", "AprilMay", "AprilMay", "AprilMay",
"AprilMay", "AprilMay", "AprilMay", "AprilMay", "AprilMay", "AprilMay",
"AprilMay", "AprilMay"), Average_temperature_field = c(33.48518519,
47.35555556, 32.54814815, 34.01851852, 38.66666667, 31.71851852,
23.54814815, 26.83333333, 42.47777778, 29.45555556, 44.50740741,
40.27407407, 25.77407407, 11.74814815, 13.51111111, 11.29259259,
15.4962963, 19.9962963, 15.45925926, 17.14814815, 17.72592593,
15.84074074, 16.85555556, 19.78148148), Average_moisture_field = c(7.137037037,
6.966666667, 10.55555556, 7.281481481, 6.874074074, 9.211111111,
9.662962963, 8.12962963, 6.707407407, 6.774074074, 7.162962963,
8.114814815, NA, 18.91481481, 26.67777778, 16.27407407, 25.38518519,
NA, 18.27777778, 16.14074074, 22.86666667, 23.48518519, 13.93703704,
20.92222222)), class = "data.frame", row.names = c(NA, -24L))
I was able to get the bar plot with the code below. However, I would also like to show the Semi-Final column as a line chart on a secondary axis. Can this be added?
mobility_aus_sum <- structure(list(Year = c(2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2021, 2021, 2021, 2021, 2021, 2021, 2021,
2021, 2021, 2021, 2021, 2021, 2022, 2022), mon_day = c("April",
"August", "December", "February", "July", "June", "March", "May",
"November", "October", "September", "April", "August", "December",
"February", "January", "July", "June", "March", "May", "November",
"October", "September", "February", "January"), Final = c(-1483,
-912, -405, -232, -698, -739, -633, -1125, -540, -738, -802,
-482, -1012, -260, -607, -677, -827, -549, -509, -440, -326,
-659, -871, -480, -639), `Semi-Final` = c(-1333, -762, -255,
-82, -548, -589, -483, -975, -390, -588, -652, -332, -862, -110,
-457, -527, -677, -399, -359, -290, -176, -509, -721, -330, -489
)), row.names = c(NA, -25L), groups = structure(list(Year = c(2020,
2021, 2022), .rows = structure(list(1:11, 12:23, 24:25), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# First-of-month date for every row, used to sort the rows chronologically.
dates <- as.Date(
  paste(mobility_aus_sum$Year, mobility_aus_sum$mon_day, 1),
  format = "%Y %B %d"
)
df2 <- mobility_aus_sum[order(dates), ]

# Monthly series of `Final`, starting at the year and month of the first
# (earliest) row.
df3 <- ts(
  df2$Final,
  start = c(df2$Year[1], match(df2$mon_day[1], month.name)),
  frequency = 12
)

# Bar chart with a range selector underneath.
dygraph(df3) %>%
  dyRangeSelector() %>%
  dyBarChart()
You may try the following (you need to download barseries.js first):
library(dplyr)
library(dygraphs)
library(tibble)
#' Add a series drawn as bars to a dygraph
#'
#' Reads the JavaScript bar plotter from `file` and hands it to `dySeries()`
#' as the `plotter` for the series called `name`.
#'
#' @param dygraph Dygraph to add the series to.
#' @param name Name of the series to draw as bars.
#' @param ... Further arguments forwarded to `dySeries()`.
#' @param file Path to the downloaded `barseries.js` script. The default is
#'   the location that used to be hard-coded.
#' @return Whatever `dySeries()` returns.
dyBarSeries <- function(dygraph, name, ..., file = "D:/barseries.js") {
  # Fail with an actionable message instead of a bare readLines() error.
  if (!file.exists(file)) {
    stop(
      "Bar plotter script not found: '", file,
      "'. Download barseries.js and pass its path via `file`.",
      call. = FALSE
    )
  }
  plotter_js <- paste(readLines(file, skipNul = TRUE), collapse = "\n")
  do.call(
    "dySeries",
    c(list(dygraph = dygraph, name = name, plotter = plotter_js), list(...))
  )
}
# Plot `Final` as bars on the left axis and `Semi-Final` as a line on the
# right axis, indexed by the first day of each month.
mobility_aus_sum %>%
  # the data is grouped by Year; ungroup first so Year can be dropped below
  ungroup() %>%
  # build the date column in one vectorised step (no rowwise() needed):
  # month names become month numbers, e.g. "2020-4-1"
  mutate(
    dates = as.Date(
      paste(Year, match(mon_day, month.name), 1, sep = "-"),
      format = "%Y-%m-%d"
    )
  ) %>%
  select(-mon_day, -Year) %>%
  # the dates become the row names, leaving only the two value columns
  column_to_rownames(var = "dates") %>%
  dygraph() %>%
  dyAxis(
    "y",
    label = "Final", valueRange = c(-1500, -200), independentTicks = TRUE
  ) %>%
  dyAxis(
    "y2",
    label = "Semi-Final ", valueRange = c(-1400, 0), independentTicks = TRUE
  ) %>%
  dyBarSeries("Final") %>%
  dySeries("Semi-Final", axis = "y2") %>%
  dyRangeSelector()
I'm coding an interactive map in R with Shiny and Leaflet. I programmed 1 select all button for the months (checkGroup) and it worked fine, but adding the select all button for the other inputs has caused none of the map to work properly.
#import data
data <- structure(list(Area = c("Scarborough", "Etobicoke", "East York",
"North York", "North York", "Etobicoke", "Downtown Core (Central)",
"York", "Downtown Core (Central)", "York"), occurrenceyear = c(2017L,
2018L, 2018L, 2018L, 2018L, 2018L, 2017L, 2018L, 2018L, 2018L
), occurrencemonth = structure(c(12L, 5L, 5L, 5L, 5L, 5L, 6L,
12L, 12L, 12L), .Label = c("", "April", "August", "December",
"February", "January", "July", "June", "March", "May", "November",
"October", "September"), class = "factor"), Long = c(-79.1886063,
-79.5458221, -79.3138199, -79.4392548, -79.4406738, -79.5390091,
-79.3820572, -79.4840012, -79.3930817, -79.4356079), Lat = c(43.7639694,
43.5895691, 43.6753197, 43.7586555, 43.727829, 43.6431503, 43.6683502,
43.6842308, 43.6707535, 43.6820869)), row.names = c(NA, 10L), class = "data.frame")
# Define UI ----
# Page layout: three checkbox filters (month, year, neighbourhood), each with
# its own "Select All" link, next to the leaflet map.
ui <- fluidPage(
  titlePanel("Interactive Toronto Auto Theft Visualization"),
  sidebarLayout(
    sidebarPanel(
      # `selected` must match a choice exactly; the former "Janurary" matched
      # nothing, so no month was ticked on start-up.
      checkboxGroupInput(
        "checkGroup", h3("Month"),
        choices = month.name,
        selected = "January"
      ),
      actionLink("selectall", "Select All"),
      checkboxGroupInput(
        "checkGroup2", h3("Year"),
        choices = list(2014, 2015, 2016, 2017, 2018),
        selected = 2018
      ),
      actionLink("Selectall2", "Select All"),
      checkboxGroupInput(
        "checkGroup3", "Toronto Neighbourhoods",
        choices = list(
          "Downtown Core (Central)", "East End", "North End", "West End",
          "East York", "Etobicoke", "North York", "Scarborough", "York"
        ),
        selected = "York"
      ),
      actionLink("Selectall3", "Select All")
    ),
    mainPanel(leafletOutput("map", "100%", 500))
  )
)
# Define server logic ----
# Server logic: wires the three "Select All" links to their checkbox groups
# and draws the thefts that match the current filters.
server <- function(input, output, session) {
  # Every choice offered by each checkbox group; mirrors the choices in `ui`.
  all_choices <- list(
    checkGroup = month.name,
    checkGroup2 = as.character(2014:2018),
    checkGroup3 = c(
      "Downtown Core (Central)", "East End", "North End", "West End",
      "East York", "Etobicoke", "North York", "Scarborough", "York"
    )
  )

  # Tick every box of `group_id` on odd click counts, clear them on even ones.
  # Only `selected` is updated, so the label and choices from `ui` are kept.
  toggle_all <- function(group_id, clicks) {
    picked <- if (clicks %% 2 == 1) all_choices[[group_id]] else character(0)
    updateCheckboxGroupInput(session, group_id, selected = picked)
  }

  # One observer per link. A single observe() with early return(NULL) skipped
  # the later links until the first had been clicked, and re-applied every
  # toggle whenever any link changed.
  observeEvent(input$selectall, toggle_all("checkGroup", input$selectall))
  observeEvent(input$Selectall2, toggle_all("checkGroup2", input$Selectall2))
  observeEvent(input$Selectall3, toggle_all("checkGroup3", input$Selectall3))

  # Rows matching all three filters. `%in%` against an empty selection is
  # FALSE everywhere, so an unticked group simply yields zero rows.
  filtered <- reactive({
    data %>%
      filter(
        occurrencemonth %in% input$checkGroup,
        occurrenceyear %in% input$checkGroup2,
        Area %in% input$checkGroup3
      )
  })

  output$map <- renderLeaflet({
    map <- leaflet() %>%
      addProviderTiles("CartoDB")
    thefts <- filtered()
    # Add markers only when something matches, so the base map always renders.
    if (nrow(thefts) > 0) {
      map <- map %>%
        addCircleMarkers(data = thefts, lng = ~Long, lat = ~Lat, radius = 2)
    }
    map
  })
}
I believe the problem is in the observe function, because that is where the logic for the select-all buttons is placed. I had only programmed two of the buttons before running into the problem and was trying to fix the issue before adding the third button (Selectall3).
I've tried creating two separate observe functions for the two separate buttons, but that did not fix the problem.
You have a typo here:
if(input$selectall2 == 0) return(NULL)
Should be:
if(input$Selectall2 == 0) return(NULL)
I've been around the forums looking for a solution to my issue but can't seem to find anything. Derivatives of my question and their answers haven't really helped either. My data has four columns, including one for Year and one for Month. I've been wanting to plot all the data in one graph, without faceting by year, in ggplot. This is what I've been struggling with so far:
# Example data: 20 observations per month for January-November 2018 followed
# by February-March 2019 (13 months, 260 rows), alternating between the types
# "C" and "T".
df <- data.frame(
  Month = rep(
    c(
      "January", "February", "March", "April", "May", "June",
      "July", "August", "September", "October",
      "November", "February", "March"
    ),
    each = 20
  ),
  Year = rep(c("2018", "2019"), times = c(220, 40)),
  # length.out keeps Type at 260 values; rep(c("C", "T"), 260) has length 520
  # and made data.frame() silently recycle every other column to 520 rows.
  Type = rep(c("C", "T"), length.out = 260),
  Value = runif(260, min = 10, max = 55)
)
# Calendar order for the months, ascending order for the years.
df$Month <- ordered(df$Month, month.name)
df$Year <- ordered(df$Year)
# Boxplot of Value per month, split by Type, with one panel per year.
ggplot(df, aes(x = Month, y = Value, fill = Type)) +
  geom_boxplot() +
  facet_wrap(~Year)
I'd ideally like to manage this using dplyr and lubridate. Any help would be appreciated!
One option would be to make a true date value, then you can use the date axis formatter. Something like this is a rough start
# Put both years on one continuous date axis instead of faceting by year.
# Every month is represented by its first day.
ggplot(transform(df, Date = lubridate::mdy(paste(Month, 1, Year)))) +
  geom_boxplot(
    # Group by month AND Type: an explicit group on the date alone overrides
    # the fill grouping and merges the "C" and "T" boxes of each month.
    aes(x = Date, y = Value, fill = Type, group = interaction(Date, Type))
  ) +
  # Include the year in the labels so e.g. February 2018 and February 2019
  # can be told apart.
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y")
Do you mean this?
# Example data: 20 observations per month for January-November 2018 followed
# by February-March 2019 (13 months, 260 rows), alternating between the types
# "C" and "T".
df <- data.frame(
  Month = rep(
    c(
      "January", "February", "March", "April", "May", "June",
      "July", "August", "September", "October",
      "November", "February", "March"
    ),
    each = 20
  ),
  Year = rep(c("2018", "2019"), times = c(220, 40)),
  # length.out keeps Type at 260 values; rep(c("C", "T"), 260) has length 520
  # and made data.frame() silently recycle every other column to 520 rows.
  Type = rep(c("C", "T"), length.out = 260),
  Value = runif(260, min = 10, max = 55)
)
# Use the built-in month names as levels; the hand-typed list misspelt
# "December", which would have turned any December value into NA.
df$Month <- factor(df$Month, levels = month.name, ordered = TRUE)
df$Year <- ordered(df$Year)
# "Month Year" label per row. The levels follow first appearance, which is
# chronological because the rows are generated in chronological order.
df$Year_Month <- paste0(df$Month, " ", df$Year)
df$Year_Month <- factor(df$Year_Month, levels = unique(df$Year_Month))
# One box per month-year label and Type, all on a single shared axis.
ggplot(df, aes(x = Year_Month, y = Value, fill = Type)) +
  geom_boxplot()