I have this function that counts all instances in one day (12am to 12am). I want it to count all instances from 12pm to 12pm instead:
# Count the rows per camera and calendar day (midnight to midnight), ignoring
# rows whose PERCENTAGE is "100.00%".
# (Liat's code with a change for day instead of minute.)
days_analyzed_data <- data %>%
  filter(PERCENTAGE != "100.00%") %>%
  mutate(TIME = dmy_hms(TIME, tz = "Asia/Jerusalem")) %>%
  mutate(
    DATE = date(TIME),
    # `unit` is where the bin width is chosen: "1 minute", "1 hour", "1 day", ...
    # (the argument is spelled `unit`; `unite` is rejected as an unused argument)
    TIME = as_hms(floor_date(TIME, unit = "1 day"))
  ) %>%
  group_by(CAMERA, DATE, TIME) %>%
  summarise(count = n())
Thanks
Related
Is there a way to use a filtering function inside `summarise()`? For example:
# I have a dataset with 5 columns: Price, Type, Amount, USD, date.
# I only want the mean price of the rows where Type is not "SELL".
# Sketch of the intent only: summarise() and filter() are called here without
# a data frame as their first argument.
data = summarise(Price = filter(!Type == "SELL") %>% mean(Price), Amount = sum(Amount), USD = sum(USD), date = min(date))
You can use :
library(dplyr)
# Subset Price inside summarise() so that only the non-SELL rows feed the
# mean; the other columns are aggregated over every row.
summarise(
  data,
  Price = mean(Price[Type != "SELL"]),
  Amount = sum(Amount),
  USD = sum(USD),
  date = min(date)
)
To use it in pipes :
# Same aggregation, with the non-SELL prices selected by a nested pipe.
# The inner pipe starts from `data` itself rather than from the rows that
# summarise() is currently working on.
# NOTE(review): on grouped input every group would therefore receive the same
# overall Price - confirm that is acceptable.
data %>%
summarise(Price = mean(data %>% filter(Type != 'SELL') %>% pull(Price)),
Amount = sum(Amount),
USD = sum(USD),
date = min(date))
I'm trying to count the difference in dates from a single column, based on another columns value.
This is the result I'm looking for
Try this
library('dplyr')
# Example data: ids 1-3, each appearing twice, with dates stored as
# month/day/year text.
df <- data.frame(
  id = rep(c(1, 2, 3), times = 2),
  Date = c(
    "1/1/2020", "1/3/2020", "1/1/2020",
    "1/7/2020", "1/6/2020", "1/5/2020"
  )
)

# Parse the text into Date values, then, within each id, subtract the previous
# row's date from the current one. The first row of an id has no previous row,
# so its DIFF is NA.
df %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
  group_by(id) %>%
  mutate(DIFF = Date - lag(Date))
Here is a way using dplyr and lubridate (needed to make the dates behave when subtracting). It looks like you want the calculation to determine the number of days between the dates in a group by ID and the earliest date for that ID.
library(dplyr)
library(lubridate)
# Parse Date as day-month-year, then for every row report how many days it
# lies after the earliest date of the same ID.
df %>%
  mutate(Date = dmy(Date)) %>%
  group_by(ID) %>%
  mutate(Diff = difftime(Date, min(Date), units = "days"))
If you want to have NA instead of 0, you can do the following:
# Same as the difference from the earliest date per ID, but the row holding
# that earliest date gets NA instead of 0.
df %>%
  mutate(Date = dmy(Date)) %>%
  group_by(ID) %>%
  mutate(
    # if_else() needs both branches to share a type, so the day difference is
    # converted to integer to match NA_integer_.
    Diff = if_else(
      Date == min(Date),
      NA_integer_,
      as.integer(Date - min(Date))
    )
  )
A day with precipitation >= 2.5 mm is called a rainy day. I was able to calculate the month-wise rainy days using the following code:
library(seas)
library(tidyverse)
library(zoo)
library(lubridate)
# Load the example data and take the subset for id 1108447.
data(mscdata)
dat.int <- (mksub(mscdata, id = 1108447))

# Count, per year-month and per remaining measurement column, the days whose
# value reaches the rainy-day threshold (>= 2.5), one output column per
# measurement.
dat.int %>%
  as_tibble() %>% # for easier viewing
  mutate(yearmon = as.yearmon(dat.int$date, "%b %y")) %>%
  # drop the date parts and the temperature columns before reshaping
  dplyr::select(-date, -year, -yday, -t_max, -t_min, -t_mean) %>%
  pivot_longer(
    cols = -yearmon,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(yearmon, variable) %>%
  # a rainy day is defined as >= 2.5, so the comparison must be inclusive
  summarise(rainy_days = sum(value >= 2.5)) %>%
  pivot_wider(names_from = "variable", values_from = "rainy_days")
Then I have calculated the longterm average using the following code
# Long-term average: count the rainy days (value >= 2.5) per year-month and
# measurement, then average those monthly counts per measurement and round to
# an integer.
dat.int %>%
  as_tibble() %>% # for easier viewing
  mutate(yearmon = as.yearmon(dat.int$date, "%b %y")) %>%
  dplyr::select(-date, -year, -yday, -t_max, -t_min, -t_mean) %>%
  pivot_longer(
    cols = -yearmon,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(yearmon, variable) %>%
  # a rainy day is defined as >= 2.5, so the comparison must be inclusive
  summarise(rainy_days = sum(value >= 2.5)) %>%
  # `year` is not used by the steps below; the final summary keeps only
  # `variable` and `value`
  mutate(year = year(yearmon)) %>%
  group_by(variable) %>%
  summarize(value = as.integer(round(mean(rainy_days, na.rm = TRUE)))) %>%
  pivot_wider(names_from = "variable", values_from = "value")
Now two thresholds should be calculated as: lower threshold = 0.81*long term average and upper threshold = 1.19*long term average. Then calculate the number of years having rainy days between these two thresholds. Now I want to calculate the number of years having rainy days in the range of 81–119% of long term average (between lower and upper threshold).
Edit: Based on OP's comments and wanting to summarize by total precip, rain and snow.
library(dplyr)
library(lubridate)
# For precip, rain and snow: count the days >= 2.5 per year and month, add them
# up per year, then count the years whose total lies strictly between 81% and
# 119% of the mean yearly total.
dat.int %>%
  mutate(month = month(ymd(date))) %>%
  group_by(year, month) %>%
  summarize_at(
    vars(precip, rain, snow),
    list(days = ~ sum(. >= 2.5, na.rm = TRUE))
  ) %>%
  group_by(year) %>%
  summarize_at(vars(ends_with("days")), list(yearly = ~ sum(.))) %>%
  summarize_at(
    vars(-year),
    list(~ sum(. > mean(.) * 0.81 & . < mean(.) * 1.19))
  ) %>%
  # e.g. precip_days_yearly -> precip_in_range
  rename_all(list(~ gsub("days_yearly", "in_range", .)))
# precip_in_range rain_in_range snow_in_range
# <int> <int> <int>
#1 26 24 6
I am computing statistics (i.e., mean, max, median etc) for winter season comprised of Months 11 & 12 of previous year and Months 1-4 of following year.
# Daily dates from 2010-01-01 to 2019-12-31 in a single column named "Date".
mydate <- data.frame(
  Date = seq(as.Date("2010-01-01"), to = as.Date("2019-12-31"), by = "day")
)

# Four columns of uniform random values, one row per day, plus the dates.
DF <- data.frame(
  A = runif(3652, 0, 10),
  J = runif(3652, 0, 8),
  X = runif(3652, 0, 12),
  Z = runif(3652, 0, 10),
  mydate
)

# Winter season = November and December of one year plus January to April of
# the next. W_Year labels a season by the year in which it ends. For every
# variable, values are accumulated within a season, each day is re-dated into
# 2020 so that seasons line up, and the median accumulated value per day is
# taken across seasons (W_Year 2019 is left out).
mydata <- DF %>%
  mutate(Year = year(Date), Month = month(Date)) %>%
  pivot_longer(
    -c(Date, Year, Month),
    names_to = "variable",
    values_to = "values"
  ) %>%
  filter(Month %in% c(11, 12, 1, 2, 3, 4)) %>%
  # November/December belong to the season ending in the following year
  mutate(W_Year = Year + (Month > 10)) %>%
  filter(W_Year != 2019) %>%
  group_by(W_Year, variable) %>%
  mutate(
    Cumulative = cumsum(values),
    NewDate = ymd(paste("2020", Month, day(Date), sep = "-"))
  ) %>%
  group_by(variable, NewDate) %>%
  summarise(Median = median(Cumulative))
I would then need to combine mydata with the data.frame of accumulated values for the most recent year, which in my case is the year 2019 as an additional column.
# Variable names looked up by the slice() step at the end of the pipeline.
X = c("A", "J","X", "Z")
# Accumulated values for 2019: reshape DF to long format, keep months 5 to 10
# of 2019, accumulate `values` per Year and variable, and re-date each day
# into 2020.
Data2019 <- DF %>% mutate(Year = year(Date), Month = month(Date)) %>%
pivot_longer(-c(Date,Year,Month), names_to = "variable", values_to = "values") %>%
filter(between(Month,5,10)) %>%
filter(Year == 2019) %>%
group_by(Year, variable) %>%
mutate(Precipitation = cumsum(values)) %>%
mutate(NewDate = ymd(paste("2020", Month,day(Date), sep = "-"))) %>%
ungroup() %>%
group_by(variable, NewDate) %>%
# Columns are picked by position; presumably 4, 6 and 7 are variable,
# Precipitation and NewDate - verify against the column order of DF.
select(c(4,6,7)) %>%
# Applied per (variable, NewDate) group.
# NOTE(review): match() returns NA for every name in X that is not the
# group's own variable - confirm slice() handles that as intended.
slice(match(X, variable))
When combining the two data frames, I get a mismatch error for the number of rows, which I believe is due to the leap year, but I do not know how to overcome this problem. Any way forward would help. Thank you.
# Column-bind mydata with the second column of Data2019.
# NOTE(review): this positional bind needs both inputs to have the same number
# of rows; a join on the shared variable/NewDate columns would not - confirm
# which is intended.
Data_plot <- data.frame(mydata, Data2019[,2])
I'm trying to filter intraday-data to include only certain period inside the day. Is there a trick in some packages to achieve this. Here is example data:
library(tibbletime)
# Example data: one row per minute from 2017-01-01 09:00 to 2017-01-02 20:00,
# each with value 1.
example <- as_tibble(data.frame(
  date = ymd_hms(seq(
    as.POSIXct("2017-01-01 09:00:00"),
    as.POSIXct("2017-01-02 20:00:00"),
    by = "min"
  )),
  # a single value is recycled to the length of `date`, so the row count does
  # not have to be hard-coded
  value = 1
))
I would like to include only 10:00:00 - 18:35:00 for each day, but I can't achieve this nicely. My solution so far has been to create extra indicator columns and then filter by them, but that hasn't worked well either.
You can use the function between() from data.table
# Format each timestamp as "HH:MM:SS" text and keep the rows whose text lies
# between the two bounds; zero-padded times compare correctly as strings.
# NOTE(review): data.table::between() is presumably inclusive at both ends by
# default - confirm.
example[data.table::between(format(example$date, "%H:%M:%S"),
lower = "10:00:00",
upper = "18:35:00"), ]
library(tibbletime)
library(tidyverse)
library(lubridate)
# Example data: one row per minute from 2017-01-01 09:00 to 2017-01-02 20:00,
# each with value 1.
example <- as_tibble(data.frame(
  date = ymd_hms(seq(
    as.POSIXct("2017-01-01 09:00:00"),
    as.POSIXct("2017-01-02 20:00:00"),
    by = "min"
  )),
  # a single value is recycled to the length of `date`, so the row count does
  # not have to be hard-coded
  value = 1
))
# Keep the rows whose time of day lies in 10:00:00-18:35:00 (both inclusive).
# The comparison uses seconds since midnight: pasting hour and minute into a
# decimal number turns 18:05 into 18.5, which sorts after 18.35 and would drop
# 18:04-18:09 from the window.
example %>%
  mutate(time = hour(date) * 3600 + minute(date) * 60 + second(date)) %>%
  filter(time >= 10 * 3600 & time <= 18 * 3600 + 35 * 60) %>%
  select(-time)
This is pretty hacky but if you really want to stay in the tidyverse:
# Lower and upper end of the daily window, each converted to a datetime.
# NOTE(review): this relies on as_datetime() turning an hms() period into that
# time of day on a fixed reference date - confirm.
rng <- range((hms("10:00:00") %>% as_datetime()), (hms("18:35:00") %>% as_datetime()))
example %>%
# split the timestamp text at the space into a date part and a time part
separate(., date, into = c("date", "time"), sep = " ") %>%
mutate(
# same conversion as for rng, so both sides of the comparison match
time = hms(time) %>% as_datetime(),
date = as_date(date)
) %>%
# strict comparisons: rows exactly at either bound are excluded
filter(time > rng[1] & time < rng[2]) %>%
# split the datetime text again and discard the part before the space
separate(., time, into = c("useless", "time"), sep = " ") %>%
select(-useless)