Incorrect date/time conversion when reading netcdf file - r

I have a netcdf file (nc.in) containing 30 years of 3-hourly temperature data from 1981-2010. When I convert time to the gregorian calendar the resulting dates are incorrect. The output starts on 02/13/04 (should be 01/01/1981) and then skips three days each time rather than three hours, e.g. 01/13/04, then 02/16/04 etc. instead of 01/01/1981 00:00, then 01/01/1981 03:00 etc.
I used the following code to get time:
# Get time
nc.t <- ncvar_get(nc.in,"time")
nc.tunits <- ncatt_get(nc.in,"time","units")
and then converted it to Gregorian dates using:
# Split the time units string into fields
# (the units string shown below is "hours since 1900-01-01 00:00:00.0",
# so the third space-separated field is the origin date)
nc.tustr <- strsplit(nc.tunits$value, " ")
nc.tdstr <- strsplit(unlist(nc.tustr)[3], "-")
nc.tyear = as.integer(unlist(nc.tdstr)[1])
nc.tmonth = as.integer(unlist(nc.tdstr)[2])
nc.tday = as.integer(unlist(nc.tdstr)[3])
# NOTE(review): nc.t is passed through unscaled although the units are hours;
# chron() appears to count numeric input in days, which would explain the
# 3-day steps described above -- nc.t / 24 is probably what is intended.
# Also confirm the origin order: chron documents origin as
# c(month, day, year), whereas c(year, month, day) is supplied here.
nc.chron=chron(nc.t, origin = c(nc.tyear, nc.tmonth, nc.tday))
See below for reproducibility. Sorry if this is not the correct info to provide - I am still learning how to provide reproducible examples.
dput(nc.in[1:10])
list(filename = "ERA5-STP-RUS-t2m.nc", writable = FALSE, id = 65536L,
safemode = FALSE, format = "NC_FORMAT_64BIT", is_GMT = FALSE,
groups = list(structure(list(id = 65536L, name = "", ndims = 3L,
nvars = 4L, natts = 2L, dimid = structure(0:2, .Dim = 3L),
fqgn = ""), class = "ncgroup4")), fqgn2Rindex = structure(list(
1L), .Names = ""), ndims = 3, natts = 2)
dput(nc.t[1:10])
structure(c(710040L, 710043L, 710046L, 710049L, 710052L, 710055L,
710058L, 710061L, 710064L, 710067L), .Dim = 10L)
dput(nc.tunits[1:10])
structure(list(TRUE, "hours since 1900-01-01 00:00:00.0", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL), .Names = c("hasatt",
"value", NA, NA, NA, NA, NA, NA, NA, NA))
dput(nc.chron[1:10])
structure(c(710040L, 710043L, 710046L, 710049L, 710052L, 710055L,
710058L, 710061L, 710064L, 710067L), .Dim = 10L, format = "m/d/y", origin = c(1900L,
1L, 1L), class = c("dates", "times"))

Related

Keep specific rows in a dataframe from a list

From a list from this process:
library(stackr)
df <- data.frame (qid = c(71663375, 71674701, 71724524))
lst1 <- split(df$qid, as.integer(gl(nrow(df), 100, nrow(df))))
out <- vector('list', length(lst1))
for(i in seq_along(lst1)) {
out[[i]] <- stack_questions(lst1[[i]])
}
How is it possible to create from out list a new dataframe with the columns tags, creation_date, question_id?
dput of the out list
dput(out)
list(structure(list(tags = c("r", "r", "sql,dataexplorer"), is_answered = c(TRUE,
TRUE, FALSE), view_count = c(33L, 19L, 27L), accepted_answer_id = c(71724636L,
71674900L, NA), answer_count = c(1L, 1L, 1L), score = c(0L, 0L,
0L), last_activity_date = structure(c(1648978330, 1648633121,
1648563500), tzone = "", class = c("POSIXct", "POSIXt")), creation_date = structure(c(1648977343,
1648632306, 1648562092), tzone = "", class = c("POSIXct", "POSIXt"
)), last_edit_date = structure(c(1648977839, 1648632778, 1648562436
), tzone = "", class = c("POSIXct", "POSIXt")), question_id = c(71724524L,
71674701L, 71663375L), content_license = c("CC BY-SA 4.0", "CC BY-SA 4.0",
"CC BY-SA 4.0"), link = c("https://stackoverflow.com/questions/71724524/melt-a-dataframe-using-a-list-column",
"https://stackoverflow.com/questions/71674701/create-a-new-column-using-detecting-the-domain-of-a-url-from-an-existing-column",
"https://stackoverflow.com/questions/71663375/paginate-pages-to-receive-results-from-tsql"
), title = c("Melt a dataframe using a list column", "Create a new column using detecting the domain of a url from an existing column",
"Paginate pages to receive results from tSQL"), owner_account_id = c(24733596L,
24733596L, 24733596L), owner_reputation = c(17L, 17L, 17L), owner_user_id = c(18621268L,
18621268L, 18621268L), owner_user_type = c("registered", "registered",
"registered"), owner_profile_image = c("https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256"
), owner_display_name = c("Domin D", "Domin D", "Domin D"), owner_link = c("https://stackoverflow.com/users/18621268/domin-d",
"https://stackoverflow.com/users/18621268/domin-d", "https://stackoverflow.com/users/18621268/domin-d"
)), row.names = c(NA, -3L), class = "data.frame", metadata = list(
has_more = FALSE, quota_max = 10000L, quota_remaining = 1323L)))
out[[1]][c('tags', 'creation_date', 'question_id')]
tags creation_date question_id
1 r 2022-04-03 05:15:43 71724524
2 r 2022-03-30 05:25:06 71674701
3 sql,dataexplorer 2022-03-29 09:54:52 71663375
Or if out is a list containing multiple data frames per element:
lapply(out, function(x) x[c('tags', 'creation_date', 'question_id')])

How do I unnest a list embedded in a data.frame column?

I'm new to working with nested lists, so I'm hoping the solution can also include some comments explaining how it works. I have a nested list that I scraped using jsonlite. How do I take the list data for all teams and bind it together into a single data.frame? The list is set up as shown below. I copied one element of the list (for 1 team).
Here is the code I used to get to the list that I've pasted below. I'm showing simply so that I can provide how the list is setup.
json <-
url %>%
fromJSON(simplifyDataFrame = T)
df <- json$body$rosters
# DF with each team showing up on it's own line, but nested lists in players
df_teams <- df$teams
# One teams worth of data
JSON_list <- df_teams[1, ]
My list content is below.
JSON_list <- structure(list(
projected_points = NA, long_abbr = "KE", lineup_status = "ok",
short_name = "Kramerica", total_roster_salary = 22L, division = "",
players = list(structure(list(
firstname = c(
"Jonathan", "Anthony"
), wildcards = structure(list(
contract = c("1", "1"),
salary = c("1", "21")
), class = "data.frame", row.names = c(
NA,
2L
)), on_waivers = c(
0L, 0L
), photo = c(
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1657581.png",
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1670417.png"
),
eligible_for_offense_and_defense = c(0L, 0L),
opponents = list(
structure(list(
game_id = c(
"", ""
), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet"
),
weather_icon_code = c(
"", ""
), home_team = c("true", "true"),
abbrev = c("OAK", "OAK"),
time = c(
1553803620L,
1553911620L
),
date = c(
"20190328",
"20190329"
), weather_icon_url = c(
"", ""
), venue_type = c("", ""), game_abbr = c("", ""),
weather = c("", ""), temperature = c(
NA, NA
)
), class = "data.frame", row.names = c(NA, 2L)),
structure(list(game_id = c("", "", ""), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet", "Weather is not available for this game yet"
), weather_icon_code = c("", "", ""), home_team = c(
"true",
"true", "true"
), abbrev = c("TEX", "TEX", "TEX"), time = c(
1553803500L,
1553990700L, 1554062700L
), date = c(
"20190328", "20190330",
"20190331"
), weather_icon_url = c("", "", ""), venue_type = c(
"",
"", ""
), game_abbr = c("", "", ""), weather = c(
"", "",
""
), temperature = c(NA, NA, NA)), class = "data.frame", row.names = c(
NA,
3L
))
), icons = structure(list(
headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
),
hot = c(NA, 1L),
cold = c(1L, NA),
injury = c(
"Knee: Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 21L)), elias_id = c(
"LUC758619", "RIZ253611"
), percentstarted = c(
"48%", "97%"
),
profile_link = c(
"<a class='playerLink' aria-label=' Jonathan Lucroy C LAA' href='http://baseball.cbssports.com/players/playerpage/1657581'>Jonathan Lucroy</a> <span class=\"playerPositionAndTeam\">C | LAA</span> ",
"<a class='playerLink' aria-label=' Anthony Rizzo 1B CHC' href='http://baseball.cbssports.com/players/playerpage/1670417'>Anthony Rizzo</a> <span class=\"playerPositionAndTeam\">1B | CHC</span>"
),
id = c(
"1657581", "1670417"
), pro_status = c(
"A", "A"
), on_waivers_until = c(NA, NA), jersey = c("20", "44"),
percentowned = c("61%", "99%"),
pro_team = c(
"LAA", "CHC"
), position = c(
"C", "1B"
), lastname = c(
"Lucroy", "Rizzo"
),
roster_pos = c("C", "1B"),
update_type = c("normal", "normal"),
age = c(
32L, 29L
), eligible = c(
"C,U", "1B,U"
), is_locked = c(
0L,
0L
), bats = c(
"R", "L"
), owned_by_team_id = c(
12L, 12L
), ytd_points = c(
0L, 0L
), roster_status = c(
"A", "A"
), is_keeper = c(
0L, 0L
), profile_url = c(
"http://baseball.cbssports.com/players/playerpage/1657581",
"http://baseball.cbssports.com/players/playerpage/1670417"
), fullname = c(
"Jonathan Lucroy", "Anthony Rizzo"
), throws = c(
"R",
"L"
), headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
), `starting-pitcher-today` = c(
NA, "false"
), injury = c(NA, "Knee"), return = c(
"Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 2L))),
name = "Kramerica Enterprises", logo = "http://baseball.cbssports.com/images/team-logo/main-36x36.jpg",
abbr = "KE", point = "20190328", id = "12", active_roster_salary = 22L,
warning = structure(list(description = NA_character_), row.names = 1L, class = "data.frame")
), row.names = 1L, class = "data.frame")
# Desired table sample (does not include all columns)
tibble::tribble(
~projected_points, ~long_abbr, ~lineup_status, ~short_name, ~total_roster_salary, ~division, ~name, ~logo, ~abbr, ~point5, ~active_roster_salary, ~id2, ~firstname, ~contract, ~salary,
NA, "KE", "ok", "Kramerica", 22, NA, "Biloxi Blackjacks", NA, "KE", 20190328, 22, 1657581, "Jonathan", 1, 1
)
The issue I'm running into is that the players column looks to be a nested df, and it also has other nested dfs in it, specifically "wildcards", "opponents" and "icons". I am looking for a data frame that contains all of the columns. For the nested lists, I'd like their content to show up as columns for that particular player, e.g. for wildcards, create a column for "contract" and one for "salary". Also, how would I bind the list together if I wanted to choose specific columns from JSON_list, e.g. "long_abbr", "lineup_status", etc., together with "firstname", both wildcard columns, "id", and some others from JSON_list$players?
You can isolate the list elements using [[]] and the columns using [] if you have a nested structure. If the numbers of rows are equal, you can directly make your data frame using cbind.
Let's make a reproducible example
Create 3 data frames of similar dimensions
df1 <- data.frame(var1=c('a', 'b', 'c'), var2=c('d', 'e', 'f'), var3=1:3)
df2 <- data.frame(var4=c('g', 'h', 'i'), var5=c('j', 'k', 'l'), var6=4:6)
df3 <- data.frame(var7=c(6:8), var8=c('j', 'k', 'l'), var9=4:6)
Put the data frames in a nested list structure
# Inner list holding the first two data frames. It is deliberately not named
# `list`, so that base::list() is not shadowed by a variable.
inner.list <- list(df1, df2)
# Outer list: element 1 is the inner list, element 2 is the third data frame
nested.list <- list(inner.list, df3)
Make a combined (column-bound) data frame made of var2, var6 and var7
binded.df <- cbind(nested.list[[1]][[1]][2],nested.list[[1]][[2]][3],nested.list[[2]][1])

How to create a for loop based on unique user IDs and specific event types

I have two data frames: users and events.
Both data frames contain a field that links events to users.
How can I create a for loop where every user's unique ID is matched against an event of a particular type and then stores the number of occurrences into a new column within users (users$conversation_started, users$conversation_missed, etc.)?
In short, it is a conditional for loop.
So far I have this but it is wrong:
for(i in users$id){
users$conversation_started <- nrow(event[event$type = "conversation-started"])
}
An example of how to do this would be ideal.
The idea is:
for(each user)
find the matching user ID in events
count the number of event types == "conversation-started"
assign count value to user$conversation_started
end for
Important note:
The type field can contain one of five values so I will need to be able to effectively filter on each type for each associate:
> events$type %>% table %>% as.matrix
[,1]
conversation-accepted 3120
conversation-already-accepted 19673
conversation-declined 27
conversation-missed 831
conversation-request 23427
Data frames (note that these are reduced versions as confidential information has been removed):
users <- structure(list(`_id` = c("JTuXhdI4Ai", "iGIeCEXyVE", "6XFtOJh0bD",
"mNN986oQv9", "9NI71KBMX9", "x1jH7t0Cmy"), language = c("en",
"en", "en", "en", "en", "en"), registering = c(TRUE, TRUE, FALSE,
FALSE, FALSE, NA), `_created_at` = structure(c(1485995043.131,
1488898839.838, 1480461193.146, 1481407887.979, 1489942757.189,
1491311381.916), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`_updated_at` = structure(c(1521039527.236, 1488898864.834,
1527618624.877, 1481407959.116, 1490043838.561, 1491320333.09
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), lastOnlineTimestamp = c(1521039526.90314,
NA, 1480461472, 1481407959, 1490043838, NA), isAgent = c(FALSE,
NA, FALSE, FALSE, FALSE, NA), lastAvailableTime = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), available = c(NA, NA, NA, NA, NA,
NA), busy = c(NA, NA, NA, NA, NA, NA), joinedTeam = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), timezone = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c("list.1", "list.2", "list.3", "list.4",
"list.5", "list.6"), class = "data.frame")
and
events <- structure(list(`_id` = c("JKY8ZwkM1S", "CG7Xj8dAsA", "pUkFFxoahy",
"yJVJ34rUCl", "XxXelkIFh7", "GCOsENVSz6"), expirationTime = structure(c(1527261147.873,
NA, 1527262121.332, NA, 1527263411.619, 1527263411.619), class = c("POSIXct",
"POSIXt"), tzone = ""), partId = c("d22bfddc-cd51-489f-aec8-5ab9225c0dd5",
"d22bfddc-cd51-489f-aec8-5ab9225c0dd5", "cf4356da-b63e-4e4d-8e7b-fb63035801d8",
"cf4356da-b63e-4e4d-8e7b-fb63035801d8", "a720185e-c300-47c0-b30d-64e1f272d482",
"a720185e-c300-47c0-b30d-64e1f272d482"), type = c("conversation-request",
"conversation-accepted", "conversation-request", "conversation-accepted",
"conversation-request", "conversation-request"), `_p_conversation` = c("Conversation$6nSaLeWqs7",
"Conversation$6nSaLeWqs7", "Conversation$6nSaLeWqs7", "Conversation$6nSaLeWqs7",
"Conversation$bDuAYSZgen", "Conversation$bDuAYSZgen"), `_p_merchant` = c("Merchant$0A2UYADe5x",
"Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x",
"Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x"), `_p_associate` = c("D9ihQOWrXC",
"D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC"
), `_wperm` = list(list(), list(), list(), list(), list(), list()),
`_rperm` = list("*", "*", "*", "*", "*", "*"), `_created_at` = structure(c(1527264657.998,
1527264662.043, 1527265661.846, 1527265669.435, 1527266922.056,
1527266922.059), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`_updated_at` = structure(c(1527264657.998, 1527264662.043,
1527265661.846, 1527265669.435, 1527266922.056, 1527266922.059
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), read = c(TRUE,
NA, TRUE, NA, NA, NA), data.customerName = c("Shopper 109339",
NA, "Shopper 109339", NA, "Shopper 109364", "Shopper 109364"
), data.departmentName = c("Personal advisors", NA, "Personal advisors",
NA, "Personal advisors", "Personal advisors"), data.recurring = c(FALSE,
NA, TRUE, NA, FALSE, FALSE), data.new = c(TRUE, NA, FALSE,
NA, TRUE, TRUE), data.missed = c(0L, NA, 0L, NA, 0L, 0L),
data.customerId = c("84uOFRLmLd", "84uOFRLmLd", "84uOFRLmLd",
"84uOFRLmLd", "5Dw4iax3Tj", "5Dw4iax3Tj"), data.claimingTime = c(NA,
4L, NA, 7L, NA, NA), data.lead = c(NA, NA, FALSE, NA, NA,
NA), data.maxMissed = c(NA, NA, NA, NA, NA, NA), data.associateName = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), data.maxDecline = c(NA, NA, NA, NA, NA, NA
), data.goUnavailable = c(NA, NA, NA, NA, NA, NA)), row.names = c("list.1",
"list.2", "list.3", "list.4", "list.5", "list.6"), class = "data.frame")
Update: 21st September 2018
This solution now results in an NA-only data frame being produced at the end of the function. When written to a .csv, this is what I get (naturally, Excel displays NA-values as blank values):
My data source has not changed, nor has my script.
What might be causing this?
My guess is that this is an unforeseen case in which there were 0 hits for each step; if so, is there a way to put 0 in those cases where there weren't any hits, rather than NA/blank values?
Is there a way to avoid this?
New solution based on the provided data.
Note: As your data had no overlap in _id, I changed the events$_id to be the same as in users.
Simplified example data:
users <- structure(list(`_id` = structure(c(4L, 3L, 1L, 5L, 2L, 6L),
.Label = c("6XFtOJh0bD", "9NI71KBMX9", "iGIeCEXyVE",
"JTuXhdI4Ai", "mNN986oQv9", "x1jH7t0Cmy"),
class = "factor")), .Names = "_id",
row.names = c(NA, -6L), class = "data.frame")
events <- structure(list(`_id` = c("JKY8ZwkM1S", "CG7Xj8dAsA", "pUkFFxoahy",
"yJVJ34rUCl", "XxXelkIFh7", "GCOsENVSz6"),
type = c("conversation-request", "conversation-accepted",
"conversation-request", "conversation-accepted",
"conversation-request", "conversation-request")),
.Names = c("_id", "type"), class = "data.frame",
row.names = c("list.1", "list.2", "list.3", "list.4", "list.5", "list.6"))
events$`_id` <- users$`_id`
> users
_id
1 JTuXhdI4Ai
2 iGIeCEXyVE
3 6XFtOJh0bD
4 mNN986oQv9
5 9NI71KBMX9
6 x1jH7t0Cmy
> events
_id type
list.1 JTuXhdI4Ai conversation-request
list.2 iGIeCEXyVE conversation-accepted
list.3 6XFtOJh0bD conversation-request
list.4 mNN986oQv9 conversation-accepted
list.5 9NI71KBMX9 conversation-request
list.6 x1jH7t0Cmy conversation-request
We can use the same approach I suggested before, just enhance it a bit.
First we loop over unique(events$type) to store a table() of every type of event per id in a list:
test <- lapply(unique(events$type), function(x) table(events$`_id`, events$type == x))
Then we store the specific type as the name of the respective table in the list:
names(test) <- unique(events$type)
Now we use a simple for-loop to match() the user$_id with the rownames of the table and store the information in a new variable with the name of the event type:
for (i in names(test)) {
  # Pick the "TRUE" column by name rather than by position: it holds, per id,
  # the number of events of type i, and it is the only column present when
  # every event is of that type.
  counts <- test[[i]][, "TRUE"][match(users$`_id`, rownames(test[[i]]))]
  # Users without any event are missing from the table and come back as NA;
  # report them as 0 occurrences instead.
  counts[is.na(counts)] <- 0L
  users[, i] <- counts
}
Result:
> users
_id conversation-request conversation-accepted
1 JTuXhdI4Ai 1 0
2 iGIeCEXyVE 0 1
3 6XFtOJh0bD 1 0
4 mNN986oQv9 0 1
5 9NI71KBMX9 1 0
6 x1jH7t0Cmy 1 0
Hope this helps!

Calculating the median of a time series every 8 hours

I am new to R and I have to calculate the mean of a time series covering 5 years of hourly measurements of ozone etc.
My df looks like:
structure(list(date = structure(c(1L, 1L, 1L, 1L), .Label = "01.01.2010", class = "factor"),
day.of = c(1L, 1L, 1L, 1L), time = structure(1:4, .Label = c("00:00",
"01:00", "02:00", "03:00"), class = "factor"), SVF_Ray = c(1L,
1L, 1L, 1L), Gmax = c(0, 0, 0, 0), Ta = c(-1.3, -1.2, -1.2,
-1.2), Tmrt = c(-19.3, -12.1, -12, -12.1), PET = c(-10.4,
-8.7, -8.7, -8.7), PT = c(-11.3, -9.3, -9.3, -9.3), Ozon = c(61.35,
62.65, 63.4, 63.85), rDatum = structure(c(14610, 14610, 14610,
14610), class = "Date"), year = c(2010, 2010, 2010, 2010),
month = c(1, 1, 1, 1), day = c(1, 1, 1, 1), hour = c(0, 1,
2, 3)), .Names = c("date", "day.of", "time", "SVF_Ray", "Gmax",
"Ta", "Tmrt", "PET", "PT", "Ozon", "rDatum", "year", "month",
"day", "hour"), row.names = c(NA, 4L), class = "data.frame")
I would like to calculate the mean of Ozon every 8 hours, i.e. a series of 3 calculated means for every day. I have prepared my date and hour columns like this:
Datum_Ozon$rDatum <- as.Date(data$date, format="%d.%m.%Y")
Datum_Ozon$hour<-as.numeric(unlist(strsplit(as.character(df$time), ":"))[seq(1, 2 * length(df$time), 2)])
Format is numeric
But I don't know how to proceed from here to achieve my goal. Thanks in advance!
If it's the case that your data is regular and complete (i.e., every hour has a record), the following base R code should do the trick:
# Get the number of 8 hour intervals (a trailing partial interval counts as one)
intervalCnt <- ceiling(nrow(df) / 8)
# add a grouping vector to your data: rows 1-8 -> 1, rows 9-16 -> 2, ...
# Integer division copes with any row count, whereas
# rep(1:intervalCnt, each = 8) fails when nrow(df) is not a multiple of 8.
df$group <- (seq_len(nrow(df)) - 1L) %/% 8L + 1L
# get the median for each interval, keep year var around for later
intervalMedian <- aggregate(var~group + day + month + year, data=df, FUN=median)
Note that this solution relies on the assumption that the data has a regular structure, i.e., every hour has a record. If the measure of interest is missing, i.e. NA, then simply adding na.rm to the aggregate function will return the statistics of interest:
# get the median for each interval, dropping missing values first
# (TRUE is spelled out because T is an ordinary, reassignable variable)
intervalMedian <- aggregate(var ~ group + day + month + year, data = df,
                            FUN = median, na.rm = TRUE)
If you have a variable for hour of the day, here is a simple way to check for data regularity:
table(df$hourOfDay)
The result of this function is a frequency count of each hour. The counts should be equal. Another thing to check is that the first observation starts in the hour following the final observation, i.e. if the hour of observation 1 == "00:00", then the hour of the final observation should be 23:00.
To provide a plot of the mean of the 8 hour periods by year, you can again use aggregate:
# average the per-interval medians within each group, ignoring missing values
intervalMeans.year <- aggregate(var ~ group, data = intervalMedian,
                                FUN = mean, na.rm = TRUE)
The inclusion of the group, day, month, and year variables in the intervalMedian data.frame allow for a lot of different aggregations. For example, with a minor adjustment, it is possible to get the average value of a variable over the 5 year period for each time period-day-month:
# Label every row with its position within the day (1, 2, 3, 1, 2, 3, ...).
# length.out has to be the number of rows, not the data frame itself.
intervalMedian$periodDay <- rep(1:3, length.out = nrow(intervalMedian))
# mean over all years for each period-of-day / day / month combination
intervalMeans.dayMonthPeriod <- aggregate(var ~ periodDay + day + month,
                                          data = intervalMedian,
                                          FUN = mean, na.rm = TRUE)
Here is a basic example using a dplyr pipe rather than a plyr approach as well as ifelse(). Everything is self contained here:
library(dplyr)
## OP data
df <-
structure(list(date = structure(c(1L, 1L, 1L, 1L), .Label = "01.01.2010", class = "factor"),
day.of = c(1L, 1L, 1L, 1L), time = structure(1:4, .Label = c("00:00",
"01:00", "02:00", "03:00"), class = "factor"), SVF_Ray = c(1L,
1L, 1L, 1L), Gmax = c(0, 0, 0, 0), Ta = c(-1.3, -1.2, -1.2,
-1.2), Tmrt = c(-19.3, -12.1, -12, -12.1), PET = c(-10.4,
-8.7, -8.7, -8.7), PT = c(-11.3, -9.3, -9.3, -9.3), Ozon = c(61.35,
62.65, 63.4, 63.85), rDatum = structure(c(14610, 14610, 14610,
14610), class = "Date"), year = c(2010, 2010, 2010, 2010),
month = c(1, 1, 1, 1), day = c(1, 1, 1, 1), hour = c(0, 1,
2, 3)), .Names = c("date", "day.of", "time", "SVF_Ray", "Gmax",
"Ta", "Tmrt", "PET", "PT", "Ozon", "rDatum", "year", "month",
"day", "hour"), row.names = c(NA, 4L), class = "data.frame")
df %>%
  # split each day into three 8-hour chunks based on the hour column
  mutate(DayChunk = ifelse(hour %in% 0:7, "FirstThird",
                    ifelse(hour %in% 8:15, "SecondThird",
                           "ThirdThird"))) %>%
  # the data frame's date column is lower-case `date`; there is no `Date`
  group_by(date, DayChunk) %>%
  summarise(MedOzon = median(Ozon))
Look up the function seq.POSIXt. There are options to specify the start and stop intervals. This function is designed to create sequences of time. For your problem:
myseq<-seq(ISOdate(2010,01,01, 00, 00, 00, tz="GMT"), to=ISOdate(2016,01,05), by = "8 hour")
Use the ISOdate functions to set the start and stop times. If you are going to be working much with times, I suggest researching the function strptime and the POSIXlt/ct time classes.
Now with the breaks defined and assuming you have a column in your dataframe (Datum_Ozon) named "datetime", then use "cut" to group/subset your data.
# Build a POSIXct timestamp from the date and time columns. The format is
# passed by name: the second positional parameter of as.POSIXct() is tz,
# so an unnamed format string is easy to misroute.
Datum_Ozon$datetime <- as.POSIXct(
  paste(as.character(Datum_Ozon$date), as.character(Datum_Ozon$time)),
  format = "%d.%m.%Y %H:%M", tz = "GMT"
)
library(dplyr)
summarize(group_by(Datum_Ozon, cut(Datum_Ozon$datetime, myseq)), mean(Ozon))

Get all.polarity value from qdap package results in R

I wanted to do sentiment analysis in R using the qdap package.
It gives out a data frame containing all.all, all.wc, all.polarity, all.pos.words, all.neg.words etc.
I want to extract the values of all.polarity, all.pos.words and all.neg.words, but when I use
sentiment$all.polarity or sentiment$all.pos.words,
I get NULL in result.
dput(head(sentiment))
list(structure(list(all = c("all", "all", "all"), wc = c(44L,
1L, 1L), polarity = c(-0.422115882408869, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("disappointed", "issue"
), "-", "-"), text.var = c("list(list(content = \" misleaded icici bank customer care branch excutive really disappointed bank dont know steps take get issue fixed\", meta = list(author = character(0), datetimestamp = list(sec = 20.097678899765, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(61L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("led", "-",
"-"), neg.words = list("expire", "-", "-"), text.var = c("list(list(content = \" didnt know customer banking icici years will led people looking student travel card staff mg road treat customers tried offer card wud expire one year n told get new card one year dont know\", meta = list(author = character(0), datetimestamp = list(sec = 20.3989679813385, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(58L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("top", "-",
"-"), neg.words = list("worst", "-", "-"), text.var = c("list(list(content = \" asked staff can upgrade platinum coral card documentation fee will involoved even receiving card poeple sill keep calling top levied rs joining fee interested paying card one worst customer care experienced\", meta = list(author = character(0), datetimestamp = list(sec = 20.648964881897, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", \n origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(59L,
1L, 1L), polarity = c(-0.494717861727131, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("long time", "long time",
"disappointed"), "-", "-"), text.var = c("list(list(content = \" applied credit card corporate scheme long time back got verification call also long time back initially getting least response executive now longer picking call neither letting know status application extremely disappointed service\", meta = list(author = character(0), datetimestamp = list(sec = 20.8989698886871, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", \n language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(66L,
1L, 1L), polarity = c(0.0246182981958665, 0, 0), pos.words = list(
c("work", "support"), "-", "-"), neg.words = list("disappointed",
"-", "-"), text.var = c("list(list(content = \" otp service working used work month decided change everything im getting otp sms registered mobile number ive tried contacting customer support several times keep asking send sms despite done several times several days havent received otps ever really disappointed\", meta = list(author = character(0), datetimestamp = list(sec = 21.1935319900513, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), \n heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(50L,
1L, 1L), polarity = c(-0.282842712474619, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("pathetic", "lied"
), "-", "-"), text.var = c("list(list(content = \" pathetic service behavior icici bank facing past days icici executive lied luring upgrade debit card terms conditions just opposite booklet received told phone\", meta = list(author = character(0), datetimestamp = list(sec = 21.4258019924164, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"))
Can anyone suggest how to do this?
The following works for me -
library(qdap)
text <- "I am liking the work " # the text for which polarity score is needed
sentiment <- polarity(text) #make the call
sentiment$all$pos.words # returns the positive words detected by the algo
#[[1]]
#[1] "liking" "work"
sentiment$all$polarity # returns the sentence polarity score
#[1] 0.8944272

Resources