Related
I have a netcdf file (nc.in) containing 30 years of 3-hourly temperature data from 1981-2010. When I convert time to the gregorian calendar the resulting dates are incorrect. The output starts on 02/13/04 (should be 01/01/1981) and then skips three days each time rather than three hours, e.g. 01/13/04, then 02/16/04 etc. instead of 01/01/1981 00:00, then 01/01/1981 03:00 etc.
I used the following code to get time:
# Get time
# Raw values of the "time" variable of the open netCDF file
nc.t <- ncvar_get(nc.in,"time")
# "units" attribute of "time"; the units string itself is in nc.tunits$value
nc.tunits <- ncatt_get(nc.in,"time","units")
and then converted it to Gregorian dates using:
# Split the time units string into fields
# ("hours since 1900-01-01 00:00:00.0" -> "hours", "since", "1900-01-01", ...)
nc.tustr <- strsplit(nc.tunits$value, " ")
# The third field is the origin date; split it into year, month and day
nc.tdstr <- strsplit(unlist(nc.tustr)[3], "-")
nc.tyear = as.integer(unlist(nc.tdstr)[1])
nc.tmonth = as.integer(unlist(nc.tdstr)[2])
nc.tday = as.integer(unlist(nc.tdstr)[3])
# NOTE(review): two likely causes of the wrong dates on the next line.
#  1. The unit field ("hours") is never used. chron() counts in days, so a
#     3-hour step is read as 3 days; pass nc.t / 24 instead of nc.t.
#  2. chron's origin is ordered month, day, year; here it is given as
#     year, month, day. c(nc.tmonth, nc.tday, nc.tyear) is presumably intended.
#  i.e. chron(nc.t / 24, origin = c(nc.tmonth, nc.tday, nc.tyear)) -- confirm.
nc.chron=chron(nc.t, origin = c(nc.tyear, nc.tmonth, nc.tday))
See below for reproducibility. Sorry if this is not the correct info to provide - I am still learning how to provide reproducible examples.
dput(nc.in[1:10])
list(filename = "ERA5-STP-RUS-t2m.nc", writable = FALSE, id = 65536L,
safemode = FALSE, format = "NC_FORMAT_64BIT", is_GMT = FALSE,
groups = list(structure(list(id = 65536L, name = "", ndims = 3L,
nvars = 4L, natts = 2L, dimid = structure(0:2, .Dim = 3L),
fqgn = ""), class = "ncgroup4")), fqgn2Rindex = structure(list(
1L), .Names = ""), ndims = 3, natts = 2)
dput(nc.t[1:10])
structure(c(710040L, 710043L, 710046L, 710049L, 710052L, 710055L,
710058L, 710061L, 710064L, 710067L), .Dim = 10L)
dput(nc.tunits[1:10])
structure(list(TRUE, "hours since 1900-01-01 00:00:00.0", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL), .Names = c("hasatt",
"value", NA, NA, NA, NA, NA, NA, NA, NA))
dput(nc.chron[1:10])
structure(c(710040L, 710043L, 710046L, 710049L, 710052L, 710055L,
710058L, 710061L, 710064L, 710067L), .Dim = 10L, format = "m/d/y", origin = c(1900L,
1L, 1L), class = c("dates", "times"))
I'm new to working with nested lists, so I'm hoping the solution can also include some comments on how it works. I have a nested list that I scraped using jsonlite. How do I take the list data for all teams and bind it together into a single data.frame? The list is set up as shown below. I copied one element of the list (for 1 team).
Here is the code I used to get to the list that I've pasted below. I'm showing simply so that I can provide how the list is setup.
# Download and parse the JSON. simplifyDataFrame = TRUE asks jsonlite to turn
# arrays of records into data frames (spelled out as TRUE because T is an
# ordinary variable that can be reassigned).
json <-
  url %>%
  fromJSON(simplifyDataFrame = TRUE)
df <- json$body$rosters
# DF with each team showing up on its own line, but nested lists in players
df_teams <- df$teams
# One team's worth of data
JSON_list <- df_teams[1, ]
My list content is below.
# Reproducible copy of one team's row (a one-row data frame).
# `players` is a list column holding a two-row data frame (one row per player)
# that itself contains nested data frames (`wildcards`, `icons`) and a list
# column of per-player data frames (`opponents`).
JSON_list <- structure(list(
projected_points = NA, long_abbr = "KE", lineup_status = "ok",
short_name = "Kramerica", total_roster_salary = 22L, division = "",
players = list(structure(list(
firstname = c(
"Jonathan", "Anthony"
), wildcards = structure(list(
contract = c("1", "1"),
salary = c("1", "21")
), class = "data.frame", row.names = c(
NA,
2L
)), on_waivers = c(
0L, 0L
), photo = c(
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1657581.png",
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1670417.png"
),
eligible_for_offense_and_defense = c(0L, 0L),
opponents = list(
structure(list(
game_id = c(
"", ""
), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet"
),
weather_icon_code = c(
"", ""
), home_team = c("true", "true"),
abbrev = c("OAK", "OAK"),
time = c(
1553803620L,
1553911620L
),
date = c(
"20190328",
"20190329"
), weather_icon_url = c(
"", ""
), venue_type = c("", ""), game_abbr = c("", ""),
weather = c("", ""), temperature = c(
NA, NA
)
), class = "data.frame", row.names = c(NA, 2L)),
structure(list(game_id = c("", "", ""), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet", "Weather is not available for this game yet"
), weather_icon_code = c("", "", ""), home_team = c(
"true",
"true", "true"
), abbrev = c("TEX", "TEX", "TEX"), time = c(
1553803500L,
1553990700L, 1554062700L
), date = c(
"20190328", "20190330",
"20190331"
), weather_icon_url = c("", "", ""), venue_type = c(
"",
"", ""
), game_abbr = c("", "", ""), weather = c(
"", "",
""
), temperature = c(NA, NA, NA)), class = "data.frame", row.names = c(
NA,
3L
))
), icons = structure(list(
headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
),
hot = c(NA, 1L),
cold = c(1L, NA),
injury = c(
"Knee: Questionable for start of season",
NA
)
# icons has one row per player (2), matching the length of its columns
), class = "data.frame", row.names = c(NA, 2L)), elias_id = c(
"LUC758619", "RIZ253611"
), percentstarted = c(
"48%", "97%"
),
profile_link = c(
"<a class='playerLink' aria-label=' Jonathan Lucroy C LAA' href='http://baseball.cbssports.com/players/playerpage/1657581'>Jonathan Lucroy</a> <span class=\"playerPositionAndTeam\">C | LAA</span> ",
"<a class='playerLink' aria-label=' Anthony Rizzo 1B CHC' href='http://baseball.cbssports.com/players/playerpage/1670417'>Anthony Rizzo</a> <span class=\"playerPositionAndTeam\">1B | CHC</span>"
),
id = c(
"1657581", "1670417"
), pro_status = c(
"A", "A"
), on_waivers_until = c(NA, NA), jersey = c("20", "44"),
percentowned = c("61%", "99%"),
pro_team = c(
"LAA", "CHC"
), position = c(
"C", "1B"
), lastname = c(
"Lucroy", "Rizzo"
),
roster_pos = c("C", "1B"),
update_type = c("normal", "normal"),
age = c(
32L, 29L
), eligible = c(
"C,U", "1B,U"
), is_locked = c(
0L,
0L
), bats = c(
"R", "L"
), owned_by_team_id = c(
12L, 12L
), ytd_points = c(
0L, 0L
), roster_status = c(
"A", "A"
), is_keeper = c(
0L, 0L
), profile_url = c(
"http://baseball.cbssports.com/players/playerpage/1657581",
"http://baseball.cbssports.com/players/playerpage/1670417"
), fullname = c(
"Jonathan Lucroy", "Anthony Rizzo"
), throws = c(
"R",
"L"
), headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
), `starting-pitcher-today` = c(
NA, "false"
), injury = c(NA, "Knee"), return = c(
"Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 2L))),
name = "Kramerica Enterprises", logo = "http://baseball.cbssports.com/images/team-logo/main-36x36.jpg",
abbr = "KE", point = "20190328", id = "12", active_roster_salary = 22L,
warning = structure(list(description = NA_character_), row.names = 1L, class = "data.frame")
), row.names = 1L, class = "data.frame")
# Desired table sample (does not include all columns)
tibble::tribble(
~projected_points, ~long_abbr, ~lineup_status, ~short_name, ~total_roster_salary, ~division, ~name, ~logo, ~abbr, ~point5, ~active_roster_salary, ~id2, ~firstname, ~contract, ~salary,
NA, "KE", "ok", "Kramerica", 22, NA, "Biloxi Blackjacks", NA, "KE", 20190328, 22, 1657581, "Jonathan", 1, 1
)
The issue I'm running into is that the players column looks to be a nested df, and it also has other nested dfs in it, specifically "wildcards", "opponents" and "icons". I am looking for a data frame that contains all of the columns. For the nested lists, I'd like their content to show up as columns for that particular player, e.g. for wildcards, create a column each for "contract" and "salary". Also, how would I bind the list together if I wanted to choose specific columns, e.g. "long_abbr", "lineup_status", etc. from JSON_list, and "firstname", both wildcards columns, "id", and some others from JSON_list$players?
You can isolate the list elements using [[]] and the columns using [] if you have a nested structure. If the numbers of rows are equal, you can directly build your data frame using cbind.
Let's make a reproducible example
Create 3 data frames of similar dimensions
# Three toy data frames with three rows each and distinct column names
df1 <- data.frame(
  var1 = letters[1:3],
  var2 = letters[4:6],
  var3 = 1:3
)
df2 <- data.frame(
  var4 = letters[7:9],
  var5 = letters[10:12],
  var6 = 4:6
)
df3 <- data.frame(
  var7 = 6:8,
  var8 = letters[10:12],
  var9 = 4:6
)
Put the data frames in a nested list structure
# Group df1 and df2 in an inner list, then nest that list next to df3.
# The inner list is deliberately not called `list`, so that the base function
# list() is not shadowed by a variable of the same name.
inner.list <- list(df1, df2)
nested.list <- list(inner.list, df3)
Make a binded data frame made of var2, var6 and var7
# Single-bracket indexing keeps each pick as a one-column data frame
col.var2 <- nested.list[[1]][[1]][2]  # 2nd column of the first inner frame
col.var6 <- nested.list[[1]][[2]][3]  # 3rd column of the second inner frame
col.var7 <- nested.list[[2]][1]       # 1st column of the outer frame
binded.df <- cbind(col.var2, col.var6, col.var7)
I want to access the 'bandSpecificMetadata' entries of a multi-dimensional list in R, and create a vector of 'reflectanceCoefficient' values for my remote sensing project.
Firstly, I was able to reduce the dimension of the list and then used nodes <- get('EarthObservationResult', matadata.list$resultOf) to extract the list.
The problem comes when I try to build a mapping from band number to coefficient (e.g. band number 1 corresponds to reflectance coefficient 2.21e-5) using a for loop.
# Walk over elements 6 to 9 of `nodes` and print each node's coefficient
for(node in nodes[6:9]) {
# Look up the "bandNumber" entry of the current node
bn = get("bandNumber", node)
if(bn %in% c('1','2','3','4')){
# NOTE(review): integer(n) builds a zero-filled vector of length n; it does
# not convert "1" to 1 (as.integer() does). `i` is not used afterwards.
i = integer(bn)
# Plain assignment: `coeffs` is replaced on every iteration, so it only ever
# holds the current node's value
coeffs = get("reflectanceCoefficient", node)
}
print(coeffs)
}
which prints out:
[1] "2.21386105481e-05"
[1] "2.31474175457e-05"
[1] "2.60208594123e-05"
[1] "3.83481925626e-05"
But I want a vector with 1, 2, 3, 4 with the corresponding numbers. It seems to me that the number overwrites the last one every time it prints.
Then I tried:
for(node in nodes[6:9]) {
n = 1:4
# NOTE(review): b[n] addresses four elements at once, so the single band
# number of this node is written to all of b[1:4]
b[n] = get("bandNumber", node)
# if() expects a length-1 condition; this one has length 4, which is what the
# "condition has length > 1" warning reports
if(b[n] %in% c('1','2','3','4')){
# integer() takes a length, not a value to convert, and receives four strings
# here -> "invalid 'length' argument". as.integer() applied to one band
# number is presumably what was intended.
i = integer(b[n])
coeffs[i] = get("reflectanceCoefficient", node)
}
print(coeffs)
}
But turns out
Error in integer(b[n]) : invalid 'length' argument
In addition: Warning message:
In if (b[n] %in% c("1", "2", "3", "4")) { :
the condition has length > 1 and only the first element will be used
How do I fix this?
I used XML::xmlParse() to parse the xml and matadata.list <- XML::xmlToList() to convert the data to list.
For reproducible example, see below:
dput(matadata.list)
structure(list(metaDataProperty = structure(list(EarthObservationMetaData = structure(list(
identifier = "20170127_213132_0e0e_3B_AnalyticMS", acquisitionType = "NOMINAL",
productType = "L3B", status = "ARCHIVED", downlinkedTo = structure(list(
DownlinkInformation = structure(list(acquisitionStation = structure(list(
text = "Planet Ground Station Network", .attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), acquisitionDate = "2017-01-27T21:31:32+00:00"), .Names = c("acquisitionStation",
"acquisitionDate"))), .Names = "DownlinkInformation"),
archivedIn = structure(list(ArchivingInformation = structure(list(
archivingCenter = structure(list(text = "Planet Archive Center",
.attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), archivingDate = "2017-01-27T21:31:32+00:00",
archivingIdentifier = structure(list(text = "385180",
.attrs = structure("urn:eop:PS:dmsCatalogueId", .Names = "codeSpace")), .Names = c("text",
".attrs"))), .Names = c("archivingCenter", "archivingDate",
"archivingIdentifier"))), .Names = "ArchivingInformation"),
processing = structure(list(ProcessingInformation = structure(list(
processorName = "CMO Processor", processorVersion = "4.1.4",
nativeProductFormat = "GeoTIFF"), .Names = c("processorName",
"processorVersion", "nativeProductFormat"))), .Names = "ProcessingInformation"),
license = structure(list(licenseType = "20160101 - Inc - Single User",
resourceLink = structure(c("PL EULA", "https://assets.planet.com/docs/20160101_Inc_SingleUser.txt"
), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xlink",
"xlink"), .Names = c("http://www.w3.org/1999/xlink",
"http://www.w3.org/1999/xlink")), .Names = c("title",
"href"))), .Names = c("licenseType", "resourceLink")),
versionIsd = "1.0", pixelFormat = "16U"), .Names = c("identifier",
"acquisitionType", "productType", "status", "downlinkedTo", "archivedIn",
"processing", "license", "versionIsd", "pixelFormat"))), .Names = "EarthObservationMetaData"),
validTime = structure(list(TimePeriod = structure(list(beginPosition = "2017-01-27T21:31:32+00:00",
endPosition = "2017-01-27T21:31:32+00:00"), .Names = c("beginPosition",
"endPosition"))), .Names = "TimePeriod"), using = structure(list(
EarthObservationEquipment = structure(list(platform = structure(list(
Platform = structure(list(shortName = "PlanetScope",
serialIdentifier = "0e0e", orbitType = "LEO-SSO"), .Names = c("shortName",
"serialIdentifier", "orbitType"))), .Names = "Platform"),
instrument = structure(list(Instrument = structure(list(
shortName = "PS2"), .Names = "shortName")), .Names = "Instrument"),
sensor = structure(list(Sensor = structure(list(sensorType = "OPTICAL",
resolution = structure(list(text = "3.0000",
.attrs = structure("m", .Names = "uom")), .Names = c("text",
".attrs")), scanType = "FRAME"), .Names = c("sensorType",
"resolution", "scanType"))), .Names = "Sensor"),
acquisitionParameters = structure(list(Acquisition = structure(list(
orbitDirection = "DESCENDING", incidenceAngle = structure(list(
text = "8.072969e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationAzimuthAngle = structure(list(
text = "7.610387e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationElevationAngle = structure(list(
text = "4.649194e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), azimuthAngle = structure(list(text = "1.242074e+01",
.attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), spaceCraftViewAngle = structure(list(
text = "5.692807e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), acquisitionDateTime = "2017-01-27T21:31:32+00:00"), .Names = c("orbitDirection",
"incidenceAngle", "illuminationAzimuthAngle", "illuminationElevationAngle",
"azimuthAngle", "spaceCraftViewAngle", "acquisitionDateTime"
))), .Names = "Acquisition")), .Names = c("platform",
"instrument", "sensor", "acquisitionParameters"))), .Names = "EarthObservationEquipment"),
target = structure(list(Footprint = structure(list(multiExtentOf = structure(list(
MultiSurface = structure(list(surfaceMembers = structure(list(
Polygon = structure(list(outerBoundaryIs = structure(list(
LinearRing = structure(list(coordinates = "175.446585079397,-37.7068873856657 175.446633607572,-37.7045627724835 175.46731776545,-37.6311749428137 175.468010520596,-37.6311839417076 175.75989021492,-37.6819836599337 175.759889856814,-37.6820051679817 175.739424097003,-37.757826933992 175.739359440859,-37.7578262423109 175.446585079397,-37.7068873856657"), .Names = "coordinates")), .Names = "LinearRing"),
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("outerBoundaryIs",
".attrs"))), .Names = "Polygon"), .attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("surfaceMembers",
".attrs"))), .Names = "MultiSurface"), centerOf = structure(list(
Point = structure(list(pos = "175.603162359 -37.6944367036",
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("pos",
".attrs"))), .Names = "Point"), geographicLocation = structure(list(
topLeft = structure(list(latitude = "-37.6311749428",
longitude = "175.446585079"), .Names = c("latitude",
"longitude")), topRight = structure(list(latitude = "-37.6311749428",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomRight = structure(list(latitude = "-37.757826934",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomLeft = structure(list(latitude = "-37.757826934",
longitude = "175.446585079"), .Names = c("latitude",
"longitude"))), .Names = c("topLeft", "topRight", "bottomRight",
"bottomLeft"))), .Names = c("multiExtentOf", "centerOf",
"geographicLocation"))), .Names = "Footprint"), resultOf = structure(list(
EarthObservationResult = structure(list(product = structure(list(
ProductInformation = structure(list(fileName = "20170127_213132_0e0e_3B_AnalyticMS.tif",
productFormat = "GeoTIFF", spatialReferenceSystem = structure(list(
epsgCode = "32760", geodeticDatum = "WGS_1984",
projection = "WGS 84 / UTM zone 60S", projectionZone = "160"), .Names = c("epsgCode",
"geodeticDatum", "projection", "projectionZone"
)), resamplingKernel = "CC", numRows = "4565",
numColumns = "9194", numBands = "4", rowGsd = "3.0",
columnGsd = "3.0", radiometricCorrectionApplied = "true",
geoCorrectionLevel = "Precision Geocorrection",
elevationCorrectionApplied = "FineDEM", atmosphericCorrectionApplied = "false"), .Names = c("fileName",
"productFormat", "spatialReferenceSystem", "resamplingKernel",
"numRows", "numColumns", "numBands", "rowGsd", "columnGsd",
"radiometricCorrectionApplied", "geoCorrectionLevel",
"elevationCorrectionApplied", "atmosphericCorrectionApplied"
))), .Names = "ProductInformation"), mask = structure(list(
MaskInformation = structure(list(type = "UNUSABLE DATA",
format = "RASTER", referenceSystemIdentifier = structure(list(
text = "32760", .attrs = structure("EPSG", .Names = "codeSpace")), .Names = c("text",
".attrs")), fileName = "20170127_213132_0e0e_3B_AnalyticMS_DN_udm.tif"), .Names = c("type",
"format", "referenceSystemIdentifier", "fileName"
))), .Names = "MaskInformation"), cloudCoverPercentage = structure(list(
text = "0.01", .attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), cloudCoverPercentageQuotationMode = "AUTOMATIC",
unusableDataPercentage = structure(list(text = "0.0",
.attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), bandSpecificMetadata = structure(list(
bandNumber = "1", comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.21386105481e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "2",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.31474175457e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "3",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.60208594123e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "4",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "3.83481925626e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
))), .Names = c("product", "mask", "cloudCoverPercentage",
"cloudCoverPercentageQuotationMode", "unusableDataPercentage",
"bandSpecificMetadata", "bandSpecificMetadata", "bandSpecificMetadata",
"bandSpecificMetadata"))), .Names = "EarthObservationResult"),
.attrs = structure(c("http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level.xsd",
"1.2.1", "1.0"), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xsi",
"", ""), .Names = c("http://www.w3.org/2001/XMLSchema-instance",
"", "")), .Names = c("schemaLocation", "version", "planet_standard_product_version"
))), .Names = c("metaDataProperty", "validTime", "using",
"target", "resultOf", ".attrs"))
As you did not provide any reproducible data, the following attempt may not work:
# Positions of the band nodes inside `nodes`
band.idx <- 6:9
# Initialise vectors (one slot per band node, indexed 1..4 rather than 6..9,
# so the vectors are not padded with empty leading elements):
b <- vector(mode = "character", length = length(band.idx))
coeffs <- vector(mode = "character", length = length(band.idx))
# Get coefficients
for (k in seq_along(band.idx)) {
  node <- nodes[[band.idx[k]]]
  b[k] <- node[["bandNumber"]]
  # Band numbers are the strings "1" to "4"; anything else yields NA.
  # A plain if/else is used because the condition is a single value.
  coeffs[k] <- if (b[k] %in% c("1", "2", "3", "4")) {
    node[["reflectanceCoefficient"]]  # Yes cond val
  } else {
    NA_character_                     # No cond val
  }
}
coeffs
(edited to answer the updated question)
Have a look at these answers to work with original xml data: How to parse XML to R data frame
You already parsed the xml file and now you have lists. I think package purrr (https://purrr.tidyverse.org/) helps a lot in this case.
I assume that we know the path to the EarthObservationResult. Note how we extract reflectanceCoefficient from all sub-nodes and discard the NULL elements with compact.
library(tidyverse)
# All children of EarthObservationResult, including the four band nodes
nodes <- matadata.list$resultOf$EarthObservationResult
# The result is stored under the same name that is printed below
coeffs <- nodes %>%
  purrr::map("reflectanceCoefficient") %>%  # NULL where the entry is absent
  purrr::compact() %>%                      # drop those NULLs
  purrr::map_dbl(as.numeric) %>%            # character -> double
  purrr::set_names(nm = NULL)               # strip the repeated node names
print(coeffs)
#> [1] 2.213861e-05 2.314742e-05 2.602086e-05 3.834819e-05
Created on 2018-08-28 by the reprex package (v0.2.0).
I have two data frames: users and events.
Both data frames contain a field that links events to users.
How can I create a for loop where every user's unique ID is matched against an event of a particular type and then stores the number of occurrences into a new column within users (users$conversation_started, users$conversation_missed, etc.)?
In short, it is a conditional for loop.
So far I have this but it is wrong:
# NOTE(review): the id column shown in the data below is `_id`, not `id`
for(i in users$id){
# NOTE(review): problems on the next line -- `=` inside [ ] is not a
# comparison (`==` is); the data frame is called `events`, not `event`; a row
# filter on a data frame needs a trailing comma (events[rows, ]); and the
# loop variable `i` is never used, so nothing is filtered per user.
users$conversation_started <- nrow(event[event$type = "conversation-started"])
}
An example of how to do this would be ideal.
The idea is:
for(each user)
find the matching user ID in events
count the number of event types == "conversation-started"
assign count value to user$conversation_started
end for
Important note:
The type field can contain one of five values so I will need to be able to effectively filter on each type for each associate:
> events$type %>% table %>% as.matrix
[,1]
conversation-accepted 3120
conversation-already-accepted 19673
conversation-declined 27
conversation-missed 831
conversation-request 23427
Data frames (note that these are reduced versions as confidential information has been removed):
users <- structure(list(`_id` = c("JTuXhdI4Ai", "iGIeCEXyVE", "6XFtOJh0bD",
"mNN986oQv9", "9NI71KBMX9", "x1jH7t0Cmy"), language = c("en",
"en", "en", "en", "en", "en"), registering = c(TRUE, TRUE, FALSE,
FALSE, FALSE, NA), `_created_at` = structure(c(1485995043.131,
1488898839.838, 1480461193.146, 1481407887.979, 1489942757.189,
1491311381.916), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`_updated_at` = structure(c(1521039527.236, 1488898864.834,
1527618624.877, 1481407959.116, 1490043838.561, 1491320333.09
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), lastOnlineTimestamp = c(1521039526.90314,
NA, 1480461472, 1481407959, 1490043838, NA), isAgent = c(FALSE,
NA, FALSE, FALSE, FALSE, NA), lastAvailableTime = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), available = c(NA, NA, NA, NA, NA,
NA), busy = c(NA, NA, NA, NA, NA, NA), joinedTeam = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), timezone = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c("list.1", "list.2", "list.3", "list.4",
"list.5", "list.6"), class = "data.frame")
and
events <- structure(list(`_id` = c("JKY8ZwkM1S", "CG7Xj8dAsA", "pUkFFxoahy",
"yJVJ34rUCl", "XxXelkIFh7", "GCOsENVSz6"), expirationTime = structure(c(1527261147.873,
NA, 1527262121.332, NA, 1527263411.619, 1527263411.619), class = c("POSIXct",
"POSIXt"), tzone = ""), partId = c("d22bfddc-cd51-489f-aec8-5ab9225c0dd5",
"d22bfddc-cd51-489f-aec8-5ab9225c0dd5", "cf4356da-b63e-4e4d-8e7b-fb63035801d8",
"cf4356da-b63e-4e4d-8e7b-fb63035801d8", "a720185e-c300-47c0-b30d-64e1f272d482",
"a720185e-c300-47c0-b30d-64e1f272d482"), type = c("conversation-request",
"conversation-accepted", "conversation-request", "conversation-accepted",
"conversation-request", "conversation-request"), `_p_conversation` = c("Conversation$6nSaLeWqs7",
"Conversation$6nSaLeWqs7", "Conversation$6nSaLeWqs7", "Conversation$6nSaLeWqs7",
"Conversation$bDuAYSZgen", "Conversation$bDuAYSZgen"), `_p_merchant` = c("Merchant$0A2UYADe5x",
"Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x",
"Merchant$0A2UYADe5x", "Merchant$0A2UYADe5x"), `_p_associate` = c("D9ihQOWrXC",
"D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC", "D9ihQOWrXC"
), `_wperm` = list(list(), list(), list(), list(), list(), list()),
`_rperm` = list("*", "*", "*", "*", "*", "*"), `_created_at` = structure(c(1527264657.998,
1527264662.043, 1527265661.846, 1527265669.435, 1527266922.056,
1527266922.059), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`_updated_at` = structure(c(1527264657.998, 1527264662.043,
1527265661.846, 1527265669.435, 1527266922.056, 1527266922.059
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), read = c(TRUE,
NA, TRUE, NA, NA, NA), data.customerName = c("Shopper 109339",
NA, "Shopper 109339", NA, "Shopper 109364", "Shopper 109364"
), data.departmentName = c("Personal advisors", NA, "Personal advisors",
NA, "Personal advisors", "Personal advisors"), data.recurring = c(FALSE,
NA, TRUE, NA, FALSE, FALSE), data.new = c(TRUE, NA, FALSE,
NA, TRUE, TRUE), data.missed = c(0L, NA, 0L, NA, 0L, 0L),
data.customerId = c("84uOFRLmLd", "84uOFRLmLd", "84uOFRLmLd",
"84uOFRLmLd", "5Dw4iax3Tj", "5Dw4iax3Tj"), data.claimingTime = c(NA,
4L, NA, 7L, NA, NA), data.lead = c(NA, NA, FALSE, NA, NA,
NA), data.maxMissed = c(NA, NA, NA, NA, NA, NA), data.associateName = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), data.maxDecline = c(NA, NA, NA, NA, NA, NA
), data.goUnavailable = c(NA, NA, NA, NA, NA, NA)), row.names = c("list.1",
"list.2", "list.3", "list.4", "list.5", "list.6"), class = "data.frame")
Update: 21st September 2018
This solution now results in an NA-only data frame being produced at the end of the function. When written to a .csv, this is what I get (naturally, Excel displays NA-values as blank values):
My data source has not changed, nor has my script.
What might be causing this?
My guess is that an unforeseen case has occurred in which there were 0 hits at each step; if so, is there a way to record 0 for the cases where there weren't any hits, rather than NA/blank values?
Is there a way to avoid this?
New solution based on the provided data.
Note: As your data had no overlap in _id, I changed the events$_id to be the same as in users.
Simplified example data:
users <- structure(list(`_id` = structure(c(4L, 3L, 1L, 5L, 2L, 6L),
.Label = c("6XFtOJh0bD", "9NI71KBMX9", "iGIeCEXyVE",
"JTuXhdI4Ai", "mNN986oQv9", "x1jH7t0Cmy"),
class = "factor")), .Names = "_id",
row.names = c(NA, -6L), class = "data.frame")
events <- structure(list(`_id` = c("JKY8ZwkM1S", "CG7Xj8dAsA", "pUkFFxoahy",
"yJVJ34rUCl", "XxXelkIFh7", "GCOsENVSz6"),
type = c("conversation-request", "conversation-accepted",
"conversation-request", "conversation-accepted",
"conversation-request", "conversation-request")),
.Names = c("_id", "type"), class = "data.frame",
row.names = c("list.1", "list.2", "list.3", "list.4", "list.5", "list.6"))
events$`_id` <- users$`_id`
> users
_id
1 JTuXhdI4Ai
2 iGIeCEXyVE
3 6XFtOJh0bD
4 mNN986oQv9
5 9NI71KBMX9
6 x1jH7t0Cmy
> events
_id type
list.1 JTuXhdI4Ai conversation-request
list.2 iGIeCEXyVE conversation-accepted
list.3 6XFtOJh0bD conversation-request
list.4 mNN986oQv9 conversation-accepted
list.5 9NI71KBMX9 conversation-request
list.6 x1jH7t0Cmy conversation-request
We can use the same approach I suggested before, just enhance it a bit.
First we loop over unique(events$type) to store a table() of every type of event per id in a list:
# One table per event type: rows are ids, columns say whether an event is of
# that type. The explicit factor levels guarantee that both a "FALSE" and a
# "TRUE" column exist, even when a type matches every event.
test <- lapply(unique(events$type), function(x) {
  table(events$`_id`, factor(events$type == x, levels = c(FALSE, TRUE)))
})
Then we store the specific type as the name of the respective table in the list:
# Label each table with the event type it was built from
event.types <- unique(events[["type"]])
names(test) <- event.types
Now we use a simple for-loop to match() users$`_id` with the rownames of each table and store the counts in a new column of users named after the event type:
# Add one count column per event type to `users`
for (i in names(test)) {
  # Take the "TRUE" column by name, so the lookup does not depend on a
  # "FALSE" column being present in the table
  counts <- test[[i]][, "TRUE"]
  # Line the counts up with the users via their id
  hits <- counts[match(users$`_id`, rownames(test[[i]]))]
  # match() yields NA for users without any event; report those as 0
  hits[is.na(hits)] <- 0L
  users[, i] <- hits
}
Result:
> users
_id conversation-request conversation-accepted
1 JTuXhdI4Ai 1 0
2 iGIeCEXyVE 0 1
3 6XFtOJh0bD 1 0
4 mNN986oQv9 0 1
5 9NI71KBMX9 1 0
6 x1jH7t0Cmy 1 0
Hope this helps!
I wanted to do sentiment analysis in R using the qdap package.
It gives out a data frame containing all.all, all.wc, all.polarity, all.pos.words, all.neg.words etc.
I want to extract the values of all.polarity, all.pos.words,all.neg.words but when i use
sentiment$all.polarity or sentiment$all.pos.words,
I get NULL in result.
dput(head(sentiment))
list(structure(list(all = c("all", "all", "all"), wc = c(44L,
1L, 1L), polarity = c(-0.422115882408869, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("disappointed", "issue"
), "-", "-"), text.var = c("list(list(content = \" misleaded icici bank customer care branch excutive really disappointed bank dont know steps take get issue fixed\", meta = list(author = character(0), datetimestamp = list(sec = 20.097678899765, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(61L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("led", "-",
"-"), neg.words = list("expire", "-", "-"), text.var = c("list(list(content = \" didnt know customer banking icici years will led people looking student travel card staff mg road treat customers tried offer card wud expire one year n told get new card one year dont know\", meta = list(author = character(0), datetimestamp = list(sec = 20.3989679813385, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(58L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("top", "-",
"-"), neg.words = list("worst", "-", "-"), text.var = c("list(list(content = \" asked staff can upgrade platinum coral card documentation fee will involoved even receiving card poeple sill keep calling top levied rs joining fee interested paying card one worst customer care experienced\", meta = list(author = character(0), datetimestamp = list(sec = 20.648964881897, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", \n origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(59L,
1L, 1L), polarity = c(-0.494717861727131, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("long time", "long time",
"disappointed"), "-", "-"), text.var = c("list(list(content = \" applied credit card corporate scheme long time back got verification call also long time back initially getting least response executive now longer picking call neither letting know status application extremely disappointed service\", meta = list(author = character(0), datetimestamp = list(sec = 20.8989698886871, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", \n language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(66L,
1L, 1L), polarity = c(0.0246182981958665, 0, 0), pos.words = list(
c("work", "support"), "-", "-"), neg.words = list("disappointed",
"-", "-"), text.var = c("list(list(content = \" otp service working used work month decided change everything im getting otp sms registered mobile number ive tried contacting customer support several times keep asking send sms despite done several times several days havent received otps ever really disappointed\", meta = list(author = character(0), datetimestamp = list(sec = 21.1935319900513, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), \n heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(50L,
1L, 1L), polarity = c(-0.282842712474619, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("pathetic", "lied"
), "-", "-"), text.var = c("list(list(content = \" pathetic service behavior icici bank facing past days icici executive lied luring upgrade debit card terms conditions just opposite booklet received told phone\", meta = list(author = character(0), datetimestamp = list(sec = 21.4258019924164, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"))
Can anyone suggest how to do this?
The following works for me -
library(qdap)
text <- "I am liking the work " # the text for which polarity score is needed
sentiment <- polarity(text) # score the text; the results are read from $all below
sentiment$all$pos.words # returns the positive words detected by the algo
#[[1]]
#[1] "liking" "work"
sentiment$all$polarity # returns the sentence polarity score
#[1] 0.8944272