I'm new to working with nested lists, so I'm hoping the solution can also include some comments on the how. I have a nested list that I scraped using jsonlite. How do I take the list data for all teams and bind it together into a single data.frame? The list is set up as shown below; I copied one element of the list (for one team).
Here is the code I used to get to the list that I've pasted below. I'm showing it simply so you can see how the list is set up.
json <-
url %>%
fromJSON(simplifyDataFrame = T)
df <- json$body$rosters
# DF with each team showing up on its own line, but nested lists in players
df_teams <- df$teams
# One team's worth of data
JSON_list <- df_teams[1, ]
My list content is below.
JSON_list <- structure(list(
projected_points = NA, long_abbr = "KE", lineup_status = "ok",
short_name = "Kramerica", total_roster_salary = 22L, division = "",
players = list(structure(list(
firstname = c(
"Jonathan", "Anthony"
), wildcards = structure(list(
contract = c("1", "1"),
salary = c("1", "21")
), class = "data.frame", row.names = c(
NA,
2L
)), on_waivers = c(
0L, 0L
), photo = c(
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1657581.png",
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1670417.png"
),
eligible_for_offense_and_defense = c(0L, 0L),
opponents = list(
structure(list(
game_id = c(
"", ""
), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet"
),
weather_icon_code = c(
"", ""
), home_team = c("true", "true"),
abbrev = c("OAK", "OAK"),
time = c(
1553803620L,
1553911620L
),
date = c(
"20190328",
"20190329"
), weather_icon_url = c(
"", ""
), venue_type = c("", ""), game_abbr = c("", ""),
weather = c("", ""), temperature = c(
NA, NA
)
), class = "data.frame", row.names = c(NA, 2L)),
structure(list(game_id = c("", "", ""), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet", "Weather is not available for this game yet"
), weather_icon_code = c("", "", ""), home_team = c(
"true",
"true", "true"
), abbrev = c("TEX", "TEX", "TEX"), time = c(
1553803500L,
1553990700L, 1554062700L
), date = c(
"20190328", "20190330",
"20190331"
), weather_icon_url = c("", "", ""), venue_type = c(
"",
"", ""
), game_abbr = c("", "", ""), weather = c(
"", "",
""
), temperature = c(NA, NA, NA)), class = "data.frame", row.names = c(
NA,
3L
))
), icons = structure(list(
headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
),
hot = c(NA, 1L),
cold = c(1L, NA),
injury = c(
"Knee: Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 2L)), elias_id = c(
"LUC758619", "RIZ253611"
), percentstarted = c(
"48%", "97%"
),
profile_link = c(
"<a class='playerLink' aria-label=' Jonathan Lucroy C LAA' href='http://baseball.cbssports.com/players/playerpage/1657581'>Jonathan Lucroy</a> <span class=\"playerPositionAndTeam\">C | LAA</span> ",
"<a class='playerLink' aria-label=' Anthony Rizzo 1B CHC' href='http://baseball.cbssports.com/players/playerpage/1670417'>Anthony Rizzo</a> <span class=\"playerPositionAndTeam\">1B | CHC</span>"
),
id = c(
"1657581", "1670417"
), pro_status = c(
"A", "A"
), on_waivers_until = c(NA, NA), jersey = c("20", "44"),
percentowned = c("61%", "99%"),
pro_team = c(
"LAA", "CHC"
), position = c(
"C", "1B"
), lastname = c(
"Lucroy", "Rizzo"
),
roster_pos = c("C", "1B"),
update_type = c("normal", "normal"),
age = c(
32L, 29L
), eligible = c(
"C,U", "1B,U"
), is_locked = c(
0L,
0L
), bats = c(
"R", "L"
), owned_by_team_id = c(
12L, 12L
), ytd_points = c(
0L, 0L
), roster_status = c(
"A", "A"
), is_keeper = c(
0L, 0L
), profile_url = c(
"http://baseball.cbssports.com/players/playerpage/1657581",
"http://baseball.cbssports.com/players/playerpage/1670417"
), fullname = c(
"Jonathan Lucroy", "Anthony Rizzo"
), throws = c(
"R",
"L"
), headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
), `starting-pitcher-today` = c(
NA, "false"
), injury = c(NA, "Knee"), return = c(
"Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 2L))),
name = "Kramerica Enterprises", logo = "http://baseball.cbssports.com/images/team-logo/main-36x36.jpg",
abbr = "KE", point = "20190328", id = "12", active_roster_salary = 22L,
warning = structure(list(description = NA_character_), row.names = 1L, class = "data.frame")
), row.names = 1L, class = "data.frame")
# Desired table sample (does not include all columns)
tibble::tribble(
~projected_points, ~long_abbr, ~lineup_status, ~short_name, ~total_roster_salary, ~division, ~name, ~logo, ~abbr, ~point5, ~active_roster_salary, ~id2, ~firstname, ~contract, ~salary,
NA, "KE", "ok", "Kramerica", 22, NA, "Biloxi Blackjacks", NA, "KE", 20190328, 22, 1657581, "Jonathan", 1, 1
)
The issue I'm running into is that the players column appears to be a nested data frame, and it also has other nested data frames inside it, specifically "wildcards", "opponents" and "icons". I am looking for a data frame that contains all of the columns. For the nested lists, I'd like their content to show up as columns for that particular player, i.e. for wildcards, create a column for "contract" and a column for "salary". Also, how would I bind the list together if I wanted to choose specific columns, i.e. "long_abbr", "lineup_status", etc. from JSON_list, plus "firstname", both wildcard columns, "id", and some others from JSON_list$players?
You can isolate the list elements using [[]] and the columns using [] if you have a nested structure. If the numbers of rows are equal, you can directly make your data frame using cbind().
Let's make a reproducible example
Create 3 data frames of similar dimensions
df1 <- data.frame(var1=c('a', 'b', 'c'), var2=c('d', 'e', 'f'), var3=1:3)
df2 <- data.frame(var4=c('g', 'h', 'i'), var5=c('j', 'k', 'l'), var6=4:6)
df3 <- data.frame(var7=c(6:8), var8=c('j', 'k', 'l'), var9=4:6)
Put the data frames in a nested list structure
inner.list <- list(df1, df2)            # avoid naming this `list`, which masks base::list()
nested.list <- list(inner.list, df3)
Make a combined data frame made of var2, var6 and var7
binded.df <- cbind(nested.list[[1]][[1]][2], nested.list[[1]][[2]][3], nested.list[[2]][1])
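Building on the same idea, here is a minimal sketch for the structure in the question (not run against the live feed): pull the players data frame out of its list-column, flatten the nested wildcards and icons data frames with jsonlite::flatten(), and cbind the chosen team-level columns onto it. The opponents list-column has a different number of rows per player, so it is left nested here.
library(jsonlite)
players_df   <- JSON_list$players[[1]]         # the nested players data frame
players_flat <- jsonlite::flatten(players_df)  # wildcards.* and icons.* become columns
# team-level columns to carry along (pick whichever you need)
team_cols <- JSON_list[, c("long_abbr", "lineup_status", "short_name",
                           "name", "abbr", "id")]
# repeat the single team row once per player, then bind
one_team <- cbind(team_cols[rep(1, nrow(players_flat)), ], players_flat)
rownames(one_team) <- NULL
To repeat this for every team, run the same steps over each row of df_teams (e.g. with lapply(seq_len(nrow(df_teams)), ...)) and stack the results with do.call(rbind, ...) or dplyr::bind_rows().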
I have a list created by this process:
library(stackr)
df <- data.frame (qid = c(71663375, 71674701, 71724524))
lst1 <- split(df$qid, as.integer(gl(nrow(df), 100, nrow(df))))
out <- vector('list', length(lst1))
for(i in seq_along(lst1)) {
out[[i]] <- stack_questions(lst1[[i]])
}
How is it possible to create, from the out list, a new data frame with the columns tags, creation_date, and question_id?
dput of the out list
dput(out)
list(structure(list(tags = c("r", "r", "sql,dataexplorer"), is_answered = c(TRUE,
TRUE, FALSE), view_count = c(33L, 19L, 27L), accepted_answer_id = c(71724636L,
71674900L, NA), answer_count = c(1L, 1L, 1L), score = c(0L, 0L,
0L), last_activity_date = structure(c(1648978330, 1648633121,
1648563500), tzone = "", class = c("POSIXct", "POSIXt")), creation_date = structure(c(1648977343,
1648632306, 1648562092), tzone = "", class = c("POSIXct", "POSIXt"
)), last_edit_date = structure(c(1648977839, 1648632778, 1648562436
), tzone = "", class = c("POSIXct", "POSIXt")), question_id = c(71724524L,
71674701L, 71663375L), content_license = c("CC BY-SA 4.0", "CC BY-SA 4.0",
"CC BY-SA 4.0"), link = c("https://stackoverflow.com/questions/71724524/melt-a-dataframe-using-a-list-column",
"https://stackoverflow.com/questions/71674701/create-a-new-column-using-detecting-the-domain-of-a-url-from-an-existing-column",
"https://stackoverflow.com/questions/71663375/paginate-pages-to-receive-results-from-tsql"
), title = c("Melt a dataframe using a list column", "Create a new column using detecting the domain of a url from an existing column",
"Paginate pages to receive results from tSQL"), owner_account_id = c(24733596L,
24733596L, 24733596L), owner_reputation = c(17L, 17L, 17L), owner_user_id = c(18621268L,
18621268L, 18621268L), owner_user_type = c("registered", "registered",
"registered"), owner_profile_image = c("https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256"
), owner_display_name = c("Domin D", "Domin D", "Domin D"), owner_link = c("https://stackoverflow.com/users/18621268/domin-d",
"https://stackoverflow.com/users/18621268/domin-d", "https://stackoverflow.com/users/18621268/domin-d"
)), row.names = c(NA, -3L), class = "data.frame", metadata = list(
has_more = FALSE, quota_max = 10000L, quota_remaining = 1323L)))
out[[1]][c('tags', 'creation_date', 'question_id')]
tags creation_date question_id
1 r 2022-04-03 05:15:43 71724524
2 r 2022-03-30 05:25:06 71674701
3 sql,dataexplorer 2022-03-29 09:54:52 71663375
Or, if out is a list containing multiple data frames (one per element):
lapply(out, function(x) x[c('tags', 'creation_date', 'question_id')])
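And if the goal is a single combined data frame across all elements of out rather than a list of subsets, a short follow-up sketch: bind the subsets together (dplyr::bind_rows() would work equally well).
combined <- do.call(rbind,
                    lapply(out, function(x) x[c('tags', 'creation_date', 'question_id')]))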
I have a list containing multiple lists which all have the same structure:
ls <- list(
one = list(df = data.frame(var1_1 = c(1, 1, 1),
var1_2 = c('a', 'a', 'a')),
ls = list(n_df_1 = data.frame(var3_1 = c('x', 'x', 'x'),
var3_2 = c(4, 4, 4))),
name = c("one", "one", "one")),
two = list(df = data.frame(var1_1 = c(1, 1, 1),
var1_2 = c('a', 'a', 'a')),
ls = list(n_df_1 = data.frame(var3_1 = c('x', 'x', 'x'),
var3_2 = c(4, 4, 4))),
name = c("two", "two", "two")))
I want to merge all these nested lists as described here: Merge Two Lists in R
It does exactly what I want if I do this:
merged <- mapply(c, ls[[1]], ls[[2]], SIMPLIFY = FALSE)
The problem is that the main list (ls) doesn't always contain exactly two nested lists. How can I make this code more modular?
I tried to make a vector containing all indexes of the nested lists:
sapply(seq_along(ls), function(x) paste0("ls[[", x, "]]"))
Which outputs this:
[1] "ls[[1]]" "ls[[2]]"
I thought I could unquote this character vector so that R sees the strings as objects, but I can't figure out how to do that (if it's even possible). I looked at tidy eval for this, but I don't know if that is the way to do it.
Any suggestions?
You can use Reduce to do it for an arbitrary number of list elements, i.e.
Reduce(function(...) Map(c, ...), l1)  # Map is mapply(..., SIMPLIFY = FALSE)
which gives:
$df
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$ls
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$name
[1] "one" "one" "one" "two" "two" "two"
DATA:
dput(l1)
list(one = list(df = structure(list(var1_1 = c(1, 1, 1), var1_2 = structure(c(1L,
1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("one", "one", "one")), two = list(df = structure(list(
var1_1 = c(1, 1, 1), var1_2 = structure(c(1L, 1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("two", "two", "two")))
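For completeness, a sketch of the same call applied to the object from the question (ls is also the name of a base function, so a different name such as lst would be safer):
merged <- Reduce(function(...) Map(c, ...), ls)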
I want to access a variable 'bandSpecificMetadata' from a multi-dimensional list in R, and create a vector of 'reflectanceCoefficient' for my remote sensing project.
Firstly, I was able to reduce the dimension of the list and then used nodes <- get('EarthObservationResult', matadata.list$resultOf) to extract the list.
Then a problem comes up when I try to create a pairing like "bandNumber 1 corresponds to reflectance coefficient 2.21e-5" using a for loop.
for(node in nodes[6:9]) {
bn = get("bandNumber", node)
if(bn %in% c('1','2','3','4')){
i = integer(bn)
coeffs = get("reflectanceCoefficient", node)
}
print(coeffs)
}
which prints out:
[1] "2.21386105481e-05"
[1] "2.31474175457e-05"
[1] "2.60208594123e-05"
[1] "3.83481925626e-05"
But I want a vector with positions 1, 2, 3, 4 holding the corresponding numbers. It seems to me that each new value overwrites the previous one every time it prints.
Then I tried:
for(node in nodes[6:9]) {
n = 1:4
b[n] = get("bandNumber", node)
if(b[n] %in% c('1','2','3','4')){
i = integer(b[n])
coeffs[i] = get("reflectanceCoefficient", node)
}
print(coeffs)
}
But it turns out:
Error in integer(b[n]) : invalid 'length' argument
In addition: Warning message:
In if (b[n] %in% c("1", "2", "3", "4")) { :
the condition has length > 1 and only the first element will be used
How do I fix this?
I used XML::xmlParse() to parse the XML and matadata.list <- XML::xmlToList() to convert the parsed data to a list.
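For reference, the parsing step was roughly the following (a sketch; the file name here is just a placeholder):
library(XML)
doc <- XML::xmlParse("your_metadata.xml")  # placeholder path
matadata.list <- XML::xmlToList(doc)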
For a reproducible example, see below:
dput(matadata.list)
structure(list(metaDataProperty = structure(list(EarthObservationMetaData = structure(list(
identifier = "20170127_213132_0e0e_3B_AnalyticMS", acquisitionType = "NOMINAL",
productType = "L3B", status = "ARCHIVED", downlinkedTo = structure(list(
DownlinkInformation = structure(list(acquisitionStation = structure(list(
text = "Planet Ground Station Network", .attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), acquisitionDate = "2017-01-27T21:31:32+00:00"), .Names = c("acquisitionStation",
"acquisitionDate"))), .Names = "DownlinkInformation"),
archivedIn = structure(list(ArchivingInformation = structure(list(
archivingCenter = structure(list(text = "Planet Archive Center",
.attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), archivingDate = "2017-01-27T21:31:32+00:00",
archivingIdentifier = structure(list(text = "385180",
.attrs = structure("urn:eop:PS:dmsCatalogueId", .Names = "codeSpace")), .Names = c("text",
".attrs"))), .Names = c("archivingCenter", "archivingDate",
"archivingIdentifier"))), .Names = "ArchivingInformation"),
processing = structure(list(ProcessingInformation = structure(list(
processorName = "CMO Processor", processorVersion = "4.1.4",
nativeProductFormat = "GeoTIFF"), .Names = c("processorName",
"processorVersion", "nativeProductFormat"))), .Names = "ProcessingInformation"),
license = structure(list(licenseType = "20160101 - Inc - Single User",
resourceLink = structure(c("PL EULA", "https://assets.planet.com/docs/20160101_Inc_SingleUser.txt"
), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xlink",
"xlink"), .Names = c("http://www.w3.org/1999/xlink",
"http://www.w3.org/1999/xlink")), .Names = c("title",
"href"))), .Names = c("licenseType", "resourceLink")),
versionIsd = "1.0", pixelFormat = "16U"), .Names = c("identifier",
"acquisitionType", "productType", "status", "downlinkedTo", "archivedIn",
"processing", "license", "versionIsd", "pixelFormat"))), .Names = "EarthObservationMetaData"),
validTime = structure(list(TimePeriod = structure(list(beginPosition = "2017-01-27T21:31:32+00:00",
endPosition = "2017-01-27T21:31:32+00:00"), .Names = c("beginPosition",
"endPosition"))), .Names = "TimePeriod"), using = structure(list(
EarthObservationEquipment = structure(list(platform = structure(list(
Platform = structure(list(shortName = "PlanetScope",
serialIdentifier = "0e0e", orbitType = "LEO-SSO"), .Names = c("shortName",
"serialIdentifier", "orbitType"))), .Names = "Platform"),
instrument = structure(list(Instrument = structure(list(
shortName = "PS2"), .Names = "shortName")), .Names = "Instrument"),
sensor = structure(list(Sensor = structure(list(sensorType = "OPTICAL",
resolution = structure(list(text = "3.0000",
.attrs = structure("m", .Names = "uom")), .Names = c("text",
".attrs")), scanType = "FRAME"), .Names = c("sensorType",
"resolution", "scanType"))), .Names = "Sensor"),
acquisitionParameters = structure(list(Acquisition = structure(list(
orbitDirection = "DESCENDING", incidenceAngle = structure(list(
text = "8.072969e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationAzimuthAngle = structure(list(
text = "7.610387e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationElevationAngle = structure(list(
text = "4.649194e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), azimuthAngle = structure(list(text = "1.242074e+01",
.attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), spaceCraftViewAngle = structure(list(
text = "5.692807e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), acquisitionDateTime = "2017-01-27T21:31:32+00:00"), .Names = c("orbitDirection",
"incidenceAngle", "illuminationAzimuthAngle", "illuminationElevationAngle",
"azimuthAngle", "spaceCraftViewAngle", "acquisitionDateTime"
))), .Names = "Acquisition")), .Names = c("platform",
"instrument", "sensor", "acquisitionParameters"))), .Names = "EarthObservationEquipment"),
target = structure(list(Footprint = structure(list(multiExtentOf = structure(list(
MultiSurface = structure(list(surfaceMembers = structure(list(
Polygon = structure(list(outerBoundaryIs = structure(list(
LinearRing = structure(list(coordinates = "175.446585079397,-37.7068873856657 175.446633607572,-37.7045627724835 175.46731776545,-37.6311749428137 175.468010520596,-37.6311839417076 175.75989021492,-37.6819836599337 175.759889856814,-37.6820051679817 175.739424097003,-37.757826933992 175.739359440859,-37.7578262423109 175.446585079397,-37.7068873856657"), .Names = "coordinates")), .Names = "LinearRing"),
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("outerBoundaryIs",
".attrs"))), .Names = "Polygon"), .attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("surfaceMembers",
".attrs"))), .Names = "MultiSurface"), centerOf = structure(list(
Point = structure(list(pos = "175.603162359 -37.6944367036",
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("pos",
".attrs"))), .Names = "Point"), geographicLocation = structure(list(
topLeft = structure(list(latitude = "-37.6311749428",
longitude = "175.446585079"), .Names = c("latitude",
"longitude")), topRight = structure(list(latitude = "-37.6311749428",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomRight = structure(list(latitude = "-37.757826934",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomLeft = structure(list(latitude = "-37.757826934",
longitude = "175.446585079"), .Names = c("latitude",
"longitude"))), .Names = c("topLeft", "topRight", "bottomRight",
"bottomLeft"))), .Names = c("multiExtentOf", "centerOf",
"geographicLocation"))), .Names = "Footprint"), resultOf = structure(list(
EarthObservationResult = structure(list(product = structure(list(
ProductInformation = structure(list(fileName = "20170127_213132_0e0e_3B_AnalyticMS.tif",
productFormat = "GeoTIFF", spatialReferenceSystem = structure(list(
epsgCode = "32760", geodeticDatum = "WGS_1984",
projection = "WGS 84 / UTM zone 60S", projectionZone = "160"), .Names = c("epsgCode",
"geodeticDatum", "projection", "projectionZone"
)), resamplingKernel = "CC", numRows = "4565",
numColumns = "9194", numBands = "4", rowGsd = "3.0",
columnGsd = "3.0", radiometricCorrectionApplied = "true",
geoCorrectionLevel = "Precision Geocorrection",
elevationCorrectionApplied = "FineDEM", atmosphericCorrectionApplied = "false"), .Names = c("fileName",
"productFormat", "spatialReferenceSystem", "resamplingKernel",
"numRows", "numColumns", "numBands", "rowGsd", "columnGsd",
"radiometricCorrectionApplied", "geoCorrectionLevel",
"elevationCorrectionApplied", "atmosphericCorrectionApplied"
))), .Names = "ProductInformation"), mask = structure(list(
MaskInformation = structure(list(type = "UNUSABLE DATA",
format = "RASTER", referenceSystemIdentifier = structure(list(
text = "32760", .attrs = structure("EPSG", .Names = "codeSpace")), .Names = c("text",
".attrs")), fileName = "20170127_213132_0e0e_3B_AnalyticMS_DN_udm.tif"), .Names = c("type",
"format", "referenceSystemIdentifier", "fileName"
))), .Names = "MaskInformation"), cloudCoverPercentage = structure(list(
text = "0.01", .attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), cloudCoverPercentageQuotationMode = "AUTOMATIC",
unusableDataPercentage = structure(list(text = "0.0",
.attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), bandSpecificMetadata = structure(list(
bandNumber = "1", comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.21386105481e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "2",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.31474175457e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "3",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.60208594123e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "4",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "3.83481925626e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
))), .Names = c("product", "mask", "cloudCoverPercentage",
"cloudCoverPercentageQuotationMode", "unusableDataPercentage",
"bandSpecificMetadata", "bandSpecificMetadata", "bandSpecificMetadata",
"bandSpecificMetadata"))), .Names = "EarthObservationResult"),
.attrs = structure(c("http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level.xsd",
"1.2.1", "1.0"), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xsi",
"", ""), .Names = c("http://www.w3.org/2001/XMLSchema-instance",
"", "")), .Names = c("schemaLocation", "version", "planet_standard_product_version"
))), .Names = c("metaDataProperty", "validTime", "using",
"target", "resultOf", ".attrs"))
As you did not provide any reproducible data, the following attempt may not work:
# Initialise vectors:
b <- vector(mode = "character", length = 4)
coeffs <- vector(mode = "character", length = 4)
# Get coefficients
for(i in 6:9) {
  j <- i - 5                                    # map node positions 6:9 to vector positions 1:4
  b[j] <- get("bandNumber", nodes[[i]])
  coeffs[j] <- ifelse(b[j] %in% c("1", "2", "3", "4"),
                      get("reflectanceCoefficient", nodes[[i]]),  # Yes cond val
                      NA)                                         # No cond val
}
coeffs
(edited to answer the updated question)
Have a look at these answers for working with the original XML data: How to parse XML to R data frame
You already parsed the XML file and now you have lists. I think the purrr package (https://purrr.tidyverse.org/) helps a lot in this case.
I assume that we know the path to the EarthObservationResult. Note how we extract reflectanceCoefficient from all sub-nodes and discard the NULL elements with compact.
library(tidyverse)
nodes <- matadata.list$resultOf$EarthObservationResult
coeffs <- nodes %>%
purrr::map("reflectanceCoefficient") %>%
purrr::compact() %>%
purrr::map_dbl(~ as.numeric(.x)) %>%
purrr::set_names(nm = NULL)
print(coeffs)
#> [1] 2.213861e-05 2.314742e-05 2.602086e-05 3.834819e-05
Created on 2018-08-28 by the reprex package (v0.2.0).
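If the band numbers themselves are needed alongside the coefficients, the same pattern can pull them out and attach them as names (a small, untested extension of the code above):
bands <- nodes %>%
  purrr::map("bandNumber") %>%
  purrr::compact() %>%
  unlist(use.names = FALSE)
coeffs_named <- purrr::set_names(coeffs, bands)  # names "1".."4" paired with each coefficient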
I have a nested list of geocoded Moscow street addresses that I am converting to a data frame. However, the data frame I was geocoding from had only addresses without zip codes, and in a few hundred (out of 33k) cases the geocoder returned multiple results for the same street address with different zip codes. This created additional nesting in the list, which, when converted to a data frame, results in a different number of observations than the initial data frame.
A result with only one address has the following structure:
(Ignore the gibberish; the R console will not render the Cyrillic correctly)
structure(list(results = structure(list(address_components = list(
structure(list(long_name = c("4", "óëèöà Áîëüøàÿ Àêàäåìè÷åñêàÿ",
"Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ",
"127299"), short_name = c("4", "óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ",
"Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã", "Ìîñêâà", "Ìîñêâà", "RU",
"127299"), types = list("street_number", "route", c("political",
"sublocality", "sublocality_level_1"), c("locality", "political"
), c("administrative_area_level_2", "political"), c("country",
"political"), "postal_code")), .Names = c("long_name", "short_name",
"types"), class = "data.frame", row.names = c(NA, 7L))),
formatted_address = "óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ, 4, Ìîñêâà, Ðîññèÿ, 127299",
geometry = structure(list(location = structure(list(lat = 55.8176896,
lng = 37.522891), .Names = c("lat", "lng"), class = "data.frame", row.names = 1L),
location_type = "ROOFTOP", viewport = structure(list(
northeast = structure(list(lat = 55.8190385802915,
lng = 37.5242399802915), .Names = c("lat", "lng"
), class = "data.frame", row.names = 1L), southwest = structure(list(
lat = 55.8163406197085, lng = 37.5215420197085), .Names = c("lat",
"lng"), class = "data.frame", row.names = 1L)), .Names = c("northeast",
"southwest"), class = "data.frame", row.names = 1L)), .Names = c("location",
"location_type", "viewport"), class = "data.frame", row.names = 1L),
partial_match = TRUE, place_id = "ChIJ59yLsy1ItUYR5EEBFbFJoSA",
types = list("street_address")), .Names = c("address_components",
"formatted_address", "geometry", "partial_match", "place_id",
"types"), class = "data.frame", row.names = 1L), status = "OK"), .Names = c("results",
"status"))
Whereas a result with multiple possible addresses looks like:
structure(list(results = structure(list(address_components = list(
structure(list(long_name = c("23", "óëèöà Áîëüøàÿ Àêàäåìè÷åñêàÿ",
"Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ",
"127299"), short_name = c("23", "óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ",
"Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã", "Ìîñêâà", "Ìîñêâà", "RU",
"127299"), types = list("street_number", "route", c("political",
"sublocality", "sublocality_level_1"), c("locality", "political"
), c("administrative_area_level_2", "political"), c("country",
"political"), "postal_code")), .Names = c("long_name", "short_name",
"types"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
long_name = c("23", "óëèöà Áîëüøàÿ Àêàäåìè÷åñêàÿ", "Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã",
"Ìîñêâà", "Ìîñêâà", "Ðîññèÿ", "125008"), short_name = c("23",
"óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ", "Ñåâåðíûé àäìèíèñòðàòèâíûé îêðóã",
"Ìîñêâà", "Ìîñêâà", "RU", "125008"), types = list("street_number",
"route", c("political", "sublocality", "sublocality_level_1"
), c("locality", "political"), c("administrative_area_level_2",
"political"), c("country", "political"), "postal_code")), .Names = c("long_name",
"short_name", "types"), class = "data.frame", row.names = c(NA,
7L))), formatted_address = c("óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ, 23, Ìîñêâà, Ðîññèÿ, 127299",
"óë. Áîëüøàÿ Àêàäåìè÷åñêàÿ, 23, Ìîñêâà, Ðîññèÿ, 125008"), geometry = structure(list(
location = structure(list(lat = c(55.8169112, 55.826859),
lng = c(37.5202899, 37.529427)), .Names = c("lat", "lng"
), class = "data.frame", row.names = 1:2), location_type = c("ROOFTOP",
"ROOFTOP"), viewport = structure(list(northeast = structure(list(
lat = c(55.8182601802915, 55.8282079802915), lng = c(37.5216388802915,
37.5307759802915)), .Names = c("lat", "lng"), class = "data.frame", row.names = 1:2),
southwest = structure(list(lat = c(55.8155622197085,
55.8255100197085), lng = c(37.5189409197085, 37.5280780197085
)), .Names = c("lat", "lng"), class = "data.frame", row.names = 1:2)), .Names = c("northeast",
"southwest"), class = "data.frame", row.names = 1:2)), .Names = c("location",
"location_type", "viewport"), class = "data.frame", row.names = 1:2),
partial_match = c(TRUE, TRUE), place_id = c("ChIJnVMw7C1ItUYRdfeWEQrXuAk",
"ChIJnbnwOdY3tUYR1_D9pHTqCsI"), types = list("street_address",
"street_address")), .Names = c("address_components",
"formatted_address", "geometry", "partial_match", "place_id",
"types"), class = "data.frame", row.names = 1:2), status = "OK"), .Names = c("results",
"status"))
In the results element of the second list, there is an additional level of nesting for each possible address, which when flattened creates an "extra" observation for that address, making it impossible to cbind() the geocoding results back to the list of addresses. I am using the following functions to flatten my nested lists to data frames. How can I modify them to take only the first address when this additional nesting occurs? If the address is incorrect, the buildings will simply be discarded from the sample when I later merge with another data frame, so I am concerned only with making each geocoded observation match the appropriate row in the original data frame (the source of the addresses).
flatten_googleway <- function(df) {
require(jsonlite)
res <- jsonlite::flatten(df)
res[, names(res) %in% c("geometry.location_type", "geometry.location.lat",
"geometry.location.lng", "formatted_address")]
}
moscowhousegeo.df <- do.call(rbind, lapply(moscowhouse.list, function(x) {
if (length(x$results) == 0) template_res[1, ] else flatten_googleway(x$results)
}))
##template for NA results
structure(list(formatted_address = character(0), geometry.location_type = character(0),
geometry.location.lat = numeric(0), geometry.location.lng = numeric(0)), .Names = c("formatted_address",
"geometry.location_type", "geometry.location.lat", "geometry.location.lng"
), row.names = integer(0), class = "data.frame")
Whoops, I was massively over-complicating things, as usual. I was able to fix this simply by modifying the lapply() call to replace all list elements with no results, as well as elements where x$results$formatted_address has length greater than 1 (as is the case when multiple possible results are returned).
moscowhousegeo.df <- do.call(rbind, lapply(moscowhouse.list, function(x) {
if (length(x$results) == 0 | length(x$results$formatted_address) > 1) template_res[1, ] else flatten_googleway(x$results)
}))
I still lose some data this way unfortunately, but identifying which address is correct out of the options given would likely be too time-consuming, and a bit silly in a dataset with so many observations.
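For what it's worth, an alternative, untested sketch that keeps the first returned address instead of discarding multi-match elements, so no rows are lost:
moscowhousegeo.df <- do.call(rbind, lapply(moscowhouse.list, function(x) {
  if (length(x$results) == 0) {
    template_res[1, ]
  } else {
    flatten_googleway(x$results)[1, , drop = FALSE]  # keep only the first match
  }
}))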
I wanted to do sentiment analysis in R using the qdap package.
It gives a data frame containing all.all, all.wc, all.polarity, all.pos.words, all.neg.words, etc.
I want to extract the values of all.polarity, all.pos.words, and all.neg.words, but when I use
sentiment$all.polarity or sentiment$all.pos.words,
I get NULL as the result.
dput(head(sentiment))
list(structure(list(all = c("all", "all", "all"), wc = c(44L,
1L, 1L), polarity = c(-0.422115882408869, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("disappointed", "issue"
), "-", "-"), text.var = c("list(list(content = \" misleaded icici bank customer care branch excutive really disappointed bank dont know steps take get issue fixed\", meta = list(author = character(0), datetimestamp = list(sec = 20.097678899765, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(61L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("led", "-",
"-"), neg.words = list("expire", "-", "-"), text.var = c("list(list(content = \" didnt know customer banking icici years will led people looking student travel card staff mg road treat customers tried offer card wud expire one year n told get new card one year dont know\", meta = list(author = character(0), datetimestamp = list(sec = 20.3989679813385, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(58L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("top", "-",
"-"), neg.words = list("worst", "-", "-"), text.var = c("list(list(content = \" asked staff can upgrade platinum coral card documentation fee will involoved even receiving card poeple sill keep calling top levied rs joining fee interested paying card one worst customer care experienced\", meta = list(author = character(0), datetimestamp = list(sec = 20.648964881897, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", \n origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(59L,
1L, 1L), polarity = c(-0.494717861727131, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("long time", "long time",
"disappointed"), "-", "-"), text.var = c("list(list(content = \" applied credit card corporate scheme long time back got verification call also long time back initially getting least response executive now longer picking call neither letting know status application extremely disappointed service\", meta = list(author = character(0), datetimestamp = list(sec = 20.8989698886871, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", \n language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(66L,
1L, 1L), polarity = c(0.0246182981958665, 0, 0), pos.words = list(
c("work", "support"), "-", "-"), neg.words = list("disappointed",
"-", "-"), text.var = c("list(list(content = \" otp service working used work month decided change everything im getting otp sms registered mobile number ive tried contacting customer support several times keep asking send sms despite done several times several days havent received otps ever really disappointed\", meta = list(author = character(0), datetimestamp = list(sec = 21.1935319900513, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), \n heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(50L,
1L, 1L), polarity = c(-0.282842712474619, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("pathetic", "lied"
), "-", "-"), text.var = c("list(list(content = \" pathetic service behavior icici bank facing past days icici executive lied luring upgrade debit card terms conditions just opposite booklet received told phone\", meta = list(author = character(0), datetimestamp = list(sec = 21.4258019924164, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"))
Can anyone suggest how to do this?
The following works for me:
library(qdap)
text <- "I am liking the work " # the text for which polarity score is needed
sentiment <- polarity(text) #make the call
sentiment$all$pos.words # returns the positive words detected by the algo
#[[1]]
#[1] "liking" "work"
sentiment$all$polarity # returns the sentence polarity score
#[1] 0.8944272
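Given the dput above, sentiment looks like a list of data frames whose columns are named polarity, pos.words and neg.words (not all.polarity), which is why sentiment$all.polarity returns NULL. A sketch for extracting the values from each element (row 1 of each element holds the scored text):
polarities <- sapply(sentiment, function(x) x$polarity[1])     # polarity score per element
pos_words  <- lapply(sentiment, function(x) x$pos.words[[1]])  # positive words per element
neg_words  <- lapply(sentiment, function(x) x$neg.words[[1]])  # negative words per element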