Get all.polarity value from qdap package results in R - r

I wanted to do sentimental analysis in R using qdap package.
It gives out a data frame containing all.all, all.wc, all.polarity, all.pos.words, all.neg.words etc.
I want to extract the values of all.polarity, all.pos.words,all.neg.words but when i use
sentiment$all.polarity or sentiment$all.pos.words,
I get NULL in result.
dput(head(sentiment))
list(structure(list(all = c("all", "all", "all"), wc = c(44L,
1L, 1L), polarity = c(-0.422115882408869, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("disappointed", "issue"
), "-", "-"), text.var = c("list(list(content = \" misleaded icici bank customer care branch excutive really disappointed bank dont know steps take get issue fixed\", meta = list(author = character(0), datetimestamp = list(sec = 20.097678899765, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(61L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("led", "-",
"-"), neg.words = list("expire", "-", "-"), text.var = c("list(list(content = \" didnt know customer banking icici years will led people looking student travel card staff mg road treat customers tried offer card wud expire one year n told get new card one year dont know\", meta = list(author = character(0), datetimestamp = list(sec = 20.3989679813385, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(58L,
1L, 1L), polarity = c(0, 0, 0), pos.words = list("top", "-",
"-"), neg.words = list("worst", "-", "-"), text.var = c("list(list(content = \" asked staff can upgrade platinum coral card documentation fee will involoved even receiving card poeple sill keep calling top levied rs joining fee interested paying card one worst customer care experienced\", meta = list(author = character(0), datetimestamp = list(sec = 20.648964881897, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", \n origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(59L,
1L, 1L), polarity = c(-0.494717861727131, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("long time", "long time",
"disappointed"), "-", "-"), text.var = c("list(list(content = \" applied credit card corporate scheme long time back got verification call also long time back initially getting least response executive now longer picking call neither letting know status application extremely disappointed service\", meta = list(author = character(0), datetimestamp = list(sec = 20.8989698886871, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", \n language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(66L,
1L, 1L), polarity = c(0.0246182981958665, 0, 0), pos.words = list(
c("work", "support"), "-", "-"), neg.words = list("disappointed",
"-", "-"), text.var = c("list(list(content = \" otp service working used work month decided change everything im getting otp sms registered mobile number ive tried contacting customer support several times keep asking send sms despite done several times several days havent received otps ever really disappointed\", meta = list(author = character(0), datetimestamp = list(sec = 21.1935319900513, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), \n heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"),
structure(list(all = c("all", "all", "all"), wc = c(50L,
1L, 1L), polarity = c(-0.282842712474619, 0, 0), pos.words = list(
"-", "-", "-"), neg.words = list(c("pathetic", "lied"
), "-", "-"), text.var = c("list(list(content = \" pathetic service behavior icici bank facing past days icici executive lied luring upgrade debit card terms conditions just opposite booklet received told phone\", meta = list(author = character(0), datetimestamp = list(sec = 21.4258019924164, min = 51, hour = 11, mday = 6, mon = 6, year = 115, wday = 1, yday = 186, isdst = 0), description = character(0), heading = character(0), id = \"1\", language = \"en\", origin = character(0))))",
"list()", "list()")), row.names = c(NA, -3L), .Names = c("all",
"wc", "polarity", "pos.words", "neg.words", "text.var"), class = "data.frame"))
Can anyone suggest how to do this?

The following works for me -
library(qdap)
text <- "I am liking the work " # the text for which polarity score is needed
sentiment <- polarity(text) #make the call
sentiment$all$pos.words # returns the positive words detected by the algo
#[[1]]
#[1] "liking" "work"
sentiment$all$polarity # returns the sentence polarity score
#[1] 0.8944272

Related

Keep specific rows in a dataframe from a list

From a list from this process:
library(stackr)
df <- data.frame (qid = c(71663375, 71674701, 71724524))
lst1 <- split(df$qid, as.integer(gl(nrow(df), 100, nrow(df))))
out <- vector('list', length(lst1))
for(i in seq_along(lst1)) {
out[[i]] <- stack_questions(lst1[[i]])
}
How is it possible to create from out list a new dataframe with the columns tags, creation_date, question_id?
dput of the out list
dput(out)
list(structure(list(tags = c("r", "r", "sql,dataexplorer"), is_answered = c(TRUE,
TRUE, FALSE), view_count = c(33L, 19L, 27L), accepted_answer_id = c(71724636L,
71674900L, NA), answer_count = c(1L, 1L, 1L), score = c(0L, 0L,
0L), last_activity_date = structure(c(1648978330, 1648633121,
1648563500), tzone = "", class = c("POSIXct", "POSIXt")), creation_date = structure(c(1648977343,
1648632306, 1648562092), tzone = "", class = c("POSIXct", "POSIXt"
)), last_edit_date = structure(c(1648977839, 1648632778, 1648562436
), tzone = "", class = c("POSIXct", "POSIXt")), question_id = c(71724524L,
71674701L, 71663375L), content_license = c("CC BY-SA 4.0", "CC BY-SA 4.0",
"CC BY-SA 4.0"), link = c("https://stackoverflow.com/questions/71724524/melt-a-dataframe-using-a-list-column",
"https://stackoverflow.com/questions/71674701/create-a-new-column-using-detecting-the-domain-of-a-url-from-an-existing-column",
"https://stackoverflow.com/questions/71663375/paginate-pages-to-receive-results-from-tsql"
), title = c("Melt a dataframe using a list column", "Create a new column using detecting the domain of a url from an existing column",
"Paginate pages to receive results from tSQL"), owner_account_id = c(24733596L,
24733596L, 24733596L), owner_reputation = c(17L, 17L, 17L), owner_user_id = c(18621268L,
18621268L, 18621268L), owner_user_type = c("registered", "registered",
"registered"), owner_profile_image = c("https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256",
"https://lh3.googleusercontent.com/a/AATXAJwQRtIYRrvKJi1a4AfvTHoE4ht8f_WQ1Qv3jtbr=k-s256"
), owner_display_name = c("Domin D", "Domin D", "Domin D"), owner_link = c("https://stackoverflow.com/users/18621268/domin-d",
"https://stackoverflow.com/users/18621268/domin-d", "https://stackoverflow.com/users/18621268/domin-d"
)), row.names = c(NA, -3L), class = "data.frame", metadata = list(
has_more = FALSE, quota_max = 10000L, quota_remaining = 1323L)))
out[[1]][c('tags', 'creation_date', 'question_id')]
tags creation_date question_id
1 r 2022-04-03 05:15:43 71724524
2 r 2022-03-30 05:25:06 71674701
3 sql,dataexplorer 2022-03-29 09:54:52 71663375
Or if out is a list containing multiple data frames per element:
lapply(out, function(x) x[c('tags', 'creation_date', 'question_id')])

R Function help to obtain only the Unique values to then obtain basic metrics

I am trying to obtain the unique values for Number.Full in the below.
n_distinct() brings me the distinct count of the Number.Full. But it doesn't feed that into the min()/max()/mean() counts.
I have tried putting distinct and unique as part of the filter() and placing it after the filter() as a new variable.
But I can't seem to get it to feed in/work properly.
Any help or suggestions are greatly welcome.
Edit 1 for dput data:
nRequests_byYearMth <- df_Raw_Data %>%
filter(Specimen.Number.Left.2 == "AB") %>%
group_by(Rec_Period_Month_Yr) %>%
summarise(Number.Full = n_distinct(Number.Full), min(TaT_Coll_to_Auth), max(TaT_Coll_to_Auth), mean(TaT_Coll_to_Auth)) %>%
arrange(Rec_Period_Month_Yr)
structure(list(Receive.Date = c("2019-09-20", "2019-09-20", "2019-06-24",
"2019-05-23", "2019-09-05", "2019-07-30"), Number.Full = c("04023119",
"04023119", "02634719", "02190819", "00273419",
"03234219"), Ex.No = c("", "", "19P08645QQ5",
"", "", ""), Order.Comment = c("CT11", "CT11", "HR", "SHU",
"", "ICCZZ"), Coll.Date.Source = c("1931-02-04", "1931-02-04",
"1949-01-04", "2000-12-23", "2012-09-05", "2015-05-02"), Location.Code = c("FH7895SS",
"FHSA785", "VB97S", "RV0158", "FH29567", "N1"), Loc.Des = c("FWC",
"FU", "VHB", "RDO",
"F29", "NSBRU"), Tissue.Code = c("LEX",
"LEX", "RC", "SKL", "NPL", "RC"), T.Name = c("ELung",
"ELung", "Referred", "Skin", "Pleural",
"Referred Case"), Current.Status = c("S", "S", "S", "S",
"S", "S"), Date.Updated = c("2019-10-20", "2019-10-20",
"2019-06-24", "2019-05-28", "2019-09-13", "2019-08-07"), Reporting.1 = c("LYNN",
"LYNN", "ROBCM", "HUSA", "SPOE", "CPATH"), Reporting.2 = c("MAJJ",
"MAJJ", "", "", "ROBB", ""), Reporting.3 = c("",
"", "", "", "FERB", ""), Reporting.4 = c("", "",
"", "", "", ""), Reporting.5 = c("", "", "", "",
"", ""), Number.Left.2 = c("AB", "AB", "AB", "AB", "CN",
"AB"), Auth_Period_Month_Yr = c("2019-10", "2019-10", "2019-06",
"2019-05", "2019-09", "2019-08"), Rec_Period_Month_Yr = c("2019-09",
"2019-09", "2019-06", "2019-05", "2019-09", "2019-07"), TaT_Coll_to_Auth = structure(c(32400,
32400, 25738, 6730, 2564, 1558), class = "difftime", units = "days"),
M.Weighting = c(50L, 50L, 0L, 30L, NA, 0L)), row.names = c(NA,
6L), class = "data.frame")
From the nRequests_byYearMth formula I was expecting it to filter() to only show the AB entries, then group those by the Rec_Period_Moth_Yr, when it was summerised I had it count the distinct entries (n_distinct())and then the min()/max()/mean() would also show the data relating to the filtered results.
But when I've used Excel to look at the data extract I'm using the it doesn't seem to be filtering correctly.
I am thinking that I need to have the filter applied to the summerise() somehow.
Edit with outputs:
The resulting output is:
structure(list(Rec_Period_Month_Yr = c("2019-04", "2019-05",
"2019-06", "2019-07", "2019-08", "2019-09", "2019-10", "2019-11",
"2019-12", "2020-01", "2020-02", "2020-03"), Specimen.Number.Full = c(4881L,
4929L, 4902L, 5289L, 4815L, 5043L, 5697L, 5051L, 4552L, 5434L,
4917L, 4556L), `min(TaT_Coll_to_Auth)` = structure(c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "days"),
`max(TaT_Coll_to_Auth)` = structure(c(368, 6730, 25738, 1558,
222, 32400, 374, 150, 320, 97, 382, 60), class = "difftime", units = "days"),
`mean(TaT_Coll_to_Auth)` = structure(c(9.80235422940049,
10.768904109589, 14.8278848840458, 10.0686706074708, 10.2533425223983,
19.6828624240824, 11.8121527777778, 10.4033579583613, 10.4007004231723,
9.04840344652813, 8.94940393678958, 8.2197571578474), class = "difftime", units = "days")), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
The expected output I want is below. But I can only create this if I only look at the AB entries and Summarise() doesn't seem to do that for the min()/max()/mean() and instead looks at the entire entires for the column.
I need it to look at only the entries relating to the AB filter() (for all the summarised items.)
(The last Max entry shows as 60 in R but if properly filtered would show as 50)
structure(list(Year.and.Mth = c("2019-4", "2019-5", "2019-6",
"2019-7", "2019-8", "2019-9", "2019-10", "2019-11", "2019-12",
"2020-1", "2020-2", "2020-3"), Number.Full = c(4881, 4929, 4902,
5289, 4815, 5043, 5697, 5051, 4552, 5434, 4917, 4556), Max = c(113,
6730, 25738, 1558, 156, 32400, 374, 109, 320, 97, 382, 50), Mean = c(7.97705388240115,
9.34286873605194, 13.514891880865, 8.39194554736245, 7.72294911734164,
15.2502478683323, 9.15850447604002, 8.85389031874876, 9.00021968365554,
7.76573426573427, 7.97335773845841, 7.350526778)), class = "data.frame", row.names = c(NA,
-12L))

How do I unnest list embeded in data.frame column?

I'm new to working with nested lists, so I'm hoping the solution provided can also provide some commenting on the how. I have a nested list that I scraped using jsonlite. How do I take how to take the list data for all teams, and bind together into a single data.frame? The list is setup below. I copied one element of the list (for 1 team)
Here is the code I used to get to the list that I've pasted below. I'm showing simply so that I can provide how the list is setup.
json <-
url %>%
fromJSON(simplifyDataFrame = T)
df <- json$body$rosters
# DF with each team showing up on it's own line, but nested lists in players
df_teams <- df$teams
# One teams worth of data
JSON_list <- df_teams[1, ]
My list content is below.
JSON_list <- structure(list(
projected_points = NA, long_abbr = "KE", lineup_status = "ok",
short_name = "Kramerica", total_roster_salary = 22L, division = "",
players = list(structure(list(
firstname = c(
"Jonathan", "Anthony"
), wildcards = structure(list(
contract = c("1", "1"),
salary = c("1", "21")
), class = "data.frame", row.names = c(
NA,
2L
)), on_waivers = c(
0L, 0L
), photo = c(
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1657581.png",
"http://sports.cbsimg.net/images/baseball/mlb/players/170x170/1670417.png"
),
eligible_for_offense_and_defense = c(0L, 0L),
opponents = list(
structure(list(
game_id = c(
"", ""
), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet"
),
weather_icon_code = c(
"", ""
), home_team = c("true", "true"),
abbrev = c("OAK", "OAK"),
time = c(
1553803620L,
1553911620L
),
date = c(
"20190328",
"20190329"
), weather_icon_url = c(
"", ""
), venue_type = c("", ""), game_abbr = c("", ""),
weather = c("", ""), temperature = c(
NA, NA
)
), class = "data.frame", row.names = c(NA, 2L)),
structure(list(game_id = c("", "", ""), weather_error = c(
"Weather is not available for this game yet",
"Weather is not available for this game yet", "Weather is not available for this game yet"
), weather_icon_code = c("", "", ""), home_team = c(
"true",
"true", "true"
), abbrev = c("TEX", "TEX", "TEX"), time = c(
1553803500L,
1553990700L, 1554062700L
), date = c(
"20190328", "20190330",
"20190331"
), weather_icon_url = c("", "", ""), venue_type = c(
"",
"", ""
), game_abbr = c("", "", ""), weather = c(
"", "",
""
), temperature = c(NA, NA, NA)), class = "data.frame", row.names = c(
NA,
3L
))
), icons = structure(list(
headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
),
hot = c(NA, 1L),
cold = c(1L, NA),
injury = c(
"Knee: Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 21L)), elias_id = c(
"LUC758619", "RIZ253611"
), percentstarted = c(
"48%", "97%"
),
profile_link = c(
"<a class='playerLink' aria-label=' Jonathan Lucroy C LAA' href='http://baseball.cbssports.com/players/playerpage/1657581'>Jonathan Lucroy</a> <span class=\"playerPositionAndTeam\">C | LAA</span> ",
"<a class='playerLink' aria-label=' Anthony Rizzo 1B CHC' href='http://baseball.cbssports.com/players/playerpage/1670417'>Anthony Rizzo</a> <span class=\"playerPositionAndTeam\">1B | CHC</span>"
),
id = c(
"1657581", "1670417"
), pro_status = c(
"A", "A"
), on_waivers_until = c(NA, NA), jersey = c("20", "44"),
percentowned = c("61%", "99%"),
pro_team = c(
"LAA", "CHC"
), position = c(
"C", "1B"
), lastname = c(
"Lucroy", "Rizzo"
),
roster_pos = c("C", "1B"),
update_type = c("normal", "normal"),
age = c(
32L, 29L
), eligible = c(
"C,U", "1B,U"
), is_locked = c(
0L,
0L
), bats = c(
"R", "L"
), owned_by_team_id = c(
12L, 12L
), ytd_points = c(
0L, 0L
), roster_status = c(
"A", "A"
), is_keeper = c(
0L, 0L
), profile_url = c(
"http://baseball.cbssports.com/players/playerpage/1657581",
"http://baseball.cbssports.com/players/playerpage/1670417"
), fullname = c(
"Jonathan Lucroy", "Anthony Rizzo"
), throws = c(
"R",
"L"
), headline = c(
"Angels' Jonathan Lucroy: Inks deal with Angels",
NA
), `starting-pitcher-today` = c(
NA, "false"
), injury = c(NA, "Knee"), return = c(
"Questionable for start of season",
NA
)
), class = "data.frame", row.names = c(NA, 2L))),
name = "Kramerica Enterprises", logo = "http://baseball.cbssports.com/images/team-logo/main-36x36.jpg",
abbr = "KE", point = "20190328", id = "12", active_roster_salary = 22L,
warning = structure(list(description = NA_character_), row.names = 1L, class = "data.frame")
), row.names = 1L, class = "data.frame")
# Desired table sample (does not include all columns)
tibble::tribble(
~projected_points, ~long_abbr, ~lineup_status, ~short_name, ~total_roster_salary, ~division, ~name, ~logo, ~abbr, ~point5, ~active_roster_salary, ~id2, ~firstname, ~contract, ~salary,
NA, "KE", "ok", "Kramerica", 22, NA, "Biloxi Blackjacks", NA, "KE", 20190328, 22, 1657581, "Jonathan", 1, 1
)
The issue I'm running into is that the players column looks to be a nested df, and also has other nested df in it. Specifically: "wildcards", "opponents" and "icons". I am looking for a data frame that contains all of the columns. For the nested lists, I'd like their content to show up as columns for that particular player. I.E. Wildcards, create a column for "contract" and "salary". Also, how would I bind the list together if I wanted to specifically choose columns from JSON_list I.E. "long_abbr", "lineup_status", etc. from the and "firstname", both wildcard columns, "id", and some other from the JSON_list$players?
You can isolate the list elements using [[]] and the columns using [] if you have a nested structure. If the number if rows are equal, you can directly make your dataframe using cbind
Let's make a reproducible example
Create 3 data frames of similar dimensions
df1 <- data.frame(var1=c('a', 'b', 'c'), var2=c('d', 'e', 'f'), var3=1:3)
df2 <- data.frame(var4=c('g', 'h', 'i'), var5=c('j', 'k', 'l'), var6=4:6)
df3 <- data.frame(var7=c(6:8), var8=c('j', 'k', 'l'), var9=4:6)
Put the data frames in a nested list structure
list <- list(df1,df2)
nested.list <- list(list, df3)
Make a binded data frame made of var2, var6 and var7
binded.df <- cbind(nested.list[[1]][[1]][2],nested.list[[1]][[2]][3],nested.list[[2]][1])

How to access multi-dimensional list element using FOR loop in R

I want to access a variable 'bandSpecificMatadata' from a multi-dimensional list in R, and create a vector of 'reflectanceCoefficient' for my remote sensing project.
Firstly, I was able to reduce the dimension of the list and then used nodes <- get('EarthObservationResult', matadata.list$resultOf) to exact the list.
Then it comes a problem when I try to create something like (bandNumber1 corresponds to reflectance coefficient 2.21e-5) using FOR loop.
for(node in nodes[6:9]) {
bn = get("bandNumber", node)
if(bn %in% c('1','2','3','4')){
i = integer(bn)
coeffs = get("reflectanceCoefficient", node)
}
print(coeffs)
}
which prints out:
[1] "2.21386105481e-05"
[1] "2.31474175457e-05"
[1] "2.60208594123e-05"
[1] "3.83481925626e-05"
But I want a vector with 1, 2, 3, 4 with the corresponding numbers. It seems to me that the number overwrites the last one every time it prints.
Then I tried:
for(node in nodes[6:9]) {
n = 1:4
b[n] = get("bandNumber", node)
if(b[n] %in% c('1','2','3','4')){
i = integer(b[n])
coeffs[i] = get("reflectanceCoefficient", node)
}
print(coeffs)
}
But turns out
Error in integer(b[n]) : invalid 'length' argument
In addition: Warning message:
In if (b[n] %in% c("1", "2", "3", "4")) { :
the condition has length > 1 and only the first element will be used
How do I fix this?
I used XML::xmlParse() to parse the xml and matadata.list <- XML::xmlToList() to convert the data to list.
For reproducible example, see below:
dput(matadata.list)
structure(list(metaDataProperty = structure(list(EarthObservationMetaData = structure(list(
identifier = "20170127_213132_0e0e_3B_AnalyticMS", acquisitionType = "NOMINAL",
productType = "L3B", status = "ARCHIVED", downlinkedTo = structure(list(
DownlinkInformation = structure(list(acquisitionStation = structure(list(
text = "Planet Ground Station Network", .attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), acquisitionDate = "2017-01-27T21:31:32+00:00"), .Names = c("acquisitionStation",
"acquisitionDate"))), .Names = "DownlinkInformation"),
archivedIn = structure(list(ArchivingInformation = structure(list(
archivingCenter = structure(list(text = "Planet Archive Center",
.attrs = structure("urn:eop:PS:stationLocation", .Names = "codeSpace")), .Names = c("text",
".attrs")), archivingDate = "2017-01-27T21:31:32+00:00",
archivingIdentifier = structure(list(text = "385180",
.attrs = structure("urn:eop:PS:dmsCatalogueId", .Names = "codeSpace")), .Names = c("text",
".attrs"))), .Names = c("archivingCenter", "archivingDate",
"archivingIdentifier"))), .Names = "ArchivingInformation"),
processing = structure(list(ProcessingInformation = structure(list(
processorName = "CMO Processor", processorVersion = "4.1.4",
nativeProductFormat = "GeoTIFF"), .Names = c("processorName",
"processorVersion", "nativeProductFormat"))), .Names = "ProcessingInformation"),
license = structure(list(licenseType = "20160101 - Inc - Single User",
resourceLink = structure(c("PL EULA", "https://assets.planet.com/docs/20160101_Inc_SingleUser.txt"
), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xlink",
"xlink"), .Names = c("http://www.w3.org/1999/xlink",
"http://www.w3.org/1999/xlink")), .Names = c("title",
"href"))), .Names = c("licenseType", "resourceLink")),
versionIsd = "1.0", pixelFormat = "16U"), .Names = c("identifier",
"acquisitionType", "productType", "status", "downlinkedTo", "archivedIn",
"processing", "license", "versionIsd", "pixelFormat"))), .Names = "EarthObservationMetaData"),
validTime = structure(list(TimePeriod = structure(list(beginPosition = "2017-01-27T21:31:32+00:00",
endPosition = "2017-01-27T21:31:32+00:00"), .Names = c("beginPosition",
"endPosition"))), .Names = "TimePeriod"), using = structure(list(
EarthObservationEquipment = structure(list(platform = structure(list(
Platform = structure(list(shortName = "PlanetScope",
serialIdentifier = "0e0e", orbitType = "LEO-SSO"), .Names = c("shortName",
"serialIdentifier", "orbitType"))), .Names = "Platform"),
instrument = structure(list(Instrument = structure(list(
shortName = "PS2"), .Names = "shortName")), .Names = "Instrument"),
sensor = structure(list(Sensor = structure(list(sensorType = "OPTICAL",
resolution = structure(list(text = "3.0000",
.attrs = structure("m", .Names = "uom")), .Names = c("text",
".attrs")), scanType = "FRAME"), .Names = c("sensorType",
"resolution", "scanType"))), .Names = "Sensor"),
acquisitionParameters = structure(list(Acquisition = structure(list(
orbitDirection = "DESCENDING", incidenceAngle = structure(list(
text = "8.072969e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationAzimuthAngle = structure(list(
text = "7.610387e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), illuminationElevationAngle = structure(list(
text = "4.649194e+01", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), azimuthAngle = structure(list(text = "1.242074e+01",
.attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), spaceCraftViewAngle = structure(list(
text = "5.692807e-02", .attrs = structure("deg", .Names = "uom")), .Names = c("text",
".attrs")), acquisitionDateTime = "2017-01-27T21:31:32+00:00"), .Names = c("orbitDirection",
"incidenceAngle", "illuminationAzimuthAngle", "illuminationElevationAngle",
"azimuthAngle", "spaceCraftViewAngle", "acquisitionDateTime"
))), .Names = "Acquisition")), .Names = c("platform",
"instrument", "sensor", "acquisitionParameters"))), .Names = "EarthObservationEquipment"),
target = structure(list(Footprint = structure(list(multiExtentOf = structure(list(
MultiSurface = structure(list(surfaceMembers = structure(list(
Polygon = structure(list(outerBoundaryIs = structure(list(
LinearRing = structure(list(coordinates = "175.446585079397,-37.7068873856657 175.446633607572,-37.7045627724835 175.46731776545,-37.6311749428137 175.468010520596,-37.6311839417076 175.75989021492,-37.6819836599337 175.759889856814,-37.6820051679817 175.739424097003,-37.757826933992 175.739359440859,-37.7578262423109 175.446585079397,-37.7068873856657"), .Names = "coordinates")), .Names = "LinearRing"),
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("outerBoundaryIs",
".attrs"))), .Names = "Polygon"), .attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("surfaceMembers",
".attrs"))), .Names = "MultiSurface"), centerOf = structure(list(
Point = structure(list(pos = "175.603162359 -37.6944367036",
.attrs = structure("EPSG:4326", .Names = "srsName")), .Names = c("pos",
".attrs"))), .Names = "Point"), geographicLocation = structure(list(
topLeft = structure(list(latitude = "-37.6311749428",
longitude = "175.446585079"), .Names = c("latitude",
"longitude")), topRight = structure(list(latitude = "-37.6311749428",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomRight = structure(list(latitude = "-37.757826934",
longitude = "175.759890215"), .Names = c("latitude",
"longitude")), bottomLeft = structure(list(latitude = "-37.757826934",
longitude = "175.446585079"), .Names = c("latitude",
"longitude"))), .Names = c("topLeft", "topRight", "bottomRight",
"bottomLeft"))), .Names = c("multiExtentOf", "centerOf",
"geographicLocation"))), .Names = "Footprint"), resultOf = structure(list(
EarthObservationResult = structure(list(product = structure(list(
ProductInformation = structure(list(fileName = "20170127_213132_0e0e_3B_AnalyticMS.tif",
productFormat = "GeoTIFF", spatialReferenceSystem = structure(list(
epsgCode = "32760", geodeticDatum = "WGS_1984",
projection = "WGS 84 / UTM zone 60S", projectionZone = "160"), .Names = c("epsgCode",
"geodeticDatum", "projection", "projectionZone"
)), resamplingKernel = "CC", numRows = "4565",
numColumns = "9194", numBands = "4", rowGsd = "3.0",
columnGsd = "3.0", radiometricCorrectionApplied = "true",
geoCorrectionLevel = "Precision Geocorrection",
elevationCorrectionApplied = "FineDEM", atmosphericCorrectionApplied = "false"), .Names = c("fileName",
"productFormat", "spatialReferenceSystem", "resamplingKernel",
"numRows", "numColumns", "numBands", "rowGsd", "columnGsd",
"radiometricCorrectionApplied", "geoCorrectionLevel",
"elevationCorrectionApplied", "atmosphericCorrectionApplied"
))), .Names = "ProductInformation"), mask = structure(list(
MaskInformation = structure(list(type = "UNUSABLE DATA",
format = "RASTER", referenceSystemIdentifier = structure(list(
text = "32760", .attrs = structure("EPSG", .Names = "codeSpace")), .Names = c("text",
".attrs")), fileName = "20170127_213132_0e0e_3B_AnalyticMS_DN_udm.tif"), .Names = c("type",
"format", "referenceSystemIdentifier", "fileName"
))), .Names = "MaskInformation"), cloudCoverPercentage = structure(list(
text = "0.01", .attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), cloudCoverPercentageQuotationMode = "AUTOMATIC",
unusableDataPercentage = structure(list(text = "0.0",
.attrs = structure("percentage", .Names = "uom")), .Names = c("text",
".attrs")), bandSpecificMetadata = structure(list(
bandNumber = "1", comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.21386105481e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "2",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.31474175457e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "3",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "2.60208594123e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
)), bandSpecificMetadata = structure(list(bandNumber = "4",
comment = NULL, radiometricScaleFactor = "0.01",
comment = NULL, reflectanceCoefficient = "3.83481925626e-05"), .Names = c("bandNumber",
"comment", "radiometricScaleFactor", "comment", "reflectanceCoefficient"
))), .Names = c("product", "mask", "cloudCoverPercentage",
"cloudCoverPercentageQuotationMode", "unusableDataPercentage",
"bandSpecificMetadata", "bandSpecificMetadata", "bandSpecificMetadata",
"bandSpecificMetadata"))), .Names = "EarthObservationResult"),
.attrs = structure(c("http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level http://schemas.planet.com/ps/v1/planet_product_metadata_geocorrected_level.xsd",
"1.2.1", "1.0"), class = structure("XMLAttributes", package = "XML"), namespaces = structure(c("xsi",
"", ""), .Names = c("http://www.w3.org/2001/XMLSchema-instance",
"", "")), .Names = c("schemaLocation", "version", "planet_standard_product_version"
))), .Names = c("metaDataProperty", "validTime", "using",
"target", "resultOf", ".attrs"))
As you did not provide any reproducible data, the following attempt may not work:
# Initialise vectors:
b <- vector(mode = "character", length = 4)
coeffs <- vector(mode = "character", length = 4)
# Get coefficients
for(i in 6:9) {
b[i] = get("bandNumber", nodes[[i]])
coeffs[i] <- ifelse(b[i] %in% 6:9),
get("reflectanceCoefficient", nodes[[i]]), # Yes cond val
NA) # No cond val
}
coeffs
(edited to answer the updated question)
Have a look at these answers to work with original xml data: How to parse XML to R data frame
You already parsed the xml file and now you have lists. I think package purrr (https://purrr.tidyverse.org/) helps a lot in this case.
I assume that we know the path to the EarthObservationResult. Note how we extract reflectanceCoefficient from all sub-nodes and discard the NULL elements with compact.
library(tidyverse)
nodes <- matadata.list$resultOf$EarthObservationResult
coefff <- nodes %>%
purrr::map("reflectanceCoefficient") %>%
purrr::compact() %>%
purrr::map_dbl(~ as.numeric(.x)) %>%
purrr::set_names(nm = NULL)
print(coeffs)
#> [1] 2.213861e-05 2.314742e-05 2.602086e-05 3.834819e-05
Created on 2018-08-28 by the reprex package (v0.2.0).

Sankey plot with the riverplot package

Enchanté.
EDIT: Solution
As pointed out by MartineJ and emilliman5, nodes should be uniquely labelled (below).
library("riverplot")
nodes<-structure(list(ID = c("2011+", "2011-", "2016+", "2016-"), x = c(20,
20, 30, 30), y = c(50, 40, 50, 40)), class = "data.frame", row.names = c(NA,
-4L))
edges<-structure(list(N1 = c("2011+", "2011-", "2011+", "2011-"), N2 =
c("2016+", "2016-", "2016-", "2016+"), Value = c(461, 7, 0, 46)), class =
"data.frame", row.names = c(NA, -4L))
river <- makeRiver(nodes,edges)
riverplot(river)
I've been toying to plot a Sankey diagram/riverplot (using the riverplot package) of how cancer registrations evolve over time, though this code has bought me little success so far. Could anyone possibly direct me on the faults of this code?
Warning message: In checkedges(x2$edges, names(x2)) : duplicated edge information, removing 1 edges
Here is the suspect code:
library(“riverplot”)
edges<-structure(list(N1 = c("+", "-", "+", "-"), N2 = c("+", "-", "-", "+"), Value = c(664L, 50L, 0L, 46L)), .Names = c("N1", "N2", "Value"), class = "data.frame", row.names = c(NA, -4L))
nodes = data.frame(ID = unique(c(edges$N1, edges$N2)), stringsAsFactors = FALSE)
nodes$x = c(1,2)
rownames(nodes) = nodes$ID
rp <- list(nodes=nodes, edges=edges)
class(rp) <- c(class(rp), "riverplot")
plot(rp)
And the data, which is included in code:
N1 N2 Value
+ + 664
- - 50
+ - 0
- + 46
Eternally grateful.
It looks like you're using the same value multiple times in N1 (and in N2). Try to make them all different (per column) and try again, f.i.:
N1: plus1 minus1 plus2 minus2
If you want to show only + and -: in makeRiver, there is an option **node_labels **
Your nodes need to be named uniquely and then use the nodes$labels to change it back:
library(riverplot)
edges<-structure(list(N1 = c("+", "-", "+", "-"), N2 = c("+", "-", "-", "+"), Value = c(664L, 50L, 0L, 46L)), .Names = c("N1", "N2", "Value"), class = "data.frame", row.names = c(NA, -4L))
edges$N1 <- paste0(edges$N1, "a")
edges$N2 <- paste0(edges$N2, "b")
nodes = data.frame(ID = unique(c(edges$N1, edges$N2)), stringsAsFactors = FALSE)
nodes$x = c(1,1,2,2)
nodes$labels <- as.character(substr(nodes$ID, 1, 1))
rownames(nodes) = nodes$ID
rp <- list(nodes=nodes, edges=edges)
class(rp) <- c(class(rp), "riverplot")
plot(rp)

Resources