Extract values from JSON complex object - r

I have a JSON file as shown below, and I would like to extract its data into an R data frame as follows. The JSON object holds a list of values for each date; I would like to extract those values into the data frame. Could you kindly help me with how to build this?
Output Dataframe
Jan-18 a 5
Jan-18 b 0
Jan-18 c 9
Jan-18 d 0
Jan-18 e 5
Jan-19 a 4
Jan-19 b 0
Jan-19 c 26
Jan-19 d 0
Jan-19 e 35
value_headers = ['a', 'b', 'c', 'd', 'e']
Input JSON content:
{
"default": {
"timelineData": [
{
"time": "1610928000",
"formattedTime": "Jan 18, 2021",
"formattedAxisTime": "Jan 18",
"value": [
5,
0,
9,
0,
5
],
"hasData": [
true,
false,
true,
false,
true
],
"formattedValue": [
"5",
"0",
"9",
"0",
"5"
]
},
{
"time": "1611014400",
"formattedTime": "Jan 19, 2021",
"formattedAxisTime": "Jan 19",
"value": [
4,
0,
26,
0,
35
],
"hasData": [
true,
false,
true,
false,
true
],
"formattedValue": [
"4",
"0",
"26",
"0",
"35"
]
}
],
"averages": [
5,
1,
34,
25,
25
]
}
}

Using tidyverse could be something like:
library(jsonlite)
library(tidyverse)
json_dt <- fromJSON('{
"default": {
"timelineData": [
{
"time": "1610928000",
"formattedTime": "Jan 18, 2021",
"formattedAxisTime": "Jan 18",
"value": [
5,
0,
9,
0,
5
],
"hasData": [
true,
false,
true,
false,
true
],
"formattedValue": [
"5",
"0",
"9",
"0",
"5"
]
},
{
"time": "1611014400",
"formattedTime": "Jan 19, 2021",
"formattedAxisTime": "Jan 19",
"value": [
4,
0,
26,
0,
35
],
"hasData": [
true,
false,
true,
false,
true
],
"formattedValue": [
"4",
"0",
"26",
"0",
"35"
]
}
],
"averages": [
5,
1,
34,
25,
25
]
}
}')
# Build one row per (date, series) pair from the parsed JSON.
# `formattedValue` is a list column holding one character vector per date, so
# unnest() expands it to one row per value.
tibble(
  time = json_dt$default$timelineData$formattedTime,
  value = json_dt$default$timelineData$formattedValue
) %>%
  unnest(value) %>%
  # Label the values within each date as a, b, c, ... in their original order.
  group_by(time) %>%
  mutate(letter = letters[seq_len(n())]) %>%
  # Drop the grouping before touching `time`: it is the grouping column, and
  # the result should not stay grouped for downstream code.
  ungroup() %>%
  mutate(
    value = as.integer(value),
    # "Jan 18, 2021" -> "Jan 18" -> "Jan-18"
    time = str_replace(time, ",.*", ""),
    time = str_replace(time, " ", "-")
  )

Related

Putting string values in a column of lists

I have data that looks as follows (example data at the bottom):
# A tibble: 40 × 6
rn strata lower upper direction value
<chr> <list> <chr> <chr> <chr> <chr>
1 A <dbl [6]> 0 25 East 0 (replaced)
2 A <dbl [6]> 25 100 East 3 (replaced)
3 A <dbl [6]> 100 500 East 3
4 A <dbl [6]> 500 1000 East 4
5 A <dbl [6]> 1000 1000000 East 5
6 A <dbl [6]> 0 25 North 0 (replaced)
7 A <dbl [6]> 25 100 North 0 (replaced)
8 A <dbl [6]> 100 500 North 1
9 A <dbl [6]> 500 1000 North 28 (replaced)
10 A <dbl [6]> 1000 1000000 North 2
# … with 30 more rows
I would like to concatenate all value entries by rn, direction, upper. This can almost be done with the following code:
dat_in_new <- dat %>%
# One line for each rn-group
group_by(rn, upper, direction) %>%
# Calculate the sum, not taking into account replaced values
summarise(freq = sum(as.numeric(value), na.rm=TRUE), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$value))) %>%
group_by(rn) %>%
summarise(freq = list(freq), .groups = "drop")
# A tibble: 2 × 2
rn freq
<chr> <list>
1 A c(0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 5, 2, 9, 0, 0, 0, 0, 0, 0, 0, 3, 1, 1, 0, 0)
2 B c(0, 0, 1, 0, 0, 13, 0, 2, 1, 0, 10, 3, 5, 0, 0, 1, 0, 1, 0, 0, 4, 0, 0, 1, 0)
This solution now has the correct sum, because the replaced values should not be added to the sum. However they should be added to the list. I have been trying to separate the two, but I cannot figure it out.
EDIT:
I thought it would maybe be possible to create another value column, say value_string, force value to numeric and keep value_string as strings, summarise both of them, get the sum from value and the values from value_string. But I can't figure out how to write the syntax.
Desired output:
# A tibble: 2 × 2
rn freq
<chr> <list>
1 A c("0 (replaced)", "0 (replaced)", ... )
2 B c("0 (replaced)", "0 (replaced)", ... )
Related questions:
Make a list out of frequencies, concatenating categories to that list
Using a column, with lists of values, to specify from which columns to create another list of values
DATA
library(dplyr)
library(tidyr)
dat <- structure(list(rn = c("A", "A", "A", "A",
"A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "B",
"B", "B", "B", "B",
"B", "B", "B", "B",
"B", "B", "B", "B",
"B", "B", "B", "B",
"B", "B", "B"), strata = list(
c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100,
500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0,
25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06
), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100,
500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0,
25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06
), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100,
500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0,
25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06
), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100,
500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0,
25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06
), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06)), lower = c("0", "25", "100", "500", "1000",
"0", "25", "100", "500", "1000", "0", "25", "100", "500", "1000",
"0", "25", "100", "500", "1000", "0", "25", "100", "500", "1000",
"0", "25", "100", "500", "1000", "0", "25", "100", "500", "1000",
"0", "25", "100", "500", "1000"), upper = c("25", "100", "500",
"1000", "1000000", "25", "100", "500", "1000", "1000000", "25",
"100", "500", "1000", "1000000", "25", "100", "500", "1000",
"1000000", "25", "100", "500", "1000", "1000000", "25", "100",
"500", "1000", "1000000", "25", "100", "500", "1000", "1000000",
"25", "100", "500", "1000", "1000000"), direction = c("East",
"East", "East", "East", "East", "North", "North", "North", "North",
"North", "South", "South", "South", "South", "South", "West",
"West", "West", "West", "West", "East", "East", "East", "East",
"East", "North", "North", "North", "North", "North", "South",
"South", "South", "South", "South", "West", "West", "West", "West",
"West"), value = c("0 (replaced)", "3 (replaced)", "3", "4", "5",
"0 (replaced)", "0 (replaced)", "1", "28 (replaced)", "2", "0 (replaced)",
"2 (replaced)", "1", "3", "9", "0 (replaced)", "1 (replaced)", "9 (replaced)",
"8 (replaced)", "21 (replaced)", "1", "61 (replaced)", "4", "13", "10",
"2 (replaced)", "12 (replaced)", "48 (replaced)", "32 (replaced)", "3",
"1", "1", "76 (replaced)", "2", "5", "0 (replaced)", "4 (replaced)",
"1", "1", "15 (replaced)")), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
Perhaps this helps
library(dplyr)
library(stringr)
out <- dat %>%
mutate(value_str = replace(value, str_detect(value, "^[0-9]+$"), NA_character_),
value = as.numeric(value)) %>%
group_by(rn, lower, upper) %>%
transmute(value = sum(value, na.rm = TRUE), value_str) %>%
group_by(rn, lower) %>%
group_modify(~add_row(., upper = "Sum", value = sum(.$value))) %>%
ungroup %>%
mutate(value = coalesce(value_str, as.character(value))) %>%
distinct(rn, lower, upper, value) %>%
group_by(rn) %>%
summarise(value = list(value))
-output
> out$value
[[1]]
[1] "0 (replaced)" "0" "5" "9 (replaced)" "20" "16" "21 (replaced)" "64" "3 (replaced)"
[10] "2 (replaced)" "1 (replaced)" "7" "28 (replaced)" "8 (replaced)" "28"
[[2]]
[1] "2" "2 (replaced)" "0 (replaced)" "8" "5" "48 (replaced)" "76 (replaced)" "20" "18"
[10] "15 (replaced)" "72" "61 (replaced)" "12 (replaced)" "1" "4 (replaced)" "4" "16" "32 (replaced)"
[19] "64"
I am not sure, but maybe you are looking for this:
What we do here is simply paste and collapse all the values after unnesting:
library(dplyr)
library(tidyr)
dat %>%
group_by(rn, upper,direction) %>%
summarise(freq = sum(as.numeric(value), na.rm=TRUE), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$value))) %>%
group_by(rn) %>%
summarise(freq = list(freq), .groups = "drop") %>%
unnest() %>%
group_by(rn) %>%
mutate(freq = paste0(freq, " (replaced)", collapse = ", ")) %>%
slice(1)
rn freq
<chr> <chr>
1 A 0 (replaced), 0 (replaced), 0 (replaced), 0 (replaced), 0 (re~
2 B 0 (replaced), 0 (replaced), 1 (replaced), 0 (replaced), 0 (re~
I eventually figured it out, although it is far from the cleanest approach:
# Only sum values that are not replaced
dat$upper <- as.character(dat$upper)
dat <- dat %>%
group_by(rn, direction ) %>%
summarise(value = as.character(sum(as.numeric(value), na.rm=TRUE)), .groups = 'drop_last', upper="1000001", strata=strata) %>% # get sum of sizes
bind_rows(dat, .)
# Remove the duplicate rows
dat <- unique( dat )
# Convert upper back to numeric for sorting
dat$upper <- as.numeric(dat$upper)
dat <- dat %>%
arrange(rn, direction, upper)
# Create list
dat <- dat %>%
group_by(rn, strata) %>%
summarise(freq = list(value), .groups = 'drop')

regex for replacement of non-numeric character INSIDE parenthesis within a string in dplyr workflow

My question is somehow related to an already answered question Need to extract individual characters from a string column using R.
I tried to solve this question with my own knowledge, and I need to know how to remove the non-numeric characters inside parentheses within a string.
This is the dataframe with column x:
team linescore ondate x
1 NYM 010000000 2020-08-01 0, 1, 0, 0, 0, 0, 0, 0, 0
2 NYM (10)1140006x) 2020-08-02 (, 1, 0, ), 1, 1, 4, 0, 0, 0, 6, x, )
3 BOS 002200010 2020-08-13 0, 0, 2, 2, 0, 0, 0, 1, 0
4 NYM 00000(11)01x 2020-08-15 0, 0, 0, 0, 0, (, 1, 1, ), 0, 1, x
5 BOS 311200 2020-08-20 3, 1, 1, 2, 0, 0
structure(list(team = c("NYM", "NYM", "BOS", "NYM", "BOS"), linescore = c("010000000",
"(10)1140006x)", "002200010", "00000(11)01x", "311200"), ondate = structure(c(18475,
18476, 18487, 18489, 18494), class = "Date"), x = list(c("0",
"1", "0", "0", "0", "0", "0", "0", "0"), c("(", "1", "0", ")",
"1", "1", "4", "0", "0", "0", "6", "x", ")"), c("0", "0", "2",
"2", "0", "0", "0", "1", "0"), c("0", "0", "0", "0", "0", "(",
"1", "1", ")", "0", "1", "x"), c("3", "1", "1", "2", "0", "0"
))), class = "data.frame", row.names = c(NA, -5L))
Desired Output:
team linescore ondate x
1 NYM 010000000 2020-08-01 0, 1, 0, 0, 0, 0, 0, 0, 0
2 NYM (10)1140006x) 2020-08-02 10, 1, 1, 4, 0, 0, 0, 6, x, )
3 BOS 002200010 2020-08-13 0, 0, 2, 2, 0, 0, 0, 1, 0
4 NYM 00000(11)01x 2020-08-15 0, 0, 0, 0, 0, 11, 0, 1, x
5 BOS 311200 2020-08-20 3, 1, 1, 2, 0, 0
How can I change (, 1, 0, ) to 10 and (, 1, 1, ) to 11 and leave the rest as is.
Some help I already got so far:
regex for replacement of specific character outside parenthesis only thanks AnilGoyal
gsub("\\D+", "", str1) thanks to akrun
gsub("[(,) ]", "", "(, 1, 0, )") thanks to Anoushirvan
Thanks!
We could do this in base R. An option is to insert a delimiter between the characters that are outside the (...) with *SKIP/*FAIL, then remove the paired () while keeping the characters by capturing as a group, finally return the list by splitting at the , with strsplit
# Split each linescore into per-inning tokens, keeping "(dd)" groups together.
# Inner gsub (perl = TRUE): a complete "(...)" group is matched and then
# discarded via (*SKIP)(*FAIL), so it is left untouched; every other single
# character is captured and re-emitted followed by ",".
# Outer gsub: turns each remaining "(digits)" into "digits,", dropping the
# paired parentheses.
# strsplit then cuts at "," to give one character vector per row.
df1$x <- strsplit(gsub("\\((\\d+)\\)", "\\1,",
gsub("\\([^)]+\\)(*SKIP)(*FAIL)|(.)", "\\1,",
df1$linescore, perl = TRUE)),",")
-output
df1$x
[[1]]
[1] "0" "1" "0" "0" "0" "0" "0" "0" "0"
[[2]]
[1] "10" "1" "1" "4" "0" "0" "0" "6" "x" ")"
[[3]]
[1] "0" "0" "2" "2" "0" "0" "0" "1" "0"
[[4]]
[1] "0" "0" "0" "0" "0" "11" "0" "1" "x"
[[5]]
[1] "3" "1" "1" "2" "0" "0"
Here is another way to get to your desired output that I just figured out, which does not rely on regex. However, the use of regex makes your solution much more elegant and compact:
library(purrr)
map(df %>% select(linescore), ~ strsplit(.x, "\\(|\\)")) %>%
flatten() %>%
map_dfr(~ map(.x, ~ if(nchar(.x) > 2) strsplit(.x, "")[[1]] else .x) %>%
reduce(~ c(.x, .y)) %>%
keep(~ nchar(.x) != 0) %>% t() %>%
as_tibble() %>%
set_names(~ paste0("inng", 1:length(.x))))
# A tibble: 5 x 9
inng1 inng2 inng3 inng4 inng5 inng6 inng7 inng8 inng9
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 0 1 0 0 0 0 0 0 0
2 10 1 1 4 0 0 0 6 x
3 0 0 2 2 0 0 0 1 0
4 0 0 0 0 0 11 0 1 x
5 3 1 1 2 0 0 NA NA NA

Import JSON dataset to R

I want to make a data frame from the following JSON sample:
{"gender": "M", "age": 68, "id": "e2127556f4f64592b11af22de27a7932", "became_member_on": "20180426", "income": 70000}
{"gender": null, "age": 118, "id": "8ec6ce2a7e7949b1bf142def7d0e0586", "became_member_on": "20170925", "income": null}
{"gender": null, "age": 118, "id": "68617ca6246f4fbc85e91a2a49552598", "became_member_on": "20171002", "income": null}
{"gender": "M", "age": 65, "id": "389bc3fa690240e798340f5a15918d5c", "became_member_on": "20180209", "income": 53000}
{"gender": null, "age": 118, "id": "8974fc5686fe429db53ddde067b88302", "became_member_on": "20161122", "income": null}
{"gender": null, "age": 118, "id": "c4863c7985cf408faee930f111475da3", "became_member_on": "20170824", "income": null}
{"gender": null, "age": 118, "id": "148adfcaa27d485b82f323aaaad036bd", "became_member_on": "20150919", "income": null}
We can use stream_in
out <- jsonlite::stream_in(textConnection(str1))
str(out)
#'data.frame': 7 obs. of 5 variables:
# $ gender : chr "M" NA NA "M" ...
# $ age : int 68 118 118 65 118 118 118
# $ id : chr "e2127556f4f64592b11af22de27a7932" "8ec6ce2a7e7949b1bf142def7d0e0586" "68617ca6246f4fbc85e91a2a49552598" "389bc3fa690240e798340f5a15918d5c" ...
# $ became_member_on: chr "20180426" "20170925" "20171002" "20180209" ...
# $ income : int 70000 NA NA 53000 NA NA NA
If we are reading from a file
out <- jsonlite::stream_in(file('yourfile.json'))
Or with ndjson::stream_in
out <- ndjson::stream_in('yourfile.json', 'tbl')
data
str1 <- '{"gender": "M", "age": 68, "id": "e2127556f4f64592b11af22de27a7932", "became_member_on": "20180426", "income": 70000}
{"gender": null, "age": 118, "id": "8ec6ce2a7e7949b1bf142def7d0e0586", "became_member_on": "20170925", "income": null}
{"gender": null, "age": 118, "id": "68617ca6246f4fbc85e91a2a49552598", "became_member_on": "20171002", "income": null}
{"gender": "M", "age": 65, "id": "389bc3fa690240e798340f5a15918d5c", "became_member_on": "20180209", "income": 53000}
{"gender": null, "age": 118, "id": "8974fc5686fe429db53ddde067b88302", "became_member_on": "20161122", "income": null}
{"gender": null, "age": 118, "id": "c4863c7985cf408faee930f111475da3", "became_member_on": "20170824", "income": null}
{"gender": null, "age": 118, "id": "148adfcaa27d485b82f323aaaad036bd", "became_member_on": "20150919", "income": null}'

Convert R dataframe into tough JSON list of lists for d3.hierarchy model

Edit: I have cleaned up a bit the question posting, and added a bounty. I will be afk for a few days, but getting this resolved would be a huge help
I would like to create using d3 a d3.hierarchy of a tree model, using basketball data. I essentially want to create a bracket structured as such:
...where the graph / model is a tree where each node has exactly two children (except for all of the end / leaf nodes, of course). This is a textbook example of when you'd want to use the d3.tree() and d3.hierarchy() functionalities, but it requires a JSON in a fairly specific format for the d3.hierarchy command. In particular, for a bracket of 8 basketball teams in a tournament that goes 8 - 4 - 2 - 1, the JSON data needs to be formatted like this:
const playoffData = {
"name": "Rockets",
"round": 4,
"id": 15,
"children": [
{
"name": "Rockets",
"round": 3,
"id": 14,
"children": [
{
"name": "Rockets",
"round": 2,
"id": 9,
"children": [
{
"name": "Rockets",
"round": 1,
"id": 1
},
{
"name": "Timberwolves",
"round": 1,
"id": 8
}
]
},
{
"name": "Jazz",
"round": 2,
"id": 12,
"children": [
{
"name": "Jazz",
"round": 1,
"id": 4
},
{
"name": "Thunder",
"round": 1,
"id": 5
}
]
}
]
},
{
"name": "Warriors",
"round": 3,
"id": 13,
"children": [
{
"name": "Warriors",
"round": 2,
"id": 10,
"children": [
{
"name": "Warriors",
"round": 1,
"id": 2
},
{
"name": "Spurs",
"round": 1,
"id": 7
}
]
},
{
"name": "Pelicans",
"round": 2,
"id": 11,
"children": [
{
"name": "Pelicans",
"round": 1,
"id": 3
},
{
"name": "Trail Blazers",
"round": 1,
"id": 6
}
]
}
]
}
]
};
Note the nested nature of the JSONs. The root node corresponds with the winner of the bracket, and leaf nodes correspond to teams in the first round of the bracket.
I have the following R dataframe of basketball data for the bracket:
> dput(mydata)
structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15), teamname = c("Rockets", "Warriors", "Trail Blazers",
"Jazz", "Thunder", "Pelicans", "Spurs", "Timberwolves", "Rockets",
"Warriors", "Pelicans", "Jazz", "Rockets", "Warriors", "Rockets"
), conference = c("West", "West", "West", "West", "West", "West",
"West", "West", "West", "West", "West", "West", "West", "West",
"West"), seeding = c(1, 2, 3, 4, 5, 6, 7, 8, NA, NA, NA, NA,
NA, NA, NA), round = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3,
3, 4), child1 = c(NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3, 4,
9, 11, 13), child2 = c(NA, NA, NA, NA, NA, NA, NA, NA, 8, 7,
6, 5, 12, 10, 14), wins = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), losses = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), completed = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), winprobs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA)), .Names = c("id", "teamname", "conference", "seeding",
"round", "child1", "child2", "wins", "losses", "completed", "winprobs"
), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 17L, 18L, 19L,
20L, 25L, 26L, 29L), class = "data.frame")
> mydata
> playoff.data
id teamname conference seeding round child1 child2 wins losses completed winprobs
1 1 Rockets West 1 1 NA NA 0 0 FALSE NA
2 2 Warriors West 2 1 NA NA 0 0 FALSE NA
3 3 Trail Blazers West 3 1 NA NA 0 0 FALSE NA
4 4 Jazz West 4 1 NA NA 0 0 FALSE NA
5 5 Thunder West 5 1 NA NA 0 0 FALSE NA
6 6 Pelicans West 6 1 NA NA 0 0 FALSE NA
7 7 Spurs West 7 1 NA NA 0 0 FALSE NA
8 8 Timberwolves West 8 1 NA NA 0 0 FALSE NA
17 9 Rockets West NA 2 1 8 0 0 FALSE NA
18 10 Warriors West NA 2 2 7 0 0 FALSE NA
19 11 Pelicans West NA 2 3 6 0 0 FALSE NA
20 12 Jazz West NA 2 4 5 0 0 FALSE NA
25 13 Rockets West NA 3 9 12 0 0 FALSE NA
26 14 Warriors West NA 3 11 10 0 0 FALSE NA
29 15 Rockets West NA 4 13 14 0 0 FALSE NA
As you can see, my R data frame has one row for what will be each node in my d3 graph. Notice the tree structure in particular, and the child1 and child2 helper columns for identifying children: for the final round (row 15), the child nodes are the two nodes in the previous round (13 and 14). For row 13 (the semi-finals), the child nodes are 9 and 12, and so on. The first 8 rows are the first round, so they are leaf nodes and have no children.
Its a bit long, but I wanted to include the whole JSON and R dataframe to keep things clear. I would also like other dataframe columns (wins, losses, win probs) included in the JSON structure, however for a bit of brevity, I did not show these in the JSON above.
A last note: while I work mainly in R, this is a d3 graph I am making, and as such there is quite a bit of JavaScript coding that I must do for this. My opinion is that R is better for this type of data manipulation; however, since this is a nested JSON object we're dealing with, maybe JS is better. If there's an easier solution that uses JavaScript to map a 2D JSON version of the R data frame into the desired nested JSON, that would probably be sufficient as well.
Any help with this is appreciated! I promise to select a top answer once I return to award the bounty.
Here is a tidyverse solution.
We reformat your data and split the data.frame in 4 data.frames.
Then we join those, nesting the relevant columns at each step.
Finally we use toJSON to finish the job :
my.split <- my.data %>%
gather(temp,children,child1,child2) %>%
select(-temp) %>%
select(name= teamname,round,id,children) %>% # change here to keep more columns
distinct %>%
split(.$round)
my.split[[1]] %>%
select(-children) %>%
right_join(my.split[[2]],by=c(id="children"),suffix=c("",".y")) %>%
nest(1:3) %>% # change here to keep more columns
setNames(names(my.split[[1]])) %>%
right_join(my.split[[3]],by=c(id="children"),suffix=c("",".y")) %>%
nest(1:4) %>% # change here to keep more columns
setNames(names(my.split[[1]])) %>%
right_join(my.split[[4]],by=c(id="children"),suffix=c("",".y")) %>%
nest(1:4) %>% # change here to keep more columns
setNames(names(my.split[[1]])) %>%
jsonlite::toJSON(pretty=TRUE)
output:
[
{
"name": "Rockets",
"round": 4,
"id": 15,
"children": [
{
"name": "Rockets",
"round": 3,
"id": 13,
"children": [
{
"name": "Rockets",
"round": 2,
"id": 9,
"children": [
{
"name": "Rockets",
"round": 1,
"id": 1
},
{
"name": "Timberwolves",
"round": 1,
"id": 8
}
]
},
{
"name": "Jazz",
"round": 2,
"id": 12,
"children": [
{
"name": "Jazz",
"round": 1,
"id": 4
},
{
"name": "Thunder",
"round": 1,
"id": 5
}
]
}
]
},
{
"name": "Warriors",
"round": 3,
"id": 14,
"children": [
{
"name": "Pelicans",
"round": 2,
"id": 11,
"children": [
{
"name": "Trail Blazers",
"round": 1,
"id": 3
},
{
"name": "Pelicans",
"round": 1,
"id": 6
}
]
},
{
"name": "Warriors",
"round": 2,
"id": 10,
"children": [
{
"name": "Warriors",
"round": 1,
"id": 2
},
{
"name": "Spurs",
"round": 1,
"id": 7
}
]
}
]
}
]
}
]
You can try this recursive function together with jsonlite::toJSON():
#' Build the nested bracket node for one row of a playoff table.
#'
#' Looks up the row of `df` whose `id` column equals `id` and returns it as a
#' list with elements `name` (taken from `teamname`), `round` and `id`. When
#' both `child1` and `child2` are non-missing, the two child nodes are built
#' recursively and stored in `children`, ordered so that the child sharing the
#' parent's name comes first. A row with a missing child id is returned
#' without a `children` element.
#'
#' @param df A data frame with columns `id`, `teamname`, `round`, `child1`
#'   and `child2`.
#' @param id The `id` value of the node to build.
#' @return A named list (`name`, `round`, `id`, and optionally `children`).
#'   Signals an error if `id` does not match exactly one row, or if neither
#'   child carries the parent's name.
get_node <- function(df, id) {
  # Look the row up once; which() drops NA comparisons so a missing id in the
  # table can never select a row.
  row <- df[which(df$id == id), , drop = FALSE]
  if (nrow(row) != 1L) {
    stop(
      "Expected exactly one row with id ", id, ", found ", nrow(row), ".",
      call. = FALSE
    )
  }

  node <- as.list(row[c("teamname", "round", "id")])
  names(node) <- c("name", "round", "id")

  id1 <- row$child1
  id2 <- row$child2
  if (!is.na(id1) && !is.na(id2)) {
    child1 <- get_node(df, id1)
    child2 <- get_node(df, id2)
    # Put the child that advanced (same name as the parent) first.
    if (child1$name == node$name) {
      node$children <- list(child1, child2)
    } else if (child2$name == node$name) {
      node$children <- list(child2, child1)
    } else {
      stop("Input data is inconsistent!", call. = FALSE)
    }
  }
  node
}
jsonlite::toJSON(get_node(playoffs, 15), pretty = TRUE, auto_unbox = TRUE)
With your data I get the following JSON:
{
"name": "Rockets",
"round": 4,
"id": 15,
"children": [
{
"name": "Rockets",
"round": 3,
"id": 13,
"children": [
{
"name": "Rockets",
"round": 2,
"id": 9,
"children": [
{
"name": "Rockets",
"round": 1,
"id": 1
},
{
"name": "Timberwolves",
"round": 1,
"id": 8
}
]
},
{
"name": "Jazz",
"round": 2,
"id": 12,
"children": [
{
"name": "Jazz",
"round": 1,
"id": 4
},
{
"name": "Thunder",
"round": 1,
"id": 5
}
]
}
]
},
{
"name": "Warriors",
"round": 3,
"id": 14,
"children": [
{
"name": "Warriors",
"round": 2,
"id": 10,
"children": [
{
"name": "Warriors",
"round": 1,
"id": 2
},
{
"name": "Spurs",
"round": 1,
"id": 7
}
]
},
{
"name": "Pelicans",
"round": 2,
"id": 11,
"children": [
{
"name": "Pelicans",
"round": 1,
"id": 6
},
{
"name": "Trail Blazers",
"round": 1,
"id": 3
}
]
}
]
}
]
}

Adding a row with Sum and mean of the columns

I have a data frame like the one below.
`> am_me
Group.1 Group.2 x.x x.y
2 AM clearterminate 3 21.00000
3 AM display.cryptic 86 30.12791
4 AM price 71 898.00000`
I would like to get result as like below.
`> am_me_t
Group.2 x.x x.y
2 clearterminate 3 21
3 display.cryptic 86 30.1279069767442
4 price 71 898
41 AM 160 316.375968992248`
I have taken out the first column and got the result like below
`> am_res
Group.2 x.x x.y
2 clearterminate 3 21.00000
3 display.cryptic 86 30.12791
4 price 71 898.00000`
When I use rbind to add "AM" as a new row, as shown below, I get a warning message and an NA.
`> am_me_t <- rbind(am_res, c("AM", colSums(am_res[2]), colMeans(am_res[3])))
Warning message:
invalid factor level, NAs generated in: "[<-.factor"(`*tmp*`, ri, value = "AM")
Group.2 x.x x.y
2 clearterminate 3 21
3 display.cryptic 86 30.1279069767442
4 price 71 898
41 <NA> 160 316.375968992248`
For your information, Output of edit(am_me)
`> edit(am_me)
structure(list(Group.1 = structure(as.integer(c(2, 2, 2)), .Label = c("1Y",
"AM", "BE", "CM", "CO", "LX", "SN", "US", "VK", "VS"), class = "factor"),
Group.2 = structure(as.integer(c(2, 5, 9)), .Label = c("bestbuy",
"clearterminate", "currency.display", "display", "display.cryptic",
"fqa", "mileage.display", "ping", "price", "reissue", "reissuedisplay",
"shortaccess.followon"), class = "factor"), x.x = as.integer(c(3,
86, 71)), x.y = c(21, 30.1279069767442, 898)), .Names = c("Group.1",
"Group.2", "x.x", "x.y"), row.names = c("2", "3", "4"), class = "data.frame")`
Also
`> edit(me)
structure(list(Group.1 = structure(as.integer(c(1, 2, 2, 2, 3,
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8,
8, 8, 9, 9, 10, 10, 10, 10, 10, 10)), .Label = c("1Y", "AM",
"BE", "CM", "CO", "LX", "SN", "US", "VK", "VS"), class = "factor"),
Group.2 = structure(as.integer(c(8, 2, 5, 9, 10, 1, 2, 5,
9, 1, 2, 5, 9, 1, 2, 3, 4, 7, 9, 11, 12, 2, 4, 6, 1, 2, 5,
9, 2, 5, 1, 2, 3, 5, 9, 10)), .Label = c("bestbuy", "clearterminate",
"currency.display", "display", "display.cryptic", "fqa",
"mileage.display", "ping", "price", "reissue", "reissuedisplay",
"shortaccess.followon"), class = "factor"), x.x = as.integer(c(1,
3, 86, 71, 1, 2, 5, 1, 52, 10, 7, 27, 15, 5, 267, 14, 4,
1, 256, 1, 1, 80, 1, 78, 2, 10, 23, 6, 1, 2, 4, 3, 3, 11,
1, 1)), x.y = c(5, 21, 30.1279069767442, 898, 12280, 800,
56.4, 104, 490.442307692308, 1759.1, 18.1428571428571, 1244.81481481481,
518.533333333333, 3033.2, 18.5468164794007, 20, 3788.5, 23,
2053.49609375, 3863, 6376, 17.825, 240, 1752.21794871795,
1114.5, 34, 1369.60869565217, 1062.16666666667, 23, 245,
5681.5, 11.3333333333333, 13.3333333333333, 1273.81818181818,
2076, 5724)), .Names = c("Group.1", "Group.2", "x.x", "x.y"
), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31",
"32", "33", "34", "35", "36"), class = "data.frame")
Group.1 Group.2 x.x x.y
1 1Y ping 1 5.00000
2 AM clearterminate 3 21.00000
3 AM display.cryptic 86 30.12791
4 AM price 71 898.00000
5 BE reissue 1 12280.00000
6 CM bestbuy 2 800.00000
7 CM clearterminate 5 56.40000
8 CM display.cryptic 1 104.00000
9 CM price 52 490.44231
10 CO bestbuy 10 1759.10000
11 CO clearterminate 7 18.14286
12 CO display.cryptic 27 1244.81481
13 CO price 15 518.53333
14 LX bestbuy 5 3033.20000
15 LX clearterminate 267 18.54682
16 LX currency.display 14 20.00000
17 LX display 4 3788.50000
18 LX mileage.display 1 23.00000
19 LX price 256 2053.49609
20 LX reissuedisplay 1 3863.00000
21 LX shortaccess.followon 1 6376.00000
22 SN clearterminate 80 17.82500
23 SN display 1 240.00000
24 SN fqa 78 1752.21795
25 US bestbuy 2 1114.50000
26 US clearterminate 10 34.00000
27 US display.cryptic 23 1369.60870
28 US price 6 1062.16667
29 VK clearterminate 1 23.00000
30 VK display.cryptic 2 245.00000
31 VS bestbuy 4 5681.50000
32 VS clearterminate 3 11.33333
33 VS currency.display 3 13.33333
34 VS display.cryptic 11 1273.81818
35 VS price 1 2076.00000
36 VS reissue 1 5724.00000`
The type of the Group.2 column is factor, and that limits the possible values. You can transform it to character with am_me$Group.2 <- as.character(am_me$Group.2), after that the AM value will be added without errors.
Note that you can also use sum() and mean() for single column operations.

Resources