I am trying to read a CSV file from a Dropbox link into a data frame using the following call:
df <- read.csv("https://www.dropbox.com/s/vta51y5wyzu86m1/FY_2008.csv?dl=0", stringsAsFactors = FALSE)
However I receive this error:
Error in read.table(file = file, header = header, sep = sep, quote = quote, :
duplicate 'row.names' are not allowed
Can anyone help me figure out why this error occurs?
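Change the dl=0 to dl=1. With dl=0 Dropbox serves an HTML preview page instead of the file itself, so read.csv ends up parsing HTML rather than CSV; dl=1 requests the raw file.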
For an abbreviated demonstration, I'll limit to just the top 10 rows:
# Same URL as the failing call, but with dl=1 in the query string; nrows = 10 limits the read to the first 10 rows.
df <- read.csv("https://www.dropbox.com/s/vta51y5wyzu86m1/FY_2008.csv?dl=1", nrows=10)
# Print the column names and types of the imported data frame.
str(df)
# 'data.frame': 10 obs. of 65 variables:
# $ contract_transaction_unique_key : chr "9700_9700_0000_0_W91QUZ07D0011_0" "9700_9700_0001_0_DAJA6196A0004_0" "6940_6940_0001_1_DTNH2208D00115_0" "9700_9700_0001_17_F0470001D0020_0" ...
# $ contract_award_unique_key : chr "CONT_AWD_0000_9700_W91QUZ07D0011_9700" "CONT_AWD_0001_9700_DAJA6196A0004_9700" "CONT_AWD_0001_6940_DTNH2208D00115_6940" "CONT_AWD_0001_9700_F0470001D0020_9700" ...
# $ award_id_piid : int 0 1 1 1 1 1 1 1 1 1
# $ modification_number : int 0 0 1 17 2 0 0 0 1 1
# $ transaction_number : int 0 0 0 0 0 0 0 0 0 0
# $ parent_award_agency_id : int 9700 9700 6940 9700 9700 9700 9700 9700 9700 9700
# $ parent_award_agency_name : chr "" "DEPT OF DEFENSE" "NATIONAL HIGHWAY TRAFFIC SAFETY ADMINISTRATION" "" ...
# $ parent_award_id_piid : chr "W91QUZ07D0011" "DAJA6196A0004" "DTNH2208D00115" "F0470001D0020" ...
# $ parent_award_modification_number : chr "0" "0" "0" "P00013" ...
# $ federal_action_obligation : num 1082099 1104 0 -15741 -15927 ...
# $ total_dollars_obligated : num NA 1104 NA NA NA ...
# $ current_total_value_of_award : num NA 1104 NA NA NA ...
# $ potential_total_value_of_award : num NA 1104 NA NA NA ...
# $ disaster_emergency_fund_codes_for_overall_award : logi NA NA NA NA NA NA ...
# $ outlayed_amount_funded_by_COVID.19_supplementals_for_overall_aw: logi NA NA NA NA NA NA ...
# $ obligated_amount_funded_by_COVID.19_supplementals_for_overall_a: logi NA NA NA NA NA NA ...
# $ action_date : chr "2008-09-30" "2008-09-30" "2008-09-30" "2008-09-30" ...
# $ action_date_fiscal_year : int 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008
# $ period_of_performance_start_date : chr "2008-09-30 00:00:00" "2008-09-30 00:00:00" "2008-09-30 00:00:00" "2008-09-30 00:00:00" ...
# $ period_of_performance_current_end_date : chr "2009-09-29 00:00:00" "2008-09-30 00:00:00" "2009-12-18 00:00:00" "2003-11-30 00:00:00" ...
# $ period_of_performance_potential_end_date : chr "2009-09-29 00:00:00" "2008-09-30 00:00:00" "2009-12-18 00:00:00" "2003-11-30 00:00:00" ...
# $ awarding_agency_code : int 97 97 69 97 97 97 97 97 97 97
# $ awarding_agency_name : chr "DEPARTMENT OF DEFENSE (DOD)" "DEPARTMENT OF DEFENSE (DOD)" "DEPARTMENT OF TRANSPORTATION (DOT)" "DEPARTMENT OF DEFENSE (DOD)" ...
# $ awarding_sub_agency_code : int 2100 2100 6940 5700 5700 5700 5700 5700 5700 5700
# $ awarding_sub_agency_name : chr "DEPT OF THE ARMY" "DEPT OF THE ARMY" "NATIONAL HIGHWAY TRAFFIC SAFETY ADMINISTRATION" "DEPT OF THE AIR FORCE" ...
# $ awarding_office_code : chr "W911W4" "W912PA" "00022" "FA9301" ...
# $ awarding_office_name : chr "W00Y CONTR OFC DODAAC" "ECC PARC EUROPE REGIONAL CONTRACTIN" "DEPT OF TRANS/NAT HIGHWAY TRAFFIC SAFETY ADM" "FA9301 AFTC PZIO" ...
# $ recipient_duns : int 614948396 123456787 49508120 848288408 92440044 52220485 144606436 132004701 122474104 57579807
# $ recipient_name : chr "WORLD WIDE TECHNOLOGY, INC." "MISCELLANEOUS FOREIGN AWARDEES" "WESTAT, INC." "ACCENT SERVICE COMPANY INC" ...
# $ recipient_doing_business_as_name : logi NA NA NA NA NA NA ...
# $ recipient_parent_duns : int 131784451 123456787 49508120 848288408 92440044 52220485 144606436 132004701 122474104 57579807
# $ recipient_parent_name : chr "WORLD WIDE TECHNOLOGY HOLDING CO. INC." "MISCELLANEOUS FOREIGN CONTRACTORS" "WESTAT INC." "ACCENT SERVICE COMPANY INC" ...
# $ recipient_country_code : chr "USA" "USA" "UNITED STATES" "UNITED STATES" ...
# $ recipient_country_name : chr "UNITED STATES OF AMERICA" "UNITED STATES" "" "" ...
# $ recipient_address_line_1 : chr "60 WELDON PKWY" "1800 F ST NW" "1650 RESEARCH BLVD RM RE164" "2001 LEMNOS DR" ...
# $ recipient_address_line_2 : logi NA NA NA NA NA NA ...
# $ recipient_city_name : chr "MARYLAND HEIGHTS" "WASHINGTON" "ROCKVILLE" "COSTA MESA" ...
# $ recipient_county_name : chr "ST. LOUIS" "DISTRICT OF COLUMBIA" "" "" ...
# $ recipient_state_code : chr "MO" "DC" "MD" "CA" ...
# $ recipient_state_name : chr "MISSOURI" "DISTRICT OF COLUMBIA" "" "" ...
# $ recipient_zip_4_code : int 63043 204050001 208503195 926263535 92408 329205818 769047833 223031802 782584092 782073102
# $ primary_place_of_performance_country_name : chr "UNITED STATES OF AMERICA" "GERMANY" "UNITED STATES" "UNITED STATES" ...
# $ primary_place_of_performance_city_name : chr "FORT BELVOIR" "" "ROCKVILLE" "EDWARDS" ...
# $ primary_place_of_performance_county_name : chr "FAIRFAX" "" "MONTGOMERY" "KERN" ...
# $ primary_place_of_performance_state_code : chr "VA" "" "MD" "CA" ...
# $ primary_place_of_performance_state_name : chr "VIRGINIA" "" "MARYLAND" "CALIFORNIA" ...
# $ award_or_idv_flag : chr "AWARD" "AWARD" "AWARD" "AWARD" ...
# $ award_type_code : chr "C" "C" "C" "C" ...
# $ award_type : chr "DO" "DELIVERY ORDER" "DO" "DO" ...
# $ type_of_contract_pricing_code : chr "J" "J" "3" "S" ...
# $ type_of_contract_pricing : chr "FIXED PRICE" "FIXED PRICE" "OTHER (NONE OF THE ABOVE)" "COST NO FEE" ...
# $ award_description : chr "PURCHASE OF ROUTERS, SERVERS, AND ANCILLARY EQUIPMENT. USED WORLD-WIDE IN SUPPORT OF MISSION." "LOCKSMITH SUPPLIES" "RFP FOR IDIQ CONTRACT - MULTIPLE AWARD" "BASIC CLEANING SERVICES" ...
# $ product_or_service_code : chr "7490" "4510" "R405" "S201" ...
# $ product_or_service_code_description : chr "MISCELLANEOUS OFFICE MACHINES" "PLUMBING FIXTURES AND ACCESSORIES" "OPERATIONS RESEARCH & QUANTITATIVE" "CUSTODIAL JANITORIAL SERVICES" ...
# $ naics_description : chr "WIRED TELECOMMUNICATIONS CARRIERS" "OTHER SUPPORT ACTIVITIES FOR ROAD TRANSPORTATION" "ENGINEERING SERVICES" "JANITORIAL SERVICES" ...
# $ domestic_or_foreign_entity : logi NA NA NA NA NA NA ...
# $ country_of_product_or_service_origin_code : chr "USA" "DEU" "NAN" "USA" ...
# $ extent_competed_code : chr "A" "A" "" "D" ...
# $ extent_competed : chr "FULL AND OPEN COMPETITION" "FULL AND OPEN COMPETITION" "" "FULL AND OPEN COMPETITION AFTER EXCLUSION OF SOURCES" ...
# $ parent_award_type_code : chr "" "B" "" "" ...
# $ parent_award_type : chr "" "IDC" "" "" ...
# $ cost_or_pricing_data_code : chr "N" "N" "" "N" ...
# $ cost_or_pricing_data : chr "NO" "NO" "" "NO" ...
# $ multi_year_contract_code : chr "N" "N" "N" "N" ...
# $ multi_year_contract : chr "NO" "NO" "NO" "NO" ...
Related
I'm having issues with leaflet::addPolylines using sf objects with Leaflet for R.
Below is the code I'm using to extract, as a random example, a railway in London.
library(osmdata)
library(leaflet)
library(sf)
library(ggplot2)
# Get Data
# Query OpenStreetMap for features tagged line = "DLR" within the area "London, UK"
# and fetch the result via osmdata_sf(); the line features end up in dlr$osm_lines.
dlr <-
opq("London, UK") %>%
add_osm_feature(key = "line", value = "DLR") %>%
osmdata_sf()
str(dlr$osm_lines)
# Classes ‘sf’ and 'data.frame': 213 obs. of 25 variables:
# $ osm_id : chr "3636480" "3663203" "4005749" "4005750" ...
# $ name : chr "Docklands Light Railway" "Docklands Light Railway" "Docklands Light Railway" "Docklands Light Railway" ...
# $ bridge : chr "viaduct" "viaduct" NA NA ...
# $ covered : chr NA NA NA NA ...
# $ cutting : chr NA NA NA NA ...
# $ disused.railway: chr NA NA NA NA ...
# $ electrified : chr "rail" "rail" "rail" "rail" ...
# $ fixme : chr NA NA NA NA ...
# $ frequency : chr "0" "0" "0" "0" ...
# $ gauge : chr "1435" "1435" "1435" "1435" ...
# $ layer : chr "1" "1" "-2" "-2" ...
# $ level : chr NA NA NA NA ...
# $ line : chr "DLR" "DLR" "DLR" "DLR" ...
# $ note : chr NA NA "Route guessed" "Route guessed" ...
# $ oneway : chr NA NA NA NA ...
# $ railway : chr "light_rail" "light_rail" "light_rail" "light_rail" ...
# $ service : chr NA NA NA NA ...
# $ short_name : chr NA NA NA NA ...
# $ source : chr NA NA NA NA ...
# $ source_ref : chr NA NA NA NA ...
# $ start_date : chr NA NA NA NA ...
# $ track_detail : chr NA NA NA NA ...
# $ tunnel : chr NA NA "yes" "yes" ...
# $ voltage : chr "750" "750" "750" "750" ...
# $ geometry :sfc_LINESTRING of length 213; first list element: 'XY' num [1:4, 1:2] -0.0673 -0.0669 -0.0664 -0.0661 51.5111 ...
# ..- attr(*, "dimnames")=List of 2
# .. ..$ : chr "18019994" "1842525419" "1752475375" "18019985"
# .. ..$ : chr "lon" "lat"
# - attr(*, "sf_column")= chr "geometry"
# - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
# ..- attr(*, "names")= chr "osm_id" "name" "bridge" "covered" ...
Then, plotting using ggplot() and geom_sf() is fine:
dlr$osm_lines %>%
ggplot() + geom_sf()
But not with Leaflet:
dlr$osm_lines %>%
leaflet() %>%
addProviderTiles("Stamen.Watercolor") %>%
addPolylines()
Apologies for the unnecessary watercolour - just wanted to make it abundantly clear that the lines were not there.
This seems to be a problem with the names being set in the geometry of the lines, following recent updates - see the discussion here https://github.com/r-spatial/sf/issues/880 - which suggests just removing them.
This works for me with your example...
# Strip the names attached to the geometry column of the line features;
# leaflet does not draw the lines while they are present.
names(st_geometry(dlr$osm_lines)) <- NULL
# Build the map straight from the cleaned line features.
leaflet(data = dlr$osm_lines) %>%
  addProviderTiles(provider = "Stamen.Watercolor") %>%
  addPolylines()
This will hopefully be dealt with by a leaflet update - see https://github.com/rstudio/leaflet/issues/631.
I have a dataframe nested within a dataframe that I'm getting from Mongo. The number of rows match in each so that when viewed it looks like a typical dataframe. My question, how do I expand the nested dataframe into the parent so that I can run dplyr selects? See the layout below
'data.frame': 10 obs. of 2 variables:
$ _id : int 1551 1033 1061 1262 1032 1896 1080 1099 1679 1690
$ personalInfo:'data.frame': 10 obs. of 2 variables:
..$ FirstName :List of 10
.. ..$ : chr "Jack"
.. ..$ : chr "Yogesh"
.. ..$ : chr "Steven"
.. ..$ : chr "Richard"
.. ..$ : chr "Thomas"
.. ..$ : chr "Craig"
.. ..$ : chr "David"
.. ..$ : chr "Aman"
.. ..$ : chr "Frank"
.. ..$ : chr "Robert"
..$ MiddleName :List of 10
.. ..$ : chr "B"
.. ..$ : NULL
.. ..$ : chr "J"
.. ..$ : chr "I"
.. ..$ : chr "E"
.. ..$ : chr "A"
.. ..$ : chr "R"
.. ..$ : NULL
.. ..$ : chr "J"
.. ..$ : chr "E"
As per suggestion, here's how you recreate the data
# Rebuild the example: a numeric id column plus a nested data frame whose
# columns are lists (NULL marks a missing middle name).
id <- c(1551, 1033, 1061, 1262, 1032, 1896, 1080, 1099, 1679, 1690)
fname <- list(
  "Jack", "Yogesh", "Steven", "Richard", "Thomas",
  "Craig", "David", "Aman", "Frank", "Robert"
)
mname <- list(
  "B", NULL, "J", "I", "E",
  "A", "R", NULL, "J", "E"
)
# cbind() on the two lists gives a 10 x 2 list-matrix; as.data.frame() then
# turns each of its columns into a list column.
sub <- as.data.frame(cbind(fname, mname))
master <- data.frame(id = id)
master$personalInfo <- sub
We could loop over the columns of 'personalInfo', change the NULL elements of each list to NA, and bind the result to the id column to get a regular data frame with 3 columns:
library(tidyverse)
out <- master %>%
# Take the nested personalInfo data frame out of master.
pull(personalInfo) %>%
# For every list column, replace NULL entries with NA and collapse the list to a character vector.
map_df(~ map_chr(.x, ~ replace(.x, is.null(.x), NA))) %>%
# Put the id column of master in front of the flattened name columns.
bind_cols(master %>%
select(id), .)
# Inspect the result: one row per id, with plain (non-list) columns.
str(out)
#'data.frame': 10 obs. of 3 variables:
# $ id : num 1551 1033 1061 1262 1032 ...
# $ fname: chr "Jack" "Yogesh" "Steven" "Richard" ...
# $ mname: chr "B" NA "J" "I" ...
While @akrun's answer is probably more practical, and probably the right way to tidy your data, I think this output is closer to what you describe.
I create a new environment where I put the data.frame's content, there I unlist to the said environment the content of your problematic column, and finally I wrap it all back into a data.frame.
I use a strange hack with cbind as as.data.frame is annoying with list columns. Using tibble::as_tibble works fine however.
# Scratch environment that will hold every column as a separate variable.
new_env <- new.env()
# Copy the columns of master (id and personalInfo) into the environment.
list2env(master,new_env)
# Copy the columns of the nested personalInfo data frame alongside them.
list2env(new_env$personalInfo,new_env)
# Remove the nested data frame itself now that its columns sit at the top level.
rm(personalInfo,envir = new_env)
# cbind() the variables first, because as.data.frame() on the list directly is awkward with list columns.
res <- as.data.frame(do.call(cbind,as.list(new_env))) # or as_tibble(as.list(new_env))
# The scratch environment is no longer needed.
rm(new_env)
res
# fname id mname
# 1 Jack 1551 B
# 2 Yogesh 1033 NULL
# 3 Steven 1061 J
# 4 Richard 1262 I
# 5 Thomas 1032 E
# 6 Craig 1896 A
# 7 David 1080 R
# 8 Aman 1099 NULL
# 9 Frank 1679 J
# 10 Robert 1690 E
str(res)
# 'data.frame': 10 obs. of 3 variables:
# $ fname:List of 10
# ..$ : chr "Jack"
# ..$ : chr "Yogesh"
# ..$ : chr "Steven"
# ..$ : chr "Richard"
# ..$ : chr "Thomas"
# ..$ : chr "Craig"
# ..$ : chr "David"
# ..$ : chr "Aman"
# ..$ : chr "Frank"
# ..$ : chr "Robert"
# $ id :List of 10
# ..$ : num 1551
# ..$ : num 1033
# ..$ : num 1061
# ..$ : num 1262
# ..$ : num 1032
# ..$ : num 1896
# ..$ : num 1080
# ..$ : num 1099
# ..$ : num 1679
# ..$ : num 1690
# $ mname:List of 10
# ..$ : chr "B"
# ..$ : NULL
# ..$ : chr "J"
# ..$ : chr "I"
# ..$ : chr "E"
# ..$ : chr "A"
# ..$ : chr "R"
# ..$ : NULL
# ..$ : chr "J"
# ..$ : chr "E"
I have some data that is formatted in a way that's difficult to use, so I'm trying to flatten it out. The minimum reproducible example is here.
> str(sampleData)
List of 4
$ Events :'data.frame': 2 obs. of 3 variables:
..$ CateringOptions:List of 2
.. ..$ :'data.frame': 1 obs. of 3 variables:
.. .. ..$ Agreed : logi TRUE
.. .. ..$ Tnc :'data.frame': 1 obs. of 5 variables:
.. .. .. ..$ Identity : chr "SpicyOWing"
.. .. .. ..$ Schema : logi NA
.. .. .. ..$ ElementId : chr "105031"
.. .. .. ..$ ElementType : logi NA
.. .. .. ..$ ElementVersion: logi NA
.. .. ..$ Address: chr "New York"
.. ..$ :'data.frame': 1 obs. of 3 variables:
.. .. ..$ Agreed : logi TRUE
.. .. ..$ Tnc :'data.frame': 1 obs. of 5 variables:
.. .. .. ..$ Identity : chr "BaconEggs"
.. .. .. ..$ Schema : logi NA
.. .. .. ..$ ElementId : chr "105032"
.. .. .. ..$ ElementType : logi NA
.. .. .. ..$ ElementVersion: logi NA
.. .. ..$ Address: chr "Seattle"
..$ Action : num [1:2] 1 1
..$ Volume : num [1:2] 1000 2000
$ Host :List of 5
..$ Identity : chr "John"
..$ Schema : logi NA
..$ ElementId : chr "101505"
..$ ElementType : logi NA
..$ ElementVersion: logi NA
$ Sender :List of 5
..$ Identity : chr "Jane"
..$ Schema : logi NA
..$ ElementId : chr "101005"
..$ ElementType : logi NA
..$ ElementVersion: logi NA
$ CompletedDate: chr "/Date(1490112000000)/"
Expected
> expectedOutcome
Events.CateringOptions.Agreed Events.CateringOptions.Tnc.Identity Events.CateringOptions.Tnc.Schema Events.CateringOptions.Tnc.ElementId
1 NA SpicyOWing TRUE 105031
2 NA BaconEggs TRUE 105032
Events.CateringOptions.Tnc.ElementType Events.CateringOptions.Tnc.ElementVersion Events.CateringOptions.Address Events.Action Events.Volume Host.Identity
1 NA NA New York 1 1000 John
2 NA NA Seattle 1 2000 John
Host.Schema Host.ElementId Host.ElementType Host.ElementVersion Sender.Identity Sender.Schema Sender.ElementId Sender.ElementType Sender.ElementVersion
1 NA 101505 NA NA Jane NA 101005 NA NA
2 NA 101505 NA NA Jane NA 101005 NA NA
CompletedDate
1 /Date(1490112000000)/
2 /Date(1490112000000)/
The check function
# Recursively flatten a nested list / data frame structure.
#
# Arguments:
#   li  A list (or data frame) whose elements may be data frames, plain
#       lists or atomic vectors.
#
# Returns:
#   * `li` unchanged when none of its elements is a data frame or a list
#     (this includes an empty `li`);
#   * the row-wise stack (plyr::rbind.fill) of its data frame elements when
#     it holds data frames but no plain lists;
#   * the column-wise combination (cbind) of all its elements when it holds
#     at least one plain list.
check <- function(li) {
  # One TRUE/FALSE per element. vapply() over `li` is safe for empty input,
  # and is.data.frame()/inherits() stay scalar for multi-class objects,
  # unlike `class(x) == "..."`.
  are_df <- vapply(li, is.data.frame, logical(1), USE.NAMES = FALSE)
  are_list <- vapply(
    li,
    function(el) inherits(el, "list"),
    logical(1),
    USE.NAMES = FALSE
  )

  # Flatten nested data frame columns, then recurse into the result.
  for (j in which(are_df)) {
    li[[j]] <- check(jsonlite::flatten(li[[j]]))
  }

  # Recurse into plain lists.
  for (j in which(are_list)) {
    li[[j]] <- check(li[[j]])
  }

  if (any(are_list)) {
    # `li` still contains the (already flattened) data frame elements, so
    # binding all of `li` covers them too. Binding the stacked data frames on
    # top of this, as before, emitted every data frame column twice.
    return(do.call(cbind, li))
  }

  if (any(are_df)) {
    return(plyr::rbind.fill(li[are_df]))
  }

  li
}
Results
> str(check(sampleData))
'data.frame': 2 obs. of 29 variables:
$ CateringOptions.Agreed : logi TRUE TRUE
$ CateringOptions.Address : chr "New York" "Seattle"
$ CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
$ CateringOptions.Tnc.Schema : logi NA NA
$ CateringOptions.Tnc.ElementId : chr "105031" "105032"
$ CateringOptions.Tnc.ElementType : logi NA NA
$ CateringOptions.Tnc.ElementVersion : logi NA NA
$ Action : num 1 1
$ Volume : num 1000 2000
$ Events.CateringOptions.Agreed : logi TRUE TRUE
$ Events.CateringOptions.Address : chr "New York" "Seattle"
$ Events.CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
$ Events.CateringOptions.Tnc.Schema : logi NA NA
$ Events.CateringOptions.Tnc.ElementId : chr "105031" "105032"
$ Events.CateringOptions.Tnc.ElementType : logi NA NA
$ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
$ Events.Action : num 1 1
$ Events.Volume : num 1000 2000
$ Host.Identity : Factor w/ 1 level "John": 1 1
$ Host.Schema : logi NA NA
$ Host.ElementId : Factor w/ 1 level "101505": 1 1
$ Host.ElementType : logi NA NA
$ Host.ElementVersion : logi NA NA
$ Sender.Identity : Factor w/ 1 level "Jane": 1 1
$ Sender.Schema : logi NA NA
$ Sender.ElementId : Factor w/ 1 level "101005": 1 1
$ Sender.ElementType : logi NA NA
$ Sender.ElementVersion : logi NA NA
$ CompletedDate : Factor w/ 1 level "/Date(1490112000000)/": 1 1
I almost have it, but the nested dataframe is being duped. Also, my code takes fairly long. Does anyone have any idea how I can go about flattening this?
Edit:
I added my solution in the end in the gist
Here is my take at it, with help from purrr.
The idea is similar to yours, only with a different syntax: flatten() the most nested dataframes, then rbind() them.
If I understand your code properly, mine is slightly different at the end, since I'll try to get a more "jsonlite::flatten-friendly" structure to apply it once more to the end result:
library(jsonlite)
library(purrr)
res <-
sampleData %>%
# Visit every top-level element of sampleData that is a list (data frames count as lists).
modify_if(
is.list,
.f = ~ modify_if(
.x,
# Select the inner elements that consist entirely of data frames ...
.p = function(x) all(sapply(x, is.data.frame)),
# ... and replace each one with its flattened data frames stacked row-wise.
.f = ~ do.call("rbind", lapply(.x, jsonlite::flatten))
)
) %>%
# Combine everything into one data frame, then flatten the nested data frame columns that remain.
as.data.frame() %>%
jsonlite::flatten()
# Inspect the flattened result.
str(res)
# 'data.frame': 2 obs. of 20 variables:
# $ Events.Action : num 1 1
# $ Events.Volume : num 1000 2000
# $ Host.Identity : chr "John" "John"
# $ Host.Schema : logi NA NA
# $ Host.ElementId : chr "101505" "101505"
# $ Host.ElementType : logi NA NA
# $ Host.ElementVersion : logi NA NA
# $ Sender.Identity : chr "Jane" "Jane"
# $ Sender.Schema : logi NA NA
# $ Sender.ElementId : chr "101005" "101005"
# $ Sender.ElementType : logi NA NA
# $ Sender.ElementVersion : logi NA NA
# $ CompletedDate : chr "/Date(1490112000000)/" "/Date(1490112000000)/"
# $ Events.CateringOptions.Agreed : logi TRUE TRUE
# $ Events.CateringOptions.Address : chr "New York" "Seattle"
# $ Events.CateringOptions.Tnc.Identity : chr "SpicyOWing" "BaconEggs"
# $ Events.CateringOptions.Tnc.Schema : logi NA NA
# $ Events.CateringOptions.Tnc.ElementId : chr "105031" "105032"
# $ Events.CateringOptions.Tnc.ElementType : logi NA NA
# $ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
I've got one mismatch with your expectedOutcome but if I may, it might be on your side:
all.equal(expectedOutcome[sort(names(expectedOutcome))], res[sort(names(res))])
# [1] "Component “Events.CateringOptions.Agreed”: 'is.NA' value mismatch: 0 in current 2 in target"
Not sure if this over-simplifies your problem, but with the sample you shared, it seems to work. Basically, if the column is not already a vector when you do data.frame(your_list), it unlists the data and makes a matrix.
# Flatten a nested list into a single data frame.
#
# Arguments:
#   inlist  A list that data.frame() can convert; columns that are still
#           lists after that conversion (e.g. a list of one-row data frames)
#           are expanded into one column per distinct element name.
#
# Returns:
#   A data frame with one row per row of data.frame(inlist). Expanded
#   columns are prefixed with the name of the column they came from.
FLAT <- function(inlist) {
  A <- data.frame(inlist)
  n_rows <- nrow(A)
  out <- lapply(A, function(y) {
    if (is.list(y)) {
      # Unlisting coerces everything to one atomic vector whose names repeat
      # for every row; refill it row by row into a matrix.
      y <- unlist(y)
      m <- matrix(
        y,
        n_rows,
        byrow = TRUE,
        dimnames = list(NULL, unique(names(y)))
      )
      y <- data.frame(m, stringsAsFactors = FALSE)
      # Recover column types lost by unlist(). as.is = TRUE keeps strings as
      # character on every R version and avoids the "as.is should be
      # specified" warning of R >= 4.0.
      y[] <- lapply(y, type.convert, as.is = TRUE)
    }
    y
  })
  # cbind.data.frame() guarantees a data frame even when no column needed
  # expanding; plain cbind() on atomic vectors would return a matrix.
  do.call(cbind.data.frame, out)
}
FLAT(sampleData)
Here's the str on your sample data:
str(FLAT(sampleData))
## 'data.frame': 2 obs. of 20 variables:
## $ Events.CateringOptions.Agreed : logi TRUE TRUE
## $ Events.CateringOptions.Tnc.Identity : Factor w/ 2 levels "BaconEggs","SpicyOWing": 2 1
## $ Events.CateringOptions.Tnc.Schema : logi NA NA
## $ Events.CateringOptions.Tnc.ElementId : int 105031 105032
## $ Events.CateringOptions.Tnc.ElementType : logi NA NA
## $ Events.CateringOptions.Tnc.ElementVersion: logi NA NA
## $ Events.CateringOptions.Address : Factor w/ 2 levels "New York","Seattle": 1 2
## $ Events.Action : num 1 1
## $ Events.Volume : num 1000 2000
## $ Host.Identity : Factor w/ 1 level "John": 1 1
## $ Host.Schema : logi NA NA
## $ Host.ElementId : Factor w/ 1 level "101505": 1 1
## $ Host.ElementType : logi NA NA
## $ Host.ElementVersion : logi NA NA
## $ Sender.Identity : Factor w/ 1 level "Jane": 1 1
## $ Sender.Schema : logi NA NA
## $ Sender.ElementId : Factor w/ 1 level "101005": 1 1
## $ Sender.ElementType : logi NA NA
## $ Sender.ElementVersion : logi NA NA
## $ CompletedDate : Factor w/ 1 level "/Date(1490112000000)/": 1 1
After importing data from a JSON stream, I have a data frame that is 621 lists of the same 22 variables.
List of 621
$ :List of 22
..$ _id : chr "55c79e711cbee48856a30886"
..$ number : num 1
..$ country : chr "Yemen"
..$ date : chr "2002-11-03T00:00:00.000Z"
..$ narrative : chr ""
..$ town : chr ""
..$ location : chr ""
..$ deaths : chr "6"
..$ deaths_min : chr "6"
..$ deaths_max : chr "6"
..$ civilians : chr "0"
..$ injuries : chr ""
..$ children : chr ""
..$ tweet_id : chr "278544689483890688"
..$ bureau_id : chr "YEM001"
..$ bij_summary_short: chr ""
..$ bij_link : chr ""
..$ target : chr ""
..$ lat : chr "15.47467"
..$ lon : chr "45.322755"
..$ articles : list()
..$ names : chr ""| __truncated__
$ :List of 22
..$ _id : chr "55c79e711cbee48856a30887"
..$ number : num 2
..$ country : chr "Pakistan"
..$ date : chr "2004-06-17T00:00:00.000Z"
..$ narrative : chr ""
..$ town : chr ""
..$ location : chr ""
..$ deaths : chr "6-8"
..$ deaths_min : chr "6"
..$ deaths_max : chr "8"
..$ civilians : chr "2"
..$ injuries : chr "1"
..$ children : chr "2"
..$ tweet_id : chr "278544750867533824"
..$ bureau_id : chr "B1"
..$ bij_summary_short: chr ""| __truncated__
..$ bij_link : chr ""
..$ target : chr ""
..$ lat : chr "32.30512565"
..$ lon : chr "69.57624435"
..$ articles : list()
..$ names : chr ""
...
How can I combine these lists into one data frame of 621 observations of 22 variables? Notice that all 621 lists are unnamed.
edit: Per request, here is how I got this data set:
library(rjson)
url <- 'http://api.dronestre.am/data'
document <- fromJSON(file=url, method='C')
str(document$strike)
Can you provide an example of how you generated the data? I have not tested this answer, but the following should help. If you update the question to show how you obtained the data, I can try it out.
update
library(rjson)
library(data.table)
library(dplyr)
url <- 'http://api.dronestre.am/data'
# Read and parse the JSON document found at url.
document <- fromJSON(file=url, method='C')
is(document)
# The strike element holds the list of records to combine.
listdata<- document$strike
# Bind the records together row-wise (one row per record), then convert the result to a data.table.
df<-do.call(rbind,listdata) %>% as.data.table
# Check the resulting dimensions.
dim(df)
purrr has a useful transpose function which 'inverts' a list. The $articles element causes trouble as it appears always to be empty, and scuppers you when you try to convert to a data.frame, so I've subsetted for it.
library(purrr)
# Invert the list of records into a list of fields, then unlist each field.
df <- transpose(document$strike) %>%
t %>%
apply(FUN = unlist, MARGIN = 2)
# Drop element 21 (the articles field, which appears to be always empty) before building the data frame.
df <- df[-21] %>% data.frame %>% tbl_df
df
Source: local data frame [621 x 21]
X_id number country date
(fctr) (dbl) (fctr) (fctr)
1 55c79e711cbee48856a30886 1 Yemen 2002-11-03T00:00:00.000Z
2 55c79e711cbee48856a30887 2 Pakistan 2004-06-17T00:00:00.000Z
3 55c79e711cbee48856a30888 3 Pakistan 2005-05-08T00:00:00.000Z
4 55c79e721cbee48856a30889 4 Pakistan 2005-11-05T00:00:00.000Z
5 55c79e721cbee48856a3088a 5 Pakistan 2005-12-01T00:00:00.000Z
6 55c79e721cbee48856a3088b 6 Pakistan 2006-01-06T00:00:00.000Z
7 55c79e721cbee48856a3088c 7 Pakistan 2006-01-13T00:00:00.000Z
8 55c79e721cbee48856a3088d 8 Pakistan 2006-10-30T00:00:00.000Z
9 55c79e721cbee48856a3088e 9 Pakistan 2007-01-16T00:00:00.000Z
10 55c79e721cbee48856a3088f 10 Pakistan 2007-04-27T00:00:00.000Z
.. ... ... ... ...
Variables not shown: narrative (fctr), town (fctr), location (fctr), deaths
(fctr), deaths_min (fctr), deaths_max (fctr), civilians (fctr), injuries
(fctr), children (fctr), tweet_id (fctr), bureau_id (fctr), bij_summary_short
(fctr), bij_link (fctr), target (fctr), lat (fctr), lon (fctr), names (fctr)
How do I get historical data of an INDEX into R from Interactive Brokers? If it were futures, I would use this command (as suggested here IBrokers request Historical Futures Contract Data?):
library(twsInstrument)
a <- reqHistoricalData(tws, getContract("ESJUN2013"))
But the corresponding command with the conId of the S&P Index gives an error:
> a <- reqHistoricalData(tws, getContract("11004968"))
Connected with clientId 110.
Contract details request complete. Disconnected.
waiting for TWS reply on ES ....failed.
Warning message:
In errorHandler(con, verbose, OK = c(165, 300, 366, 2104, 2106, :
Error validating request:-'uc' : cause - HMDS Expired Contract Violation:contract can not expire.
P.S. Someone with enough points should create a tag for IBrokers
I don't have market data access to index data, but I think the following should work.
reqHistoricalData(tws, twsIndex(symbol = "SPX", exch = "CBOE"))
## waiting for TWS reply on SPX ....failed.
## NULL
## Warning message:
## In errorHandler(con, verbose, OK = c(165, 300, 366, 2104, 2106, :
## Historical Market Data Service error message:No market data permissions for CBOE IND
The following is the result of reqContractDetails using the same approach as above, which shows that the contract object is created properly by twsIndex:
reqContractDetails(tws, twsIndex(symbol = "SPX", exch = "CBOE"))
## [[1]]
## List of 18
## $ version : chr "8"
## $ contract :List of 16
## ..$ conId : chr "416904"
## ..$ symbol : chr "SPX"
## ..$ sectype : chr "IND"
## ..$ exch : chr "CBOE"
## ..$ primary : chr ""
## ..$ expiry : chr ""
## ..$ strike : chr "0"
## ..$ currency : chr "USD"
## ..$ right : chr ""
## ..$ local : chr "SPX"
## ..$ multiplier : chr ""
## ..$ combo_legs_desc: chr ""
## ..$ comboleg : chr ""
## ..$ include_expired: chr ""
## ..$ secIdType : chr ""
## ..$ secId : chr ""
## ..- attr(*, "class")= chr "twsContract"
## $ marketName : chr "SPX"
## $ tradingClass : chr "SPX"
## $ conId : chr "416904"
## $ minTick : chr "0.01"
## $ orderTypes : chr [1:22] "ACTIVETIM" "ADJUST" "ALERT" "ALLOC" ...
## $ validExchanges: chr "CBOE"
## $ priceMagnifier: chr "1"
## $ underConId : chr "0"
## $ longName : chr "S&P 500 Stock Index"
## $ contractMonth : chr ""
## $ industry : chr "Indices"
## $ category : chr "Broad Range Equity Index"
## $ subcategory : chr "*"
## $ timeZoneId : chr "CST"
## $ tradingHours : chr "20130321:0830-1500;20130322:0830-1500"
## $ liquidHours : chr "20130321:0830-1500;20130322:0830-1500"
##