Related
I am trying to use the substr function to shorten the timestamps in one of my data frames.
I want the created_at values to be changed from a full date-time (e.g. "2021-06-13T01:42:23.000Z") to the date only (e.g. "2021-06-13").
substr(jointTweet2$created_at.x, 1,10)
Doing this returns the shortened dates, but it does not change the dates stored in my data frame.
jointTweet2 <- substr(jointTweet2$created_at.x, 1,10)
Doing this replaces jointTweet2 with a list of values instead of updating the column in place.
How can I change just the time without creating a list?
I have attached my data below, as produced by dput:
structure(list(author_id = c("1000023272250118144", "1000030523270496262",
"1000043409212674048", "1000050535649759232", "1000051189986177025",
"1000060938463170560"), possibly_sensitive = c(FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE), created_at.x = c("2021-06-13T01:42:23.000Z",
"2021-02-10T17:55:43.000Z", "2021-04-09T22:31:24.000Z", "2021-04-19T16:11:28.000Z",
"2021-01-11T10:06:43.000Z", "2021-01-16T11:30:34.000Z"), lang = c("en",
"en", "en", "en", "en", "en"), source = c("Twitter Web App",
"Twitter Web App", "Twitter Web App", "Twitter Web App", "Twitter for Android",
"Twitter Web App"), public_metrics.x = structure(list(retweet_count = c(0L,
1L, 4L, 4L, 0L, 27L), reply_count = c(0L, 0L, 1L, 0L, 0L, 5L),
like_count = c(1L, 0L, 5L, 39L, 0L, 125L), quote_count = c(0L,
0L, 0L, 0L, 0L, 0L)), row.names = c(302733L, 69371L, 179801L,
198715L, 15735L, 24450L), class = "data.frame"), id = c("1403890461387919362",
"1359561699733430273", "1380649576164642817", "1384177838757072906",
"1348572038705733635", "1350405079216640003"), entities.x = structure(list(
hashtags = list(structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(start = c(13L, 22L, 33L, 43L, 53L, 63L,
74L), end = c(21L, 32L, 42L, 52L, 62L, 73L, 78L), tag = c("gamedev",
"gamemaker", "pixelart", "aseprite", "indiedev", "indiegame",
"art")), class = "data.frame", row.names = c(NA, 7L)),
structure(list(), .Names = character(0)), structure(list(
start = c(21L, 32L, 38L, 45L, 54L, 64L), end = c(31L,
37L, 44L, 53L, 63L, 69L), tag = c("IndieGame", "jRPG",
"RM2k3", "RPG<U+30C4><U+30AF><U+30FC><U+30EB>", "PixelArt",
"<U+30C9><U+30C3><U+30C8><U+7D75>")), class = "data.frame", row.names = c(NA,
6L))), urls = list(structure(list(start = 62L, end = 85L,
url = "", expanded_url = "",
display_url = ""), class = "data.frame", row.names = 1L),
structure(list(start = 197L, end = 220L, url = "",
expanded_url = "https://twitter.com/FroppleStudios/status/1359552958309019653",
display_url = ""), class = "data.frame", row.names = 1L),
structure(list(start = c(23L, 55L, 101L, 154L, 222L),
end = c(46L, 78L, 124L, 177L, 245L), url = c(""
), expanded_url = c(""), display_url = c(""
), status = c(NA, NA, 200L, NA, NA), unwound_url = c(NA,
NA, "",
NA, NA)), class = "data.frame", row.names = c(NA,
5L)), structure(list(start = 79L, end = 102L, url = "",
expanded_url = "",
display_url = "", media_key = "7_1384177629943590919"), class = "data.frame", row.names = 1L),
structure(list(start = 50L, end = 73L, url = "",
expanded_url = "",
display_url = ""), class = "data.frame", row.names = 1L),
structure(list(start = 70L, end = 93L, url = "",
expanded_url = "",
display_url = "", media_key = "3_1350404601623830529"), class = "data.frame", row.names = 1L)),
annotations = list(structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0))), mentions = list(
structure(list(start = 90L, end = 98L, username = "YouTube",
id = "10228272"), class = "data.frame", row.names = 1L),
structure(list(start = 34L, end = 49L, username = "FroppleStudios",
id = "1359175250156679171"), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(start = 4L, end = 15L, username = "TimBledsoe",
id = "20614993"), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0))), cashtags = list(
NULL, NULL, NULL, NULL, NULL, NULL)), row.names = c(302733L,
69371L, 179801L, 198715L, 15735L, 24450L), class = "data.frame"),
conversation_id = c("1403890461387919362", "1359561699733430273",
"1380649576164642817", "1384177838757072906", "1348572038705733635",
"1350405079216640003"), text = c("RoboDunk - Reveal Trailer Roguelite Basketball Combat for PC via #YouTube",
"Currently working on a project at #FroppleStudios \nThose interested in following the development of an indie game be sure to follow. We are still in early stages but we will show more when we can. ",
"My stuff: Wood carving and Crafting Also you can check my brother's work here: He's making a cool indie game",
"Gutter ways\n\n#gamedev #gamemaker #pixelart #aseprite #indiedev #indiegame #art ",
"Hey #TimBledsoe here's a new indie game for you. ",
"Mountainous ridge <U+0001F332>\n\n#IndieGame #jRPG #RM2k3 #RPG<U+30C4><U+30AF><U+30FC><U+30EB> #PixelArt #<U+30C9><U+30C3><U+30C8><U+7D75> "
), attachments = structure(list(media_keys = list(structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
"7_1384177629943590919", structure(list(), .Names = character(0)),
"3_1350404601623830529"), poll_ids = list(structure(list(), .Names = character(0)),
NULL, structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), NULL)), row.names = c(302733L,
69371L, 179801L, 198715L, 15735L, 24450L), class = "data.frame"),
geo = structure(list(place_id = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), coordinates = structure(list(coordinates = list(NULL,
NULL, NULL, NULL, NULL, NULL), type = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), row.names = c(302733L, 69371L, 179801L,
198715L, 15735L, 24450L), class = "data.frame")), row.names = c(302733L,
69371L, 179801L, 198715L, 15735L, 24450L), class = "data.frame"),
referenced_tweets = list(structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0))), `FTweet$id` = c("1403890461387919362",
"1359561699733430273", "1380649576164642817", "1384177838757072906",
"1348572038705733635", "1350405079216640003"), created_at.y = c("2018-05-25T14:38:22.000Z",
"2018-05-25T15:07:11.000Z", "2018-05-25T15:58:23.000Z", "2018-05-25T16:26:42.000Z",
"2018-05-25T16:29:18.000Z", "2018-05-25T17:08:02.000Z"),
verified = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), protected = c(FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE), public_metrics.y = structure(list(
followers_count = c(58L, 38L, 304L, 1444L, 4003L, 3696L
), following_count = c(27L, 323L, 255L, 531L, 273L, 1806L
), tweet_count = c(2721L, 7212L, 6082L, 663L, 11167L,
1531L), listed_count = c(2L, 0L, 1L, 21L, 55L, 53L)), row.names = c("56366",
"20397", "39153", "41987", "7173", "9740"), class = "data.frame"),
entities.y = structure(list(url = structure(list(urls = list(
structure(list(start = 0L, end = 23L, url = "",
expanded_url = "", display_url = "oldgamers.net"), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(
start = 0L, end = 23L, url = "",
expanded_url = "", display_url = "castjunkie.com"), class = "data.frame", row.names = 1L),
structure(list(start = 0L, end = 23L, url = "",
expanded_url = "",
display_url = ""), class = "data.frame", row.names = 1L))), row.names = c("56366",
"20397", "39153", "41987", "7173", "9740"), class = "data.frame"),
description = structure(list(urls = list(structure(list(
start = 87L, end = 110L, url = "",
expanded_url = "", display_url = ""), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0))), hashtags = list(
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(start = 80L, end = 101L, tag = "SupportIndiePodcasts"), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0))), mentions = list(
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(start = 74L, end = 85L, username = "Callia_wen"), class = "data.frame", row.names = 1L),
structure(list(), .Names = character(0)), structure(list(), .Names = character(0)),
structure(list(start = 7L, end = 22L, username = "BelovedRapture"), class = "data.frame", row.names = 1L)),
cashtags = list(NULL, NULL, structure(list(), .Names = character(0)),
NULL, structure(list(), .Names = character(0)),
NULL)), row.names = c("56366", "20397", "39153",
"41987", "7173", "9740"), class = "data.frame")), row.names = c("56366",
"20397", "39153", "41987", "7173", "9740"), class = "data.frame"),
username = c("oldgamers_TV", "DavidSantosDev", "Senti_nelle",
"_AERYS_", "castjunkie", "BelovedRapture"), location = c("Houston, TX",
NA, "Mon PV : #JeanJean_PV", "Northern Ireland", "Washington, USA",
"Boston, MA"), description = c("Oldgamers shares gaming news & Support streamers.\nAd* Video Games Best Deals are here: ",
"", "Artisan de l'art. <U+2606> I draw big mechas, dragons and OCs\nHunting partner of #Callia_wen",
"Game Developer | 20 | UK | Thank You All So Much For 1400 Followers! Big fan of Adventure Time. Commissions open :D",
"Host of vibrant Discord community with thousands of podcast creators/listeners. #SupportIndiePodcasts",
"Dev of #BelovedRapture, a SNES-styled indie RPG game. <U+0001F3AE><U+0001F3F3><U+FE0F><U+200D><U+0001F308><U+0001F338>"
), pinned_tweet_id = c(NA, NA, "1380649576164642817", NA,
"1515503033177673729", "1455620363564556291"), name = c("oldgamers_TV",
"David", "Sentinelle <U+0001F47A>", "Aerys", "CastJunkie",
"Blind"), withheld = structure(list(country_codes = list(
NULL, NULL, NULL, NULL, NULL, NULL)), row.names = c("56366",
"20397", "39153", "41987", "7173", "9740"), class = "data.frame"),
`FUsers$id` = c("1000023272250118144", "1000030523270496262",
"1000043409212674048", "1000050535649759232", "1000051189986177025",
"1000060938463170560")), row.names = c(NA, 6L), class = "data.frame")
Sorry for all the added data; I hope it works.
I had to remove the URLs by hand.
I managed to fix it using the following call:
system.time(jointTweet2$created_at.x <- as.Date(jointTweet2$created_at.x))
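I have no idea why the other attempts were not working. (Most likely the first substr call never assigned its result to anything, and the second assigned it to the whole jointTweet2 object rather than to the jointTweet2$created_at.x column.)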
A follow-up to this question: is it possible to apply unnest_wider recursively, until no more list columns are left?
I'm working with a deeply nested data set in which one list column has a single level (one unnest_wider call does the trick), but another contains 228-237 list columns of varying depths. I want to 'explode' the entire data set so that it's tidy, with all columns available at the top level of the hierarchy.
Here's a sample:
structure(list(fullVisitorId = c("2248281639583218707", "8647436381089107732"
), date = c("20170801", "20170801"), visitStartTime = c(1501583974L,
1501616585L), totals = list(list(visits = 1L, hits = 1L, pageviews = 1L,
timeOnSite = NA_integer_, bounces = 1L, transactions = NA_integer_,
transactionRevenue = NA_integer_, newVisits = 1L, screenviews = NA_integer_,
uniqueScreenviews = NA_integer_, timeOnScreen = NA_integer_,
totalTransactionRevenue = NA_integer_, sessionQualityDim = 1L),
list(visits = 1L, hits = 1L, pageviews = 1L, timeOnSite = NA_integer_,
bounces = 1L, transactions = NA_integer_, transactionRevenue = NA_integer_,
newVisits = 1L, screenviews = NA_integer_, uniqueScreenviews = NA_integer_,
timeOnScreen = NA_integer_, totalTransactionRevenue = NA_integer_,
sessionQualityDim = 1L)), channelGrouping = c("Organic Search",
"Organic Search"), hits = list(structure(list(hitNumber = 1L,
time = 0L, hour = 3L, minute = 39L, isSecure = NA, isInteraction = TRUE,
isEntrance = TRUE, isExit = TRUE, referer = "http://www.google.com/",
page = list(list(pagePath = "/google+redesign/electronics",
hostname = "shop.googlemerchandisestore.com", pageTitle = "Electronics | Google Merchandise Store",
searchKeyword = NA_character_, searchCategory = NA_character_,
pagePathLevel1 = "/google+redesign/", pagePathLevel2 = "/electronics",
pagePathLevel3 = "", pagePathLevel4 = "")), transaction = list(
list(transactionId = NA_character_, transactionRevenue = NA_integer_,
transactionTax = NA_integer_, transactionShipping = NA_integer_,
affiliation = NA_character_, currencyCode = "USD",
localTransactionRevenue = NA_integer_, localTransactionTax = NA_integer_,
localTransactionShipping = NA_integer_, transactionCoupon = NA_character_)),
item = list(list(transactionId = NA_character_, productName = NA_character_,
productCategory = NA_character_, productSku = NA_character_,
itemQuantity = NA_integer_, itemRevenue = NA_integer_,
currencyCode = "USD", localItemRevenue = NA_integer_)),
contentInfo = list(structure(list(NULL), .Names = "")), appInfo = list(
list(name = NA_character_, version = NA_character_, id = NA_character_,
installerId = NA_character_, appInstallerId = NA_character_,
appName = NA_character_, appVersion = NA_character_,
appId = NA_character_, screenName = "shop.googlemerchandisestore.com/google+redesign/electronics",
landingScreenName = "shop.googlemerchandisestore.com/google+redesign/electronics",
exitScreenName = "shop.googlemerchandisestore.com/google+redesign/electronics",
screenDepth = "0")), exceptionInfo = list(list(description = NA_character_,
isFatal = TRUE, exceptions = NA_integer_, fatalExceptions = NA_integer_)),
eventInfo = list(structure(list(NULL, NULL, NULL, NULL), .Names = c("",
"", "", ""))), product = list(structure(list(productSKU = c("GGOEGBFC018799",
"GGOEGESB015199", "GGOEGEVA022399", "GGOEGCBB074199", "GGOEGFKA022299",
"GGOEGCBB074399", "GGOEGCBC074299", "GGOEGEHQ072499", "GGOEGEHQ072599",
"GGOEGESB015099", "GGOEGESC014099", "GGOEGESQ016799"), v2ProductName = c("Electronics Accessory Pouch",
"Google Flashlight", "Micro Wireless Earbud", "Google Car Clip Phone Holder",
"Keyboard DOT Sticker", "Google Device Holder Sticky Pad",
"Google Device Stand", "Google 2200mAh Micro Charger", "Google 4400mAh Power Bank",
"Basecamp Explorer Powerbank Flashlight", "Rocket Flashlight",
"Plastic Sliding Flashlight"), v2ProductCategory = c("Home/Electronics/",
"Home/Electronics/", "Home/Electronics/", "Home/Electronics/",
"Home/Electronics/", "Home/Electronics/", "Home/Electronics/",
"Home/Electronics/", "Home/Electronics/", "Home/Electronics/",
"Home/Electronics/", "Home/Electronics/"), productVariant = c("(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)",
"(not set)"), productBrand = c("(not set)", "(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)"
), productRevenue = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), localProductRevenue = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), productPrice = c(4990000L, 59990000L, 39990000L,
6990000L, 1500000L, 4990000L, 4990000L, 22990000L, 37990000L,
22990000L, 4990000L, 12990000L), localProductPrice = c(4990000L,
59990000L, 39990000L, 6990000L, 1500000L, 4990000L, 4990000L,
22990000L, 37990000L, 22990000L, 4990000L, 12990000L), productQuantity = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), productRefundAmount = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), localProductRefundAmount = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), isImpression = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE), isClick = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), customDimensions = list(
structure(list(index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), customMetrics = list(
structure(list(index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), productListName = c("Category",
"Category", "Category", "Category", "Category", "Category",
"Category", "Category", "Category", "Category", "Category",
"Category"), productListPosition = 1:12, productCouponCode = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))), promotion = list(
structure(list(promoId = character(0), promoName = character(0),
promoCreative = character(0), promoPosition = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), promotionActionInfo = list(
structure(list(NULL, NULL), .Names = c("", ""))), refund = list(
structure(list(NULL, NULL), .Names = c("", ""))), eCommerceAction = list(
list(action_type = "0", step = 1L, option = NA_character_)),
experiment = list(structure(list(experimentId = character(0),
experimentVariant = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), publisher = list(
structure(list(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL), .Names = c("", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", ""))),
customVariables = list(structure(list(index = integer(0),
customVarName = character(0), customVarValue = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), customDimensions = list(
structure(list(index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), customMetrics = list(
structure(list(index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), type = "PAGE",
social = list(list(socialInteractionNetwork = NA_character_,
socialInteractionAction = NA_character_, socialInteractions = NA_integer_,
socialInteractionTarget = NA_character_, socialNetwork = "(not set)",
uniqueSocialInteractions = NA_integer_, hasSocialSourceReferral = "No",
socialInteractionNetworkAction = " : ")), latencyTracking = list(
structure(list(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
""))), sourcePropertyInfo = list(structure(list(NULL,
NULL), .Names = c("", ""))), contentGroup = list(list(
contentGroup1 = "(not set)", contentGroup2 = "Electronics",
contentGroup3 = "(not set)", contentGroup4 = "(not set)",
contentGroup5 = "(not set)", previousContentGroup1 = "(entrance)",
previousContentGroup2 = "(entrance)", previousContentGroup3 = "(entrance)",
previousContentGroup4 = "(entrance)", previousContentGroup5 = "(entrance)",
contentGroupUniqueViews1 = NA_integer_, contentGroupUniqueViews2 = 1L,
contentGroupUniqueViews3 = NA_integer_, contentGroupUniqueViews4 = NA_integer_,
contentGroupUniqueViews5 = NA_integer_)), dataSource = "web",
publisher_infos = list(structure(list(dfpClicks = integer(0),
dfpImpressions = integer(0), dfpMatchedQueries = integer(0),
dfpMeasurableImpressions = integer(0), dfpQueries = integer(0),
dfpRevenueCpm = integer(0), dfpRevenueCpc = integer(0),
dfpViewableImpressions = integer(0), dfpPagesViewed = integer(0),
adsenseBackfillDfpClicks = integer(0), adsenseBackfillDfpImpressions = integer(0),
adsenseBackfillDfpMatchedQueries = integer(0), adsenseBackfillDfpMeasurableImpressions = integer(0),
adsenseBackfillDfpQueries = integer(0), adsenseBackfillDfpRevenueCpm = integer(0),
adsenseBackfillDfpRevenueCpc = integer(0), adsenseBackfillDfpViewableImpressions = integer(0),
adsenseBackfillDfpPagesViewed = integer(0), adxBackfillDfpClicks = integer(0),
adxBackfillDfpImpressions = integer(0), adxBackfillDfpMatchedQueries = integer(0),
adxBackfillDfpMeasurableImpressions = integer(0), adxBackfillDfpQueries = integer(0),
adxBackfillDfpRevenueCpm = integer(0), adxBackfillDfpRevenueCpc = integer(0),
adxBackfillDfpViewableImpressions = integer(0), adxBackfillDfpPagesViewed = integer(0),
adxClicks = integer(0), adxImpressions = integer(0),
adxMatchedQueries = integer(0), adxMeasurableImpressions = integer(0),
adxQueries = integer(0), adxRevenue = integer(0), adxViewableImpressions = integer(0),
adxPagesViewed = integer(0), adsViewed = integer(0),
adsUnitsViewed = integer(0), adsUnitsMatched = integer(0),
viewableAdsViewed = integer(0), measurableAdsViewed = integer(0),
adsPagesViewed = integer(0), adsClicked = integer(0),
adsRevenue = integer(0), dfpAdGroup = character(0), dfpAdUnits = character(0),
dfpNetworkId = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(hitNumber = 1L, time = 0L,
hour = 12L, minute = 43L, isSecure = NA, isInteraction = TRUE,
isEntrance = TRUE, isExit = TRUE, referer = "https://www.google.com/",
page = list(list(pagePath = "/google+redesign/apparel/mens/mens+outerwear",
hostname = "shop.googlemerchandisestore.com", pageTitle = "Men's Outerwear | Apparel | Google Merchandise Store",
searchKeyword = NA_character_, searchCategory = NA_character_,
pagePathLevel1 = "/google+redesign/", pagePathLevel2 = "/apparel/",
pagePathLevel3 = "/mens/", pagePathLevel4 = "/mens+outerwear")),
transaction = list(list(transactionId = NA_character_, transactionRevenue = NA_integer_,
transactionTax = NA_integer_, transactionShipping = NA_integer_,
affiliation = NA_character_, currencyCode = "USD", localTransactionRevenue = NA_integer_,
localTransactionTax = NA_integer_, localTransactionShipping = NA_integer_,
transactionCoupon = NA_character_)), item = list(list(
transactionId = NA_character_, productName = NA_character_,
productCategory = NA_character_, productSku = NA_character_,
itemQuantity = NA_integer_, itemRevenue = NA_integer_,
currencyCode = "USD", localItemRevenue = NA_integer_)),
contentInfo = list(structure(list(NULL), .Names = "")), appInfo = list(
list(name = NA_character_, version = NA_character_, id = NA_character_,
installerId = NA_character_, appInstallerId = NA_character_,
appName = NA_character_, appVersion = NA_character_,
appId = NA_character_, screenName = "shop.googlemerchandisestore.com/google+redesign/apparel/mens/mens+outerwear",
landingScreenName = "shop.googlemerchandisestore.com/google+redesign/apparel/mens/mens+outerwear",
exitScreenName = "shop.googlemerchandisestore.com/google+redesign/apparel/mens/mens+outerwear",
screenDepth = "0")), exceptionInfo = list(list(description = NA_character_,
isFatal = TRUE, exceptions = NA_integer_, fatalExceptions = NA_integer_)),
eventInfo = list(structure(list(NULL, NULL, NULL, NULL), .Names = c("",
"", "", ""))), product = list(structure(list(productSKU = c("GGOEGAAX0313",
"GGOEGAAX0358", "GGOEGAAX0568", "GGOEGAAX0592", "GGOEGAAX0593",
"GGOEGAAX0598", "GGOEGAAX0595", "GGOEGAAX0596", "GGOEGAAX0569",
"GGOEGAAX0567", "GGOEGAAX0731"), v2ProductName = c("Google Tri-blend Hoodie Grey",
"Google Men's Zip Hoodie", "Google Men's Watershed Full Zip Hoodie Grey",
"Google Men's Airflow 1/4 Zip Pullover Black", "Google Men's Airflow 1/4 Zip Pullover Lapis",
"Google Men's Convertible Vest-Jacket Pewter", "Google Men's Microfiber 1/4 Zip Pullover Blue/Indigo",
"Google Men's Quilted Insulated Vest Black", "Google Men's Performance Full Zip Jacket Black",
"Google Men's Softshell Jacket Black/Grey", "YouTube Men's Fleece Hoodie Black"
), v2ProductCategory = c("Home/Apparel/Men's/Men's-Outerwear/",
"Home/Apparel/Men's/Men's-Outerwear/", "Home/Apparel/Men's/Men's-Outerwear/",
"Home/Apparel/Men's/Men's-Outerwear/", "Home/Apparel/Men's/Men's-Outerwear/",
"Home/Apparel/Men's/Men's-Outerwear/", "Home/Apparel/Men's/Men's-Outerwear/",
"Home/Apparel/Men's/Men's-Outerwear/", "Home/Apparel/Men's/Men's-Outerwear/",
"Home/Apparel/Men's/Men's-Outerwear/", "Home/Apparel/Men's/Men's-Outerwear/"
), productVariant = c("(not set)", "(not set)", "(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)",
"(not set)", "(not set)", "(not set)"), productBrand = c("(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)",
"(not set)", "(not set)", "(not set)", "(not set)", "(not set)"
), productRevenue = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), localProductRevenue = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), productPrice = c(39990000L, 55990000L, 109990000L, 69990000L,
69990000L, 98990000L, 74990000L, 74990000L, 119990000L, 98990000L,
55990000L), localProductPrice = c(39990000L, 55990000L, 109990000L,
69990000L, 69990000L, 98990000L, 74990000L, 74990000L, 119990000L,
98990000L, 55990000L), productQuantity = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), productRefundAmount = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), localProductRefundAmount = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), isImpression = c(TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
), isClick = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), customDimensions = list(structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(index = integer(0),
value = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame"))), customMetrics = list(structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), productListName = c("Category",
"Category", "Category", "Category", "Category", "Category",
"Category", "Category", "Category", "Category", "Category"
), productListPosition = 1:11, productCouponCode = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_)), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"))), promotion = list(structure(list(promoId = character(0),
promoName = character(0), promoCreative = character(0),
promoPosition = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame"))), promotionActionInfo = list(structure(list(
NULL, NULL), .Names = c("", ""))), refund = list(structure(list(
NULL, NULL), .Names = c("", ""))), eCommerceAction = list(
list(action_type = "0", step = 1L, option = NA_character_)),
experiment = list(structure(list(experimentId = character(0),
experimentVariant = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), publisher = list(
structure(list(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL), .Names = c("", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", ""))),
customVariables = list(structure(list(index = integer(0),
customVarName = character(0), customVarValue = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), customDimensions = list(
structure(list(index = integer(0), value = character(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), customMetrics = list(
structure(list(index = integer(0), value = integer(0)), row.names = c(NA,
0L), class = c("tbl_df", "tbl", "data.frame"))), type = "PAGE",
social = list(list(socialInteractionNetwork = NA_character_,
socialInteractionAction = NA_character_, socialInteractions = NA_integer_,
socialInteractionTarget = NA_character_, socialNetwork = "(not set)",
uniqueSocialInteractions = NA_integer_, hasSocialSourceReferral = "No",
socialInteractionNetworkAction = " : ")), latencyTracking = list(
structure(list(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "",
""))), sourcePropertyInfo = list(structure(list(NULL,
NULL), .Names = c("", ""))), contentGroup = list(list(
contentGroup1 = "(not set)", contentGroup2 = "Apparel",
contentGroup3 = "(not set)", contentGroup4 = "(not set)",
contentGroup5 = "(not set)", previousContentGroup1 = "(entrance)",
previousContentGroup2 = "(entrance)", previousContentGroup3 = "(entrance)",
previousContentGroup4 = "(entrance)", previousContentGroup5 = "(entrance)",
contentGroupUniqueViews1 = NA_integer_, contentGroupUniqueViews2 = 1L,
contentGroupUniqueViews3 = NA_integer_, contentGroupUniqueViews4 = NA_integer_,
contentGroupUniqueViews5 = NA_integer_)), dataSource = "web",
publisher_infos = list(structure(list(dfpClicks = integer(0),
dfpImpressions = integer(0), dfpMatchedQueries = integer(0),
dfpMeasurableImpressions = integer(0), dfpQueries = integer(0),
dfpRevenueCpm = integer(0), dfpRevenueCpc = integer(0),
dfpViewableImpressions = integer(0), dfpPagesViewed = integer(0),
adsenseBackfillDfpClicks = integer(0), adsenseBackfillDfpImpressions = integer(0),
adsenseBackfillDfpMatchedQueries = integer(0), adsenseBackfillDfpMeasurableImpressions = integer(0),
adsenseBackfillDfpQueries = integer(0), adsenseBackfillDfpRevenueCpm = integer(0),
adsenseBackfillDfpRevenueCpc = integer(0), adsenseBackfillDfpViewableImpressions = integer(0),
adsenseBackfillDfpPagesViewed = integer(0), adxBackfillDfpClicks = integer(0),
adxBackfillDfpImpressions = integer(0), adxBackfillDfpMatchedQueries = integer(0),
adxBackfillDfpMeasurableImpressions = integer(0), adxBackfillDfpQueries = integer(0),
adxBackfillDfpRevenueCpm = integer(0), adxBackfillDfpRevenueCpc = integer(0),
adxBackfillDfpViewableImpressions = integer(0), adxBackfillDfpPagesViewed = integer(0),
adxClicks = integer(0), adxImpressions = integer(0),
adxMatchedQueries = integer(0), adxMeasurableImpressions = integer(0),
adxQueries = integer(0), adxRevenue = integer(0), adxViewableImpressions = integer(0),
adxPagesViewed = integer(0), adsViewed = integer(0),
adsUnitsViewed = integer(0), adsUnitsMatched = integer(0),
viewableAdsViewed = integer(0), measurableAdsViewed = integer(0),
adsPagesViewed = integer(0), adsClicked = integer(0),
adsRevenue = integer(0), dfpAdGroup = character(0), dfpAdUnits = character(0),
dfpNetworkId = character(0)), row.names = c(NA, 0L), class = c("tbl_df",
"tbl", "data.frame")))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
Is this doable?
Note that unnest_wider() is not commutative with respect to nesting: applying the command from the inside out will give you a different answer than going outside in. For example,
# Create a nested data frame: df1 carries a list-column `values`, and df2
# stores two copies of df1 in its own list-column `dfs`
df1 <- tibble( gr = c('a', 'b', 'c'), values = list(1:2, 3:4, 5:6) )
df2 <- tibble( dfi = c(1,2), dfs = list(df1, df1) )
# Apply unnest_wider on the outer layer, then inner layer
df2 %>% unnest_wider( dfs ) %>% unnest_wider( values )
# # A tibble: 2 x 5
# dfi gr ...1 ...2 ...3
# <dbl> <list> <list> <list> <list>
# 1 1 <chr [3]> <dbl [2]> <dbl [2]> <dbl [2]>
# 2 2 <chr [3]> <dbl [2]> <dbl [2]> <dbl [2]>
# Apply unnest_wider to each inner data frame, then to the entire thing
# (mutate_at() + map() calls unnest_wider(<inner df>, "values") on every
# element of `dfs` before the outer column itself is widened)
df2 %>% mutate_at("dfs", map, unnest_wider, "values") %>% unnest_wider(dfs)
# # A tibble: 2 x 4
# dfi gr ...1 ...2
# <dbl> <list> <list> <list>
# 1 1 <chr [3]> <int [3]> <int [3]>
# 2 2 <chr [3]> <int [3]> <int [3]>
Regular unnest() doesn't have this issue:
# Outside-in: flatten the outer list-column `dfs` first, then the inner
# list-column `values`.
x1 <- df2 %>%
  unnest(dfs) %>%
  unnest(values)
# Inside-out: flatten `values` inside every nested data frame, then flatten
# `dfs`. A plain mutate() + map() replaces the superseded mutate_at().
x2 <- df2 %>%
  mutate(dfs = map(dfs, unnest, "values")) %>%
  unnest(dfs)
identical( x1, x2 ) # TRUE
So, my suggestion would be to recursively apply standard unnest() until all nested columns are on the same level. You can then follow it up with tidyr::spread() to widen the result as needed.
#' Recursively unnest every list-column of a data frame
#'
#' Calls `tidyr::unnest()` on all current list-columns at once and repeats on
#' the result until no list-column is left.
#'
#' @param .df A data frame or tibble, possibly containing list-columns.
#' @return `.df` with every list-column unnested; `.df` itself, unchanged,
#'   when it has no list-columns.
unnest_all <- function(.df) {
  list_cols <- names(.df)[vapply(.df, is.list, logical(1))]
  if (length(list_cols) == 0) {
    return(.df)
  }
  # all_of() makes tidyselect read `list_cols` as a vector of column names.
  # A bare variable is looked up among the data's columns first, so a column
  # that happened to share the variable's name would be selected instead and
  # the recursion would never terminate.
  unnest_all(tidyr::unnest(.df, cols = tidyr::all_of(list_cols)))
}
# Example
unnest_all(df2)
# # A tibble: 12 x 3
# dfi gr values
# <dbl> <chr> <dbl>
# 1 1 a 1
# 2 1 a 2
# 3 1 b 3
# ...
Your example dataset seems to have some issues with its internal structure (likely due to NA), which results in problems with unnesting, even when doing it by hand:
X <- structure(...) # As in the question
X %>% unnest(hits) %>% unnest(transaction) %>% unnest(transaction)
# Error: No common type for `..1$transaction$transaction` <character> and `..2$transaction$transaction` <integer>.
I am trying to subset my data frame according to a condition on a specific column. For this purpose I need to create a TRUE or FALSE value for each row of this column. But some rows of this column contain several combined words, and my code cannot detect them.
p <- sapply(strsplit(test$hashtags, split=","), function(x)any(x%in%"evet"))
When you check the sample data you can easily see that rows 5, 7 and 8 contain the specific word, but they are shown as FALSE.
I have tried adding an "unlist" call to my code, but it has not worked for me.
p <- sapply(unlist(strsplit(test$hashtags, split=",")), function(x)any(x%in%"evet"))
I need a single TRUE or FALSE value per row according to the specific word, even for combined rows that contain more than one word.
Thanks in advance.
Sample Data:
test <- structure(list(created_at = structure(c(1489636860, 1489636860,
1489636860, 1489636860, 1489636860, 1489636860, 1489636860, 1489636860,
1489636860, 1489636860), class = c("POSIXct", "POSIXt"), tzone = "GMT"),
user.screen_name = c("bilge_bilir", "memetozturk93", "Byomeraslan",
"tmremolar", "orhanyilmaz_77", "tamdere", "EriVatan", "BaySancaktar",
"zeynepmekik", "EriVatan"), entities.hashtags = list(structure(list(
indices = list(c(84L, 90L)), text = "Hayır"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(65L, 70L)), text = "evet"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(98L, 103L)), text = "Evet"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(98L, 104L)), text = "Hayır"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(28L, 33L), c(45L, 50L), c(89L, 94L)),
text = c("EVET", "EVET", "EVET")), .Names = c("indices",
"text"), class = "data.frame", row.names = c(NA, 3L)), structure(list(
indices = list(c(38L, 43L)), text = "EVET"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(20L, 29L), c(36L, 46L), c(89L, 94L)),
text = c("Dirilişe", "Yükselişe", "Evet")), .Names = c("indices",
"text"), class = "data.frame", row.names = c(NA, 3L)), structure(list(
indices = list(c(10L, 15L), c(16L, 20L), c(21L, 26L),
c(27L, 31L)), text = c("Evet", "Eri", "Beli", "Yes"
)), .Names = c("indices", "text"), class = "data.frame", row.names = c(NA,
4L)), structure(list(indices = list(c(125L, 130L)), text = "Evet"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L), structure(list(
indices = list(c(102L, 107L)), text = "EVET"), .Names = c("indices",
"text"), class = "data.frame", row.names = 1L)), retweeted_status.created_at = c("Thu Mar 16 03:49:15 +0000 2017",
"Wed Mar 15 23:57:44 +0000 2017", "Wed Mar 15 21:07:54 +0000 2017",
"Wed Mar 15 20:54:43 +0000 2017", "Wed Mar 15 14:41:15 +0000 2017",
"Wed Mar 15 23:07:43 +0000 2017", "Wed Mar 15 15:41:06 +0000 2017",
NA, "Wed Mar 15 11:13:15 +0000 2017", "Wed Mar 15 16:37:13 +0000 2017"
), entities.user_mentions = list(structure(list(indices = list(
c(3L, 16L), c(18L, 30L), c(44L, 55L), c(56L, 71L), c(72L,
83L)), screen_name = c("seremgiz8289", "bilge_bilir",
"OduncuTimi", "yalcinvelioglu", "OPTlMlst_Z"), id = c(301944248,
2189106581, 2756465282, 2668851081, 2734161237), id_str = c("301944248",
"2189106581", "2756465282", "2668851081", "2734161237"),
name = c("ATA KIZI HAYIR DİYOR", "Bilge Eryuz", "OduncuTimi ®",
"Yalçın Velioğlu", "OPTlMlst_Z")), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = c(NA,
5L)), structure(list(indices = list(c(3L, 16L)), screen_name = "kendimce_ben",
id = 2322523731, id_str = "2322523731", name = "İzzet#EVET/\U0001f1f9\U0001f1f7"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 12L)), screen_name = "omrolcay",
id = 360420809L, id_str = "360420809", name = "Ömer Olcay"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 18L)), screen_name = "mehmet_asassoy",
id = 3151503430, id_str = "3151503430", name = "Mehmet Asassoy"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 17L), c(120L, 132L
)), screen_name = c("sevincbeykent", "yigitbulutt"),
id = c(538364458L, 256065299L), id_str = c("538364458",
"256065299"), name = c("Sevinç", "YİĞİT BULUT"
)), .Names = c("indices", "screen_name", "id", "id_str",
"name"), class = "data.frame", row.names = 1:2), structure(list(
indices = list(c(3L, 13L)), screen_name = "AKsamet54",
id = 313205928L, id_str = "313205928", name = "Samet ÇELİK"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 18L)), screen_name = "HayataTebessum",
id = 2911157237, id_str = "2911157237", name = "Meryem"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(0L, 9L)), screen_name = "4qet1dil",
id = 536676261L, id_str = "536676261", name = "KerenGo"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 18L)), screen_name = "akkadinantalya",
id = 1898504755L, id_str = "1898504755", name = "AK Kadın Antalya"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L),
structure(list(indices = list(c(3L, 15L)), screen_name = "menes__2010",
id = 186968367L, id_str = "186968367", name = "#EVET☪ ياسين ☝"), .Names = c("indices",
"screen_name", "id", "id_str", "name"), class = "data.frame", row.names = 1L)),
hashtags = c("hayir", "evet", "evet", "hayir", "c(\"evet\", \"evet\", \"evet\")",
"evet", "c(\"dirilise\", \"yukselise\", \"evet\")", "c(\"evet\", \"eri\", \"beli\", \"yes\")",
"evet", "evet"), mentions = list(c("seremgiz8289", "bilge_bilir",
"OduncuTimi", "yalcinvelioglu", "OPTlMlst_Z"), "kendimce_ben",
"omrolcay", "mehmet_asassoy", c("sevincbeykent", "yigitbulutt"
), "AKsamet54", "HayataTebessum", "4qet1dil", "akkadinantalya",
"menes__2010")), .Names = c("created_at", "user.screen_name",
"entities.hashtags", "retweeted_status.created_at", "entities.user_mentions",
"hashtags", "mentions"), row.names = c(NA, 10L), class = "data.frame")
That is mostly because of the way the hashtags column was generated. It was stored as a list of character vectors, and when coerced to character it gave this structure.
See for example,
list(c("A", "B", "C"))
#[[1]]
#[1] "A" "B" "C"
as.character(list(c("A", "B", "C")))
#[1] "c(\"A\", \"B\", \"C\")"
Checking an individual element on your dataframe gives the same structure.
test$hashtags[5]
#[1] "c(\"evet\", \"evet\", \"evet\")"
So if there is no way you could go back and change the way the hashtags column was generated, you can use grepl instead; it would save you the strsplit and sapply calls as well.
grepl("evet", test$hashtags)
#[1] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
I would use grepl here:
# One TRUE/FALSE per row: TRUE when any comma-separated piece of that row's
# hashtags string contains "evet". vapply() guarantees a logical vector of
# the same length as the column, whereas sapply() over the bare grepl()
# result yields a ragged list for rows holding several hashtags.
p <- vapply(
  strsplit(test$hashtags, split = ","),
  function(x) any(grepl("evet", x)),
  logical(1)
)
If you really wanted to match the standalone word evet, then use word boundaries:
# One TRUE/FALSE per row: TRUE when any comma-separated piece of that row's
# hashtags string contains "evet" as a standalone word.
# The backslashes must be doubled: inside an R string literal "\b" is a
# backspace character, so the regex word boundary has to be written "\\b".
p <- vapply(
  strsplit(test$hashtags, split = ","),
  function(x) any(grepl("\\bevet\\b", x)),
  logical(1)
)
We can create a logical index column with str_detect
library(tidyverse)
# Add a logical column `ind` flagging the rows whose hashtags string
# contains "evet", then show that column.
out <- mutate(test, ind = str_detect(string = hashtags, pattern = "evet"))
out[["ind"]]
#[1] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
If we need to get the logical index for each word
# Pull the individual words out of every hashtags string and test each word
# for "evet"; `ind` becomes a list-column with one logical vector per row.
mutate(
  test,
  ind = map(str_extract_all(hashtags, "\\w+"), str_detect, pattern = "evet")
)
I have a data frame with only one row but 95 columns. The value for each column is either a list or a list of lists. Most of the lists have single valued integers. I want the data frame to contain the integer values instead of list elements. In case of lists with multiple values, I want new columns to be assigned and suitably named.
How can I go about this? I've added the dput object for reference.
structure(list(post_stories = structure(list(values = structure(list(
value = 123L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_storytellers = structure(list(values = structure(list(
value = 122L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_stories_by_action_type = structure(list(values = structure(list(
value = structure(list(share = 2L, like = 121L), .Names = c("share",
"like"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_storytellers_by_action_type = structure(list(values = structure(list(
value = structure(list(share = 2L, like = 121L), .Names = c("share",
"like"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_story_adds = structure(list(values = structure(list(
value = 123L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_story_adds_unique = structure(list(values = structure(list(
value = 122L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_story_adds_by_action_type = structure(list(values = structure(list(
value = structure(list(like = 121L, share = 2L), .Names = c("like",
"share"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_story_adds_by_action_type_unique = structure(list(values = structure(list(
value = structure(list(like = 122L, share = 2L), .Names = c("like",
"share"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s_autoplayed = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s_clicked_to_play = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s_organic = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s_paid = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_30s_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_impressions = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_impressions_unique = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_consumptions_unique = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_consumptions = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_consumptions_by_type_unique = structure(list(
values = structure(list(value = structure(list(`video play` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `other clicks` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `photo view` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `link clicks` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = c("video play",
"other clicks", "photo view", "link clicks"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_consumptions_by_type = structure(list(values = structure(list(
value = structure(list(`video play` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `other clicks` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `photo view` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), `link clicks` = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = c("video play",
"other clicks", "photo view", "link clicks"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_action_by_type_unique = structure(list(values = structure(list(
value = structure(list(like = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), comment = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), share = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = c("like", "comment",
"share"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_interests_action_by_type = structure(list(values = structure(list(
value = structure(list(like = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), comment = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame"), share = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = c("like", "comment",
"share"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions = structure(list(values = structure(list(
value = 4248L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_unique = structure(list(values = structure(list(
value = 3642L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_paid = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_paid_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_fan = structure(list(values = structure(list(
value = 4108L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_fan_unique = structure(list(values = structure(list(
value = 3527L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_fan_paid = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_fan_paid_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_organic = structure(list(values = structure(list(
value = 4171L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_organic_unique = structure(list(values = structure(list(
value = 3578L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_viral = structure(list(values = structure(list(
value = 77L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_viral_unique = structure(list(values = structure(list(
value = 74L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_by_story_type = structure(list(values = structure(list(
value = structure(list(other = 77L), .Names = "other", class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_by_story_type_unique = structure(list(values = structure(list(
value = structure(list(other = 74L), .Names = "other", class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_by_paid_non_paid = structure(list(values = structure(list(
value = structure(list(total = 4248L, unpaid = 4248L,
paid = 0L), .Names = c("total", "unpaid", "paid"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_impressions_by_paid_non_paid_unique = structure(list(
values = structure(list(value = structure(list(total = 3642L,
unpaid = 3642L, paid = 0L), .Names = c("total", "unpaid",
"paid"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_consumptions = structure(list(values = structure(list(
value = 355L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_consumptions_unique = structure(list(values = structure(list(
value = 75L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_consumptions_by_type = structure(list(values = structure(list(
value = structure(list(`other clicks` = 283L, `photo view` = 72L), .Names = c("other clicks",
"photo view"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_consumptions_by_type_unique = structure(list(values = structure(list(
value = structure(list(`other clicks` = 29L, `photo view` = 69L), .Names = c("other clicks",
"photo view"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_engaged_users = structure(list(values = structure(list(
value = 186L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_negative_feedback = structure(list(values = structure(list(
value = 1L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_negative_feedback_unique = structure(list(values = structure(list(
value = 1L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_negative_feedback_by_type = structure(list(values = structure(list(
value = structure(list(hide_clicks = 1L), .Names = "hide_clicks", class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_negative_feedback_by_type_unique = structure(list(values = structure(list(
value = structure(list(hide_clicks = 1L), .Names = "hide_clicks", class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_engaged_fan = structure(list(values = structure(list(
value = 179L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_fan_reach = structure(list(values = structure(list(value = 3527L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_anger_total = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_by_type_total = structure(list(values = structure(list(
value = structure(list(like = 109L, love = 0L, wow = 3L,
haha = 9L, sorry = 0L, anger = 0L), .Names = c("like",
"love", "wow", "haha", "sorry", "anger"), class = "data.frame", row.names = 1L)), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_haha_total = structure(list(values = structure(list(
value = 9L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_like_total = structure(list(values = structure(list(
value = 109L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_love_total = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_sorry_total = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_reactions_wow_total = structure(list(values = structure(list(
value = 3L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_avg_time_watched = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_organic = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_organic_unique = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_paid = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_complete_views_paid_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_retention_graph = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_retention_graph_clicked_to_play = structure(list(
values = structure(list(value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_retention_graph_autoplayed = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_organic = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_organic_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_paid = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_views_paid = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_paid_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_length = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_views = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_autoplayed = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_clicked_to_play = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_views_10s = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_unique = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_autoplayed = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_clicked_to_play = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_organic = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_paid = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_views_10s_paid = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_10s_sound_on = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_sound_on = structure(list(values = structure(list(
value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_view_time = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time_organic = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_view_time_organic = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time_paid = structure(list(values = structure(list(
value = c(0L, 0L, 0L), end_time = c("2017-03-10T08:00:00+0000",
"2017-03-11T08:00:00+0000", "2017-03-12T08:00:00+0000"
)), .Names = c("value", "end_time"), class = "data.frame", row.names = c(NA,
3L))), .Names = "values"), post_video_view_time_paid = structure(list(
values = structure(list(value = 0L), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time_by_age_bucket_and_gender = structure(list(
values = structure(list(value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time_by_region_id = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_views_by_distribution_type = structure(list(values = structure(list(
value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values"),
post_video_view_time_by_distribution_type = structure(list(
values = structure(list(value = structure(list(), .Names = character(0), row.names = c(NA,
-1L), class = "data.frame")), .Names = "value", class = "data.frame", row.names = 1L)), .Names = "values")), .Names = c("post_stories",
"post_storytellers", "post_stories_by_action_type", "post_storytellers_by_action_type",
"post_story_adds", "post_story_adds_unique", "post_story_adds_by_action_type",
"post_story_adds_by_action_type_unique", "post_video_complete_views_30s",
"post_video_complete_views_30s_autoplayed", "post_video_complete_views_30s_clicked_to_play",
"post_video_complete_views_30s_organic", "post_video_complete_views_30s_paid",
"post_video_complete_views_30s_unique", "post_interests_impressions",
"post_interests_impressions_unique", "post_interests_consumptions_unique",
"post_interests_consumptions", "post_interests_consumptions_by_type_unique",
"post_interests_consumptions_by_type", "post_interests_action_by_type_unique",
"post_interests_action_by_type", "post_impressions", "post_impressions_unique",
"post_impressions_paid", "post_impressions_paid_unique", "post_impressions_fan",
"post_impressions_fan_unique", "post_impressions_fan_paid", "post_impressions_fan_paid_unique",
"post_impressions_organic", "post_impressions_organic_unique",
"post_impressions_viral", "post_impressions_viral_unique", "post_impressions_by_story_type",
"post_impressions_by_story_type_unique", "post_impressions_by_paid_non_paid",
"post_impressions_by_paid_non_paid_unique", "post_consumptions",
"post_consumptions_unique", "post_consumptions_by_type", "post_consumptions_by_type_unique",
"post_engaged_users", "post_negative_feedback", "post_negative_feedback_unique",
"post_negative_feedback_by_type", "post_negative_feedback_by_type_unique",
"post_engaged_fan", "post_fan_reach", "post_reactions_anger_total",
"post_reactions_by_type_total", "post_reactions_haha_total",
"post_reactions_like_total", "post_reactions_love_total", "post_reactions_sorry_total",
"post_reactions_wow_total", "post_video_avg_time_watched", "post_video_complete_views_organic",
"post_video_complete_views_organic_unique", "post_video_complete_views_paid",
"post_video_complete_views_paid_unique", "post_video_retention_graph",
"post_video_retention_graph_clicked_to_play", "post_video_retention_graph_autoplayed",
"post_video_views_organic", "post_video_views_organic_unique",
"post_video_views_paid", "post_video_views_paid", "post_video_views_paid_unique",
"post_video_length", "post_video_views", "post_video_views",
"post_video_views_unique", "post_video_views_autoplayed", "post_video_views_clicked_to_play",
"post_video_views_10s", "post_video_views_10s", "post_video_views_10s_unique",
"post_video_views_10s_autoplayed", "post_video_views_10s_clicked_to_play",
"post_video_views_10s_organic", "post_video_views_10s_paid",
"post_video_views_10s_paid", "post_video_views_10s_sound_on",
"post_video_views_sound_on", "post_video_view_time", "post_video_view_time",
"post_video_view_time_organic", "post_video_view_time_organic",
"post_video_view_time_paid", "post_video_view_time_paid", "post_video_view_time_by_age_bucket_and_gender",
"post_video_view_time_by_region_id", "post_video_views_by_distribution_type",
"post_video_view_time_by_distribution_type"), row.names = "values", class = "data.frame")
I have a dataset that looks like this
> dput(events.seq)
structure(list(vid = structure(1L, .Label = "2a38ebc2-dd97-43c8-9726-59c247854df5", class = "factor"),
deltas = structure(1L, .Label = "38479,38488,38492,38775,45595,45602,45606,45987,50280,50285,50288,50646,54995,55001,55005,55317,59528,59533,59537,59921,63392,63403,63408,63822,66706,66710,66716,67002,73750,73755,73759,74158,77999,78003,78006,78076,81360,81367,81371,82381,93365,93370,93374,93872,154875,154878,154880,154880,155866,155870", class = "factor"),
events = structure(1L, .Label = "mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown,mouseup,click,mousemove,mousedown", class = "factor")), .Names = c("vid",
"deltas", "events"), class = "data.frame", row.names = c(NA,
-1L))
I need to normalize it to this structure:
> dput(test)
structure(list(vid = structure(c(1L, 1L, 1L), .Label = "2a38ebc2-dd97-43c8-9726-59c247854df5\n+ ", class = "factor"),
delta = c(38479, 38488, 38492), c..mousemove....mousedown....mousup.. = structure(c(2L,
1L, 3L), .Label = c("mousedown", "mousemove", "mousup"), class = "factor")), .Names = c("vid",
"delta", "c..mousemove....mousedown....mousup.."), row.names = c(NA,
-3L), class = "data.frame")
Any help appreciated.
I did try to use strsplit; the problem is that I want to split the second and third columns at the same time (they are always in sync in their length).
Try this (here `x` is your data frame, i.e. `events.seq` in the question):
# Expand the comma-separated `deltas` and `events` strings of `x` into one
# row per event. as.character() is needed because strsplit() only accepts
# character vectors, and these columns may be factors.
delta_parts <- strsplit(as.character(x$deltas), split = ",", fixed = TRUE)
event_parts <- strsplit(as.character(x$events), split = ",", fixed = TRUE)

# Both columns must split into the same number of pieces in every row;
# otherwise data.frame() could silently recycle the shorter vector.
stopifnot(identical(lengths(delta_parts), lengths(event_parts)))

# unlist() keeps the pieces from every row of `x`, not just the first one,
# and as.numeric() stores the deltas as numbers rather than text.
# To carry the id along, add: vid = rep(x$vid, lengths(delta_parts))
z <- data.frame(
  deltas = as.numeric(unlist(delta_parts)),
  events = unlist(event_parts)
)
head(z)
The result:
deltas events
1 38479 mousemove
2 38488 mousedown
3 38492 mouseup
4 38775 click
5 45595 mousemove
6 45602 mousedown