I have a "main_df" along the lines of this:
structure(list(study_id = c("02ipnnqgeovkrxz", "02ipnnqgeovkrxz",
"02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz"
), question = c("3eEVJgaAP6c9FPL", "b8GLxGjZKtstCQZ", "40iyFKjeMEFGI2V",
"6eZGejSZ1oTZYLb", "3pXAUvZH8GGuryd", "0kYkUAHe4iODUl7"), study_rt = c("1.353",
"0.714", "0.68", "0.695", "0.696", "0.656"), study_response = c("picture",
"picture", "picture", "picture", "picture", "picture")), row.names = c(NA,
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), reshapeWide = list(
v.names = NULL, timevar = "index", idvar = c("study_id",
"question"), times = c("rt", "response"), varying = structure(c("response.rt",
"response.response"), .Dim = 1:2)), groups = structure(list(
study_id = "02ipnnqgeovkrxz", .rows = list(1:6)), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
and a reference df along the lines of this:
structure(list(stim = c("ashtray_word", "bell_word", "blouse_word",
"boot_word", "bottle_word", "bread_word"), url = c("eW1BRoUDV4BKQMl",
"5zKTGwHlwlzpssB", "55SVfoQudZJNCFT", "bOORR1zuKYSnAe9", "6RrOQfDZim81pHv",
"1F97ouH0HrwQOgZ"), study_list = c("A", "A", "A", "A", "A", "A"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
Each value in the 'question' column of the main df can be found in the 'url' column of the reference df. I want to match these values, and add all columns from that row of the reference df to my main df. The output will look like this:
structure(list(study_id = c("02ipnnqgeovkrxz", "02ipnnqgeovkrxz",
"02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz"
), question = c("3eEVJgaAP6c9FPL", "b8GLxGjZKtstCQZ", "40iyFKjeMEFGI2V",
"6eZGejSZ1oTZYLb", "3pXAUvZH8GGuryd", "0kYkUAHe4iODUl7"), study_rt = c("1.353",
"0.714", "0.68", "0.695", "0.696", "0.656"), study_response = c("picture",
"picture", "picture", "picture", "picture", "picture"), stim = c("chisel_picture",
"raccoon_picture", "apple_picture", "belt_picture", "bicycle_picture",
"cake_picture"), url = c("3eEVJgaAP6c9FPL", "b8GLxGjZKtstCQZ",
"40iyFKjeMEFGI2V", "6eZGejSZ1oTZYLb", "3pXAUvZH8GGuryd", "0kYkUAHe4iODUl7"
), study_list = c("B FILLER", "B FILLER", "B", "B", "B", "B")), row.names = c(NA,
-6L), groups = structure(list(study_id = "02ipnnqgeovkrxz", .rows = list(
1:6)), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
This will allow me to see the 'sensible' item names (e.g. "chisel_picture") that subjects were responding to, as opposed to the nonsensical code names I have now (e.g. "3eEVJgaAP6c9FPL"). The same items appear over and over again in the 'question' column (as different subjects saw the same items), and I need to preserve these repeats.
I have successfully managed this using a for loop...but it's super slow! A tidyverse solution would be amazing!
My awful for loop (study_data = main df / image_urls = reference df):
# Vectorised replacement for the row-by-row loop: growing `matched_items`
# with bind_rows() on every iteration copies the whole tibble each time,
# which is what makes the loop slow on large inputs.
all_study_stim_items <- study_data$question # List all values in 'question' column.
matched_items <- tibble(url = all_study_stim_items) %>%
  # inner_join() keeps the rows in the order of `all_study_stim_items`
  # (repeats included) and drops codes that have no match in `image_urls`,
  # just as filter() returned zero rows for them in the loop.
  # na_matches = "never" mirrors `url == i`, which is never TRUE for NA.
  inner_join(image_urls, by = "url", na_matches = "never") %>%
  select(stim, url, study_list)
# I then join this with the main df.
I have a data frame called df. How can I create new columns from an existing list column of the data frame?
my data frame.
Policy Item
Checked list(Processed = "Valid", Gmail = "yy#gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))
Sample list(Processed = "Valid", Gmail = "tt#gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))
Processed list(Processed = "Valid", Gmail = "pp#gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))
my expected data frame.
Policy Processed Gmail Descrption VID
Checked Valid yy#gmail "T1,R1" "YUY"
Sample Valid tt#gmail "D3,Y3" "RT"
Processed Valid pp#gmail "Y2,LE" "UIU"
I'm using the code below to try to get my expected data frame.
# Substitute NA for a NULL value; any non-NULL input is returned unchanged.
na_if_null <- function(x) {
  if (!is.null(x)) {
    return(x)
  }
  NA
}
# For every list column of `df`, build one new column per field name found
# in its cells; a cell that lacks a field contributes NA for that field.
new_cols <- lapply(
  Filter(is.list, df),
  function(col) {
    # Distinct field names across all cells, as a self-named vector so the
    # list of generated columns is named by field.
    fields <- unique(do.call(c, lapply(col, names)))
    fields <- setNames(nm = fields)
    lapply(fields, function(field) {
      sapply(col, function(cell) {
        value <- as.list(cell)[[field]]
        trimws(na_if_null(value))
      })
    })
  }
)
# Append the generated columns to the original data frame. Flattening the
# per-column lists with c() is what produces the <list column>.<field> names.
res <- do.call(
  data.frame,
  c(list(df, check.names = FALSE, stringsAsFactors = FALSE), do.call(c, new_cols))
)
But I'm getting the data frame below. Please help me get the expected result.
Policy Item Item.Processed Item.Gmail Item.Information
Checked list(Processed = "Valid", Gmail = "yy#gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY"))) Processed yy#gmail list(Descrption = "T1, R1", VID = "YUY")
Sample list(Processed = "Valid", Gmail = "tt#gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT"))) Processed tt#gmail list(Descrption = "D3, Y3", VID = "RT")
Processed list(Processed = "Valid", Gmail = "pp#gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU"))) Processed pp#gmail list(Descrption = "Y2, LE", VID = "UIU")
dput
structure(list(Policy = c("Checked", "Sample", "Processed"), Item = list(
structure(list(Processed = "Valid", Gmail = "yy#gmail", Information = list(
structure(list(Descrption = "T1, R1", VID = "YUY"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L),
structure(list(Processed = "Valid", Gmail = "tt#gmail", Information = list(
structure(list(Descrption = "D3, Y3", VID = "RT"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L),
structure(list(Processed = "Valid", Gmail = "pp#gmail", Information = list(
structure(list(Descrption = "Y2, LE", VID = "UIU"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L))), row.names = c(NA,
3L), class = "data.frame", .Names = c("Policy", "Item"))
Sample data frame
Policy colval Item
Checked list(PID="4",Bdetail ="ui,89") list(Processed = "Valid", Gmail = "yy#gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))
Sample list(PID="7",Bdetail ="ju,78") list(Processed = "Valid", Gmail = "tt#gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))
Processed list(PID ="8",Bdetail ="nj,45") list(Processed = "Valid", Gmail = "pp#gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))
Here is a solution in base R:
# Flatten each row's nested Item with unlist(), stack the flattened rows
# into a matrix, and put Policy in front as the first column.
dd <- cbind(
  dx$Policy,
  do.call(
    rbind,
    lapply(seq_len(nrow(dx)), function(row) {
      unlist(dx$Item[row])
    })
  )
)
colnames(dd) <- c("Policy", "Processed", "Gmail", "Descrption", "VID")
dd
# Policy Processed Gmail Descrption VID
# [1,] "Checked" "Valid" "yy#gmail" "T1, R1" "YUY"
# [2,] "Sample" "Valid" "tt#gmail" "D3, Y3" "RT"
# [3,] "Processed" "Valid" "pp#gmail" "Y2, LE" "UIU"
Basically I am using unlist for each item, and then joining the results using the classic do.call(rbind, list).
edit
In case you want to use the same names as the original sub-lists, you can do something like:
# Keep only the part of each generated name after its last dot, so the
# columns carry the names used in the original sub-lists.
colnames(dd) <- c("Policy", sub(".*[.]", "", colnames(dd)[-1]))
data.table solution
library(data.table)
# Turn dx into a data.table, then, for each Policy, flatten every remaining
# column's cell into a one-row table and bind those rows together.
setDT(dx)
dx[,
  rbindlist(lapply(.SD, function(cell) data.table(t(unlist(cell))))),
  by = Policy
]
Easily done with unnest from tidyr:
library(dplyr)
library(tidyr)
# unnest() needs its columns named explicitly: calling it with no `cols`
# is deprecated since tidyr 1.0.0. Two passes are required because Item
# holds data frames that themselves contain the list column Information.
df %>%
  unnest(cols = Item) %>%
  unnest(cols = Information)
Result:
Policy Processed Gmail Descrption VID
1 Checked Valid yy#gmail T1, R1 YUY
2 Sample Valid tt#gmail D3, Y3 RT
3 Processed Valid pp#gmail Y2, LE UIU
Data:
# Example input: one row per Policy; each Item cell is a one-row data frame
# whose Information column is itself a list holding a one-row data frame.
df <- structure(
  list(
    Policy = c("Checked", "Sample", "Processed"),
    Item = list(
      structure(
        list(
          Processed = "Valid",
          Gmail = "yy#gmail",
          Information = list(
            structure(
              list(Descrption = "T1, R1", VID = "YUY"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(
          Processed = "Valid",
          Gmail = "tt#gmail",
          Information = list(
            structure(
              list(Descrption = "D3, Y3", VID = "RT"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(
          Processed = "Valid",
          Gmail = "pp#gmail",
          Information = list(
            structure(
              list(Descrption = "Y2, LE", VID = "UIU"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      )
    )
  ),
  row.names = c(NA, 3L),
  class = "data.frame",
  .Names = c("Policy", "Item")
)
Note:
Notice that I used two passes of unnest because there are two levels of lists in your original data frame. unnest automatically flattens all list columns in the data frame and reuses the names, but it does not do so recursively, so you need as many unnest calls as there are list levels.
Below is a subsample of my data set (only 2 rows by 215 columns). I am trying to view it in RStudio, but it gives me the following error:
Error in View : undefined columns selected
I do not really know what is going on. The whole set is 7786 rows by 215 columns. Viewing the whole set works fine; however, after any kind of subsetting, or after removing a single row, it can no longer be viewed.
structure(list(`NA` = structure(c(16343, 16344), class = "Date"),
AVON = c("615.5", "621.5"), BA. = c("471.5", "463.2"), CMRG = c("224.5",
"224.5"), COB = c("291.10000000000002", "283.5"), MGGT = c("451.2",
"444.7"), QQ. = c("224.5", "223.5"), RR. = c("953.65", "933.38"
), SNR = c("268.2", "264.7"), ULE = c("1771", "1746"), GKN = c("319.2",
"311.5"), BRAG = c("617", "603"), BVIC = c("668", "661"),
CCH = c("1333", "1327"), DGE = c("1785", "1760.5"), SAB = c("3428",
"3383"), STCK = c("291.60000000000002", "294"), ALNT = c("328",
"321"), CAR = c("125", "124.5"), CRDA = c("2053", "1990"),
ELM = c("255.5", "254.5"), JMAT = c("2919", "2825"), SYNT = c("212",
"210.8"), VCTA = c("1606", "1605"), DIA = c("901", "924"),
DNO = c("611", "611"), E2V = c("161", "160.5"), HLMA = c("612",
"598.5"), HTY = c("309.8", "308"), MGAM = c("296.8", "289.40000000000003"
), OXFD = c("1020", "1035"), RSHW = c("1630", "1625"), SXS = c("1808",
"1778"), TTG = c("166.75", "167.5"), XAR = c("376", "367"
), X = c("1527", "1520"), ABF = c("2679", "2654"), AE = c("633.5",
"640"), CARM = c("1647", "1637"), CWK = c("1328", "1320"),
DCG = c("383.75", "369"), DVO = c("237.75", "231"), GNCL = c("234",
"229.6"), HFG = c("416", "411"), FD = c("36.5", "34.75"),
TATE = c("591.5", "585"), MNDI = c("1011", "1012"), BI = c("616",
"620"), REX = c("491.8", "483.5"), RC = c("559", "540"),
SMDS = c("266.3", "257"), SMIN = c("1264", "1250"), VSVS = c("451.8",
"438.40000000000003"), AGA = c("163.25", "160.25"), BDEV = c("396.1",
"389.3"), BKG = c("2250", "2224"), BLWY = c("1567", "1558"
), BVS = c("779", "771"), CRST = c("325", "314.60000000000002"
), GLSN = c("393.5", "388.5"), MCB = c("83.53", "83.29"),
SN = c("1334", "1309"), RB. = c("5350", "5305"), RDW = c("280.7",
"273.8"), TW. = c("112.8", "111.8"), BODY = c("668.5", "647"
), FENR = c("317.60000000000002", "313.10000000000002"),
GDWN = c("3500", "3500"), HILS = c("561", "561.5"), IMI = c("1230",
"1206"), MRO = c("247.70000000000002", "246"), VAR = c("304",
"300.75"), RNO = c("56", "54.5"), RTRK = c("2765", "2736"
), SFR = c("63.5", "64"), SRX = c("2826", "2812"), TRI = c("105.75",
"105"), VTC = c("613.5", "612"), WEIR = c("2502", "2430"),
EVR = c("130", "123.60000000000001"), FXO = c("112.3", "105.10000000000001"
), BBA = c("325", "326"), BMS = c("494.38", "492"), CKN = c("2350",
"2341"), FSHR = c("1326", "1294"), RMG = c("392.2", "399.7"
), STOB = c("111", "109"), UKM = c("473.88", "467"), WIN = c("136.25",
"137.5"), GAW = c("597.5", "585"), HTM = c("131.5", "129.25"
), `NA` = c(NA_character_, NA_character_), AAL = c("1384",
"1363.5"), ABG = c("218.8", "209.1"), ANTO = c("721", "702"
), AF = c("131.5", "130.25"), AQ = c("18.5", "18.75"), ARMS = c("69",
"62.25"), BLT = c("1715", "1690.5"), CEY = c("61.15", "61"
), FRES = c("760", "747"), GEMD = c("192", "191.75"), GLEN = c("343.2",
"336.45"), HOC = c("135.30000000000001", "130.19999999999999"
), KAZ = c("263.39999999999998", "260.10000000000002"), KMRL = c("9.5",
"9.3000000000000007"), LMI = c("185.8", "176.8"), NWR = c("1.97",
"1.82"), `NA` = c(NA_character_, NA_character_), DL = c("190.20000000000002",
"190"), OG = c("22", "24"), OLY = c("516", "496.6"), RIO = c("3031.5",
"3020"), RRS = c("4209", "4154"), VED = c("998.5", "974.5"
), AFR = c("103.5", "109.4"), BG. = c("1140", "1093"), B. = c("453.45",
"452.75"), CNE = c("176.5", "171.6"), ENQ = c("109.60000000000001",
"107.8"), EXI = c("157", "150"), HDY = c("102", "99.75"),
JKX = c("48.25", "47"), OHR = c("229.3", "220.9"), MO = c("333",
"324.7"), RDSA = c("2358.5", "2331"), RDSB = c("2437", "2418.5"
), SIA = c("381", "377.90000000000003"), SMDR = c("100",
"98.5"), TLW = c("644.5", "631"), AMEC = c("1104", "1077"
), CIU = c("283.5", "275.75"), GMS = c("157", "157"), HTG = c("892.5",
"876"), LAM = c("163.25", "160"), FC = c("1037", "1011"),
WG. = c("759.5", "743"), BRBY = c("1511", "1476"), ZC = c("365.7",
"366"), SG = c("1133", "1126"), TED = c("1863", "1862"),
ULVR = c("2585", "2547"), AZN = c("4441.5", "4360.5"), BTG = c("700",
"697.5"), CIR = c("304", "300"), DH = c("758", "753"), GNS = c("1130",
"1130"), GSK = c("1413", "1414"), HIK = c("1733", "1715"),
SH = c("5340", "5310"), SK = c("329.25", "319"), VEC = c("132",
"132"), AGK = c("1548", "1528"), AHT = c("1043", "1024"),
ATK = c("1317", "1323"), BAB = c("1092", "1085"), BNZL = c("1610",
"1597"), BRAM = c("376", "374"), BRSN = c("980", "979"),
CLLN = c("304.60000000000002", "304.3"), CMS = c("59.75",
"59.5"), CNCT = c("149.25", "151"), CI = c("1164", "1165"
), CTR = c("259.5", "255"), DCC = c("3422", "3405"), DLAR = c("477",
"478"), DLM = c("689.5", "685"), ECOM = c("223", "219.8"),
ESNT = c("797.5", "792.5"), EXO = c("176.5", "180"), EXN = c("983.5",
"968"), GFS = c("250.70000000000002", "251.6"), GFTU = c("626",
"616"), HAS = c("116.3", "115.7"), HRG = c("45.75", "45.75"
), HSV = c("319.7", "319"), HWDN = c("339.1", "335"), HYC = c("749",
"748"), IRV = c("599.5", "592.5"), ITRK = c("2621", "2631"
), LVD = c("201.75", "201.5"), MER = c("435", "436.75"),
MMC = c("25.25", "25"), MNZS = c("569", "575.5"), MI = c("418.6",
"421"), MTO = c("287.90000000000003", "286.60000000000002"
), NTG = c("483.8", "481.3"), AY = c("983.5", "989"), FL = c("182",
"180.1"), RCDO = c("671", "667.5"), RENT = c("117.8", "116"
), RGU = c("169.70000000000002", "169.9"), RS = c("261",
"251.6"), RWA = c("302.5", "302.5"), SDY = c("70.5", "69.75"
), SERC = c("286.10000000000002", "279.8"), SHI = c("166.6",
"161.1"), SIV = c("199.75", "200"), SKS = c("90", "92"),
STHR = c("350.25", "358.5"), TK = c("1664", "1635"), TRB = c("170.5",
"172"), V. = c("609.5", "600"), WOS = c("3242", "3243"),
XCH = c("188", "184.75"), ARM = c("906", "887.5"), BVC = c("16.38",
"16.25"), CSR = c("758", "756"), IMG = c("188.5", "184.75"
), LRD = c("309.7", "306.7"), IC = c("298.10000000000002",
"299"), SEU = c("141", "141"), ST = c("104.60000000000001",
"99.9"), BATS = c("3482", "3480"), IMT = c("2664", "2679"
)), .Names = c("NA", "AVON", "BA.", "CMRG", "COB", "MGGT",
"QQ.", "RR.", "SNR", "ULE", "GKN", "BRAG", "BVIC", "CCH", "DGE",
"SAB", "STCK", "ALNT", "CAR", "CRDA", "ELM", "JMAT", "SYNT",
"VCTA", "DIA", "DNO", "E2V", "HLMA", "HTY", "MGAM", "OXFD", "RSHW",
"SXS", "TTG", "XAR", "X", "ABF", "AE", "CARM", "CWK", "DCG",
"DVO", "GNCL", "HFG", "FD", "TATE", "MNDI", "BI", "REX", "RC",
"SMDS", "SMIN", "VSVS", "AGA", "BDEV", "BKG", "BLWY", "BVS",
"CRST", "GLSN", "MCB", "SN", "RB.", "RDW", "TW.", "BODY", "FENR",
"GDWN", "HILS", "IMI", "MRO", "VAR", "RNO", "RTRK", "SFR", "SRX",
"TRI", "VTC", "WEIR", "EVR", "FXO", "BBA", "BMS", "CKN", "FSHR",
"RMG", "STOB", "UKM", "WIN", "GAW", "HTM", NA, "AAL", "ABG",
"ANTO", "AF", "AQ", "ARMS", "BLT", "CEY", "FRES", "GEMD", "GLEN",
"HOC", "KAZ", "KMRL", "LMI", "NWR", NA, "DL", "OG", "OLY", "RIO",
"RRS", "VED", "AFR", "BG.", "B.", "CNE", "ENQ", "EXI", "HDY",
"JKX", "OHR", "MO", "RDSA", "RDSB", "SIA", "SMDR", "TLW", "AMEC",
"CIU", "GMS", "HTG", "LAM", "FC", "WG.", "BRBY", "ZC", "SG",
"TED", "ULVR", "AZN", "BTG", "CIR", "DH", "GNS", "GSK", "HIK",
"SH", "SK", "VEC", "AGK", "AHT", "ATK", "BAB", "BNZL", "BRAM",
"BRSN", "CLLN", "CMS", "CNCT", "CI", "CTR", "DCC", "DLAR", "DLM",
"ECOM", "ESNT", "EXO", "EXN", "GFS", "GFTU", "HAS", "HRG", "HSV",
"HWDN", "HYC", "IRV", "ITRK", "LVD", "MER", "MMC", "MNZS", "MI",
"MTO", "NTG", "AY", "FL", "RCDO", "RENT", "RGU", "RS", "RWA",
"SDY", "SERC", "SHI", "SIV", "SKS", "STHR", "TK", "TRB", "V.",
"WOS", "XCH", "ARM", "BVC", "CSR", "IMG", "LRD", "IC", "SEU",
"ST", "BATS", "IMT"), row.names = 7785:7786, class = "data.frame")
I am on Mac OS 10.10, R 3.1.1 and RStudio 0.98.1060.
One of your column names is NA. If d is your data defined above, then try names(d)[92]. Try replacing with a non-missing column name.
As already mentioned by DMC, but with a shorter version of your example code.
# Failing case: a cut-down data frame whose first column name is a missing
# value (NA), not the string "NA".
a <- structure(list(`NA` = structure(c(16343, 16344), class = "Date"),
AVON = c("615.5", "621.5"),
BA. = c("471.5", "463.2"),
`NA` = c(NA_character_, NA_character_), AAL = c("1384", "1363.5")),
# .Names overrides the list names above; its first entry is NA, not "NA".
.Names = c(NA, "AVON", "BA.", "NA", "AAL"), row.names = 7785:7786, class = "data.frame")
# Fails with the "undefined columns selected" error shown below.
View(a)
Error in View : undefined columns selected
names(a)
[1] NA "AVON" "BA." "NA" "AAL"
# Working case: the same data, but the first column name is now the
# string "NA" instead of a missing value.
a <- structure(list(`NA` = structure(c(16343, 16344), class = "Date"),
AVON = c("615.5", "621.5"),
BA. = c("471.5", "463.2"),
`NA` = c(NA_character_, NA_character_), AAL = c("1384", "1363.5")),
# Every entry of .Names is a non-missing string here.
.Names = c("NA", "AVON", "BA.", "NA", "AAL"), row.names = 7785:7786, class = "data.frame")
View(a)
# Print the column names; the output is shown below.
names(a)
[1] "NA" "AVON" "BA." "NA" "AAL"
You need to have proper names in the data frame to View it.