I have this data acquired from a JSON routine, which includes 5 different records. Each record has either 41 or 0 sub-elements. In addition, sub-elements 40 and 41 are sub-lists.
I need to convert this into a main dataframe in which each record will show as a row with 39 columns (sub-lists can go away).
Also, records with no data (elements 2,3,4) will still show up as rows with NULL.
a = list(structure(list(ConOrden = 1L, TipoMed = 1L, TipoPrest = 2L,
CausaS1 = 0L, CausaS2 = 0L, CausaS3 = 0L, MedPBSUtilizado = NA,
RznCausaS31 = 0L, DescRzn31 = NA, RznCausaS32 = 0L, DescRzn32 = NA,
CausaS4 = 1L, MedPBSDescartado = NA, RznCausaS41 = 0L, DescRzn41 = NA,
RznCausaS42 = 0L, DescRzn42 = NA, RznCausaS43 = 1L, DescRzn43 = "N.A.",
RznCausaS44 = 0L, DescRzn44 = NA, CausaS5 = 1L, RznCausaS5 = NA,
CausaS6 = NA, DescMedPrinAct = "[APIXABAN] 5mg/1U", CodFF = "COLFF001",
CodVA = "048", JustNoPBS = "Paciente con infeccion por sarscov2",
Dosis = "5", DosisUM = "0168", NoFAdmon = "12", CodFreAdmon = 2L,
IndEsp = 10L, CanTrat = "3", DurTrat = 5L, CantTotalF = "180",
UFCantTotal = "66", IndRec = "Paciente con infeccion por sarscov2",
EstJM = 1L, PrincipiosActivos = list(structure(list(ConOrden = 1L,
CodPriAct = "08626", ConcCant = "5", UMedConc = "0168",
CantCont = "1", UMedCantCont = "0247"), class = "data.frame", row.names = 1L)),
IndicacionesUNIRS = list(list())), class = "data.frame", row.names = 1L),
list(), structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame"),
structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame"),
structure(list(ConOrden = 1L, TipoMed = 1L, TipoPrest = 2L,
CausaS1 = 0L, CausaS2 = 0L, CausaS3 = 0L, MedPBSUtilizado = NA,
RznCausaS31 = 0L, DescRzn31 = NA, RznCausaS32 = 0L, DescRzn32 = NA,
CausaS4 = 1L, MedPBSDescartado = "OXICODONA", RznCausaS41 = 1L,
DescRzn41 = "extreñimiento", RznCausaS42 = 0L, DescRzn42 = NA,
RznCausaS43 = 0L, DescRzn43 = NA, RznCausaS44 = 0L, DescRzn44 = NA,
CausaS5 = 1L, RznCausaS5 = NA, CausaS6 = NA, DescMedPrinAct = "[ACETAMINOFEN] ",
CodFF = "COLFF001", CodVA = "048", JustNoPBS = "dolor de dificil modulacion",
Dosis = "325", DosisUM = "0168", NoFAdmon = "8", CodFreAdmon = 2L,
IndEsp = 10L, CanTrat = "60", DurTrat = 3L, CantTotalF = "180",
UFCantTotal = "66", IndRec = "tomar una cada 8 horas ",
EstJM = 1L, PrincipiosActivos = list(structure(list(ConOrden = c(1L,
1L), CodPriAct = c("00626", "50055"), ConcCant = c("325",
"30"), UMedConc = c("0168", "0168"), CantCont = c("1",
"1"), UMedCantCont = c("0247", "0247")), class = "data.frame", row.names = 1:2)),
IndicacionesUNIRS = list(list())), class = "data.frame", row.names = 1L))
For each list element, return the first 39 columns if the element has more than 0 rows. If the element is empty, return a one-row tibble with a single NA column instead, so that the record still shows up as a row.
library(dplyr)
library(purrr)
# Non-empty records contribute their first 39 columns; empty records contribute
# a one-row placeholder holding only ConOrden = NA. `.id` adds an "id" column
# with each element's position in `a`.
result <- map_df(
  a,
  function(record) {
    if (NROW(record) > 0) {
      select(record, 1:39)
    } else {
      tibble(ConOrden = NA)
    }
  },
  .id = "id"
)
dim(result)
#[1] 5 40
In the output we have 40 columns because the first column, id, identifies which list element each row came from. For this example, ids 2, 3 and 4 will have NA in all the other columns.
Related
Below is the structure of a chunk which includes two elements of list1 and list2.
list1:
list1 <- list(structure(list(chr22_20230714_G_A_b38 = 0.0000953181301665087,
chr22_20230737_G_A_b38 = -0.00124036704551427, chr22_20231229_T_A_b38 = 0.000808061558738542,
chr22_20231474_G_A_b38 = 0.000387528601423933, chr22_20231667_C_G_b38 = -0.000120624028990859), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
chr22_47157062_G_A_b38 = 0.00000909931572319958, chr22_47157212_G_A_b38 = -0.000124084106569373,
chr22_47157394_C_G_b38 = -0.0000752774417069946, chr22_47157559_G_A_b38 = 0.0000808446315377557,
chr22_47157607_T_C_b38 = 0.000237979025556899), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")))
list2:
list2 <- list(structure(list(name = c("HG00096", "HG00097", "HG00099",
"HG00100", "HG00101"), ENSG = c("ENSG00000040608", "ENSG00000040608",
"ENSG00000040608", "ENSG00000040608", "ENSG00000040608"), expr = c(-0.5186894,
0.6170779, -0.5786774, 0.07324268, -0.7579184), chr22_20230714_G_A_b38 = c(1L,
1L, 1L, 2L, 1L), chr22_20230737_G_A_b38 = c(0L, 0L, 0L, 0L, 0L
), chr22_20231229_T_A_b38 = c(1L, 0L, 1L, 0L, 1L), chr22_20231474_G_A_b38 = c(0L,
1L, 0L, 0L, 0L), chr22_20231667_C_G_b38 = c(1L, 1L, 1L, 2L, 1L
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(name = c("HG00096", "HG00097", "HG00099",
"HG00100", "HG00101"), ENSG = c("ENSG00000054611", "ENSG00000054611",
"ENSG00000054611", "ENSG00000054611", "ENSG00000054611"), expr = c(-0.5555929,
0.1600335, 0.4027508, -0.6028474, 2.271097), chr22_47157062_G_A_b38 = c(0L,
1L, 0L, 0L, 0L), chr22_47157212_G_A_b38 = c(0L, 0L, 1L, 1L, 2L
), chr22_47157394_C_G_b38 = c(0L, 1L, 1L, 1L, 2L), chr22_47157559_G_A_b38 = c(0L,
1L, 0L, 0L, 0L), chr22_47157607_T_C_b38 = c(0L, 1L, 1L, 1L, 2L
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
)))
Both lists contain the same number and names of elements, as well as the same number of columns in each corresponding element. Using this assumption, I want to multiply the value of each column in list1 by the corresponding column in list2.
Desired output:
out <- list(structure(list(name = c("HG00096", "HG00097", "HG00099",
"HG00100", "HG00101"), ENSG = c("ENSG00000040608", "ENSG00000040608",
"ENSG00000040608", "ENSG00000040608", "ENSG00000040608"), expr = c(-0.5186894,
0.6170779, -0.5786774, 0.07324268, -0.7579184), chr22_20230714_G_A_b38 = c(0.0000953,
0.0000953, 0.0000953, 0.0001906, 0.0000953), chr22_20230737_G_A_b38 = c(0,
0, 0, 0, 0), chr22_20231229_T_A_b38 = c(0.000808, 0, 0.000808,
0, 0.000808), chr22_20231474_G_A_b38 = c(0, 0.000388, 0, 0, 0
), chr22_20231667_C_G_b38 = c(-0.000121, -0.000121, -0.000121,
-0.000242, -0.000121)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(name = c("HG00096", "HG00097",
"HG00099", "HG00100", "HG00101"), ENSG = c("ENSG00000054611",
"ENSG00000054611", "ENSG00000054611", "ENSG00000054611", "ENSG00000054611"
), expr = c(-0.5555929, 0.1600335, 0.4027508, -0.6028474, 2.271097
), chr22_47157062_G_A_b38 = c(0, 0.0000091, 0, 0, 0), chr22_47157212_G_A_b38 = c(0,
0, -0.000124, -0.000124, -0.000248), chr22_47157394_C_G_b38 = c(0,
-0.0000753, -0.0000753, -0.0000753, -0.0001506), chr22_47157559_G_A_b38 = c(0,
0.0000808, 0, 0, 0), chr22_47157607_T_C_b38 = c(0, 0.000238,
0.000238, 0.000238, 0.000476)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame")))
We could use map2 (from the purrr package) or Map in base R
library(dplyr)
library(tidyr)
library(purrr)  # map2() lives in purrr, not in dplyr or tidyr
# Pair each table in `list2` with the table at the same position in `list1`
# and multiply every column named in the `list1` table by that table's
# value for the same column name.
outnew <- map2(list2, list1, function(geno, weights) {
  geno %>%
    # all_of() makes the selection strict: a name missing from `geno` is an
    # error instead of being silently skipped.
    mutate(across(all_of(names(weights)), ~ .x * weights[[cur_column()]]))
})
I have a list and I need to add together elements with different indexes. I'm struggling because I want to create a loop at different indexes.
library(pROC)  # provides the aSAH data set, roc() and coords()
data(aSAH)
# ROC curve of the s100b marker against the outcome.
rocobj <- roc(aSAH$outcome, aSAH$s100b)
# Threshold, sensitivity and specificity at every threshold of the curve.
dat <- coords(rocobj, "all", ret = c("threshold", "sensitivity", "specificity"), as.list = TRUE)
I want to create a function where I can look at the sensitivity/(1-specificity) ratio at every threshold in a new data frame. I know threshold is found in dat[1,], sensitivity is found in dat[2,] and specificity is found in dat[3,]. So I tried:
# Print, for every threshold, the threshold and sensitivity / (1 - specificity).
# NOTE(review): assumes `dat` is a matrix with one column per threshold and
# rows threshold / sensitivity / specificity — TODO confirm against coords().
for (i in seq_len(ncol(dat))) {
  print(dat[1, i])
  print(dat[2, i] / (1 - dat[3, i]))
}
Where I should end up with a dataframe that has threshold and sensitivity/1-specificity.
DATA
dput(head(aSAH))
structure(list(gos6 = structure(c(5L, 5L, 5L, 5L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5"), class = c("ordered", "factor")), outcome = structure(c(1L,
1L, 1L, 1L, 2L, 2L), .Label = c("Good", "Poor"), class = "factor"),
gender = structure(c(2L, 2L, 2L, 2L, 2L, 1L), .Label = c("Male",
"Female"), class = "factor"), age = c(42L, 37L, 42L, 27L,
42L, 48L), wfns = structure(c(1L, 1L, 1L, 1L, 3L, 2L), .Label = c("1",
"2", "3", "4", "5"), class = c("ordered", "factor")), s100b = c(0.13,
0.14, 0.1, 0.04, 0.13, 0.1), ndka = c(3.01, 8.54, 8.09, 10.42,
17.4, 12.75)), .Names = c("gos6", "outcome", "gender", "age",
"wfns", "s100b", "ndka"), row.names = 29:34, class = "data.frame")
EDIT
One answer:
# Transpose `dat`, convert it to a data frame, and append the
# sensitivity / (1 - specificity) ratio as column `new`.
dat_transform <- t(dat) %>%
  as.data.frame() %>%
  mutate(new = sensitivity / (1 - specificity))
You can use :
# Append the ratio as column `res`, then keep columns 1 and 4 by position.
transform(t, res = sensitivity/(1-specificity))[c(1, 4)]
Or with dplyr :
library(dplyr)
# Add the ratio column `res`, then keep only `threshold` and `res`.
select(
  mutate(t, res = sensitivity / (1 - specificity)),
  threshold, res
)
Also note that t is a base R function (it transposes a matrix or data frame), so it is better to use a different variable name for the data frame.
I have my data like this:
tt <- structure(list(A.T = structure(c(`4` = 2L, `5` = 3L, `6` = 1L
), .Label = c("2015(1583)", "2273(1876)", "4916(3954)"), class = "factor"),
A.G = structure(c(`4` = 2L, `5` = 1L, `6` = 3L), .Label = c("18645(2960)",
"6394(1409)", "8601(1275)"), class = "factor"), A.C = structure(c(`4` = 1L,
`5` = 3L, `6` = 2L), .Label = c("451(173)", "482(230)", "860(349)"
), class = "factor"), C.T = structure(c(`4` = 1L, `5` = 3L,
`6` = 2L), .Label = c("3885(3148)", "4042(3049)", "7772(5955)"
), class = "factor"), C.G = structure(c(`4` = 1L, `5` = 3L,
`6` = 2L), .Label = c("162(108)", "171(107)", "333(239)"), class = "factor"),
C.A = structure(c(`4` = 2L, `5` = 3L, `6` = 1L), .Label = c("1825(1481)",
"2118(1743)", "3449(2557)"), class = "factor"), G.T = structure(c(`4` = 2L,
`5` = 3L, `6` = 1L), .Label = c("2019(1794)", "2571(2336)",
"4538(4086)"), class = "factor")), class = "data.frame", row.names = c("4",
"5", "6"))
Say if I write this table as write.table(tt, "mynewcsv.csv", sep = "\t", quote = FALSE) and then open mynewcsv.csv in Excel, it would look like this:
However, I want my excel table to look like this:
What do I need to do so that mynewcsv.csv in Excel looks the way I want?
You can insert a newline character using sub():
# Insert a newline at the zero-width position between a digit and the "(" that
# follows it, so the bracketed value sits on its own line inside the cell.
# lapply() on a copy keeps the data-frame shape; sapply() would collapse a
# one-row table to a vector and write it as a single column.
tt_wrapped <- tt
tt_wrapped[] <- lapply(tt, function(x) sub("(?<=\\d)(?=\\()", "\n", x, perl = TRUE))
write.csv(tt_wrapped, "mynewcsv.csv", quote = TRUE, row.names = FALSE)
After opening in Excel, you need to highlight the cells and enable "Wrap text" for it to display correctly.
The end result should be:
I have several dataframes that share the same structure but have different column names. I want to merge them all into one dataframe, but if I use bind_rows() it creates new columns for the names that do not match.
I tried smartbind(), union() , union_all() and other libraries, however, none of them is able to simply merge them.
Here goes some sample data:
df1 <- structure(list(Codigo_Cliente = c(292640L, 48296L, 28368L, 27631L,
21715L, 401076L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
5L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CA_2018 = c(115966976.4748,
41404074.5338, 21576406.4326, NA, 5217387.0461, NA), Sal_Cons_CA_2019 = c(233057582.7658,
146012775.8314, 121273292.4548, 72383484.8781, 76605696.1462,
64418761.5503), Tipo_Cliente = structure(c(2L, 2L, 2L, 2L, 2L,
1L), .Label = c("Nuevo", "Viejo"), class = "factor"), diferencia_anual = c(117090606.291,
104608701.2976, 99696886.0222, 72383484.8781, 71388309.1001,
64418761.5503), peso_cambio = c(11.7925653553277, 10.5354732191076,
10.040788765049, 7.28996973463426, 7.18974243396645, 6.48781725327502
), cum = c(117090606.291, 221699307.5886, 321396193.6108, 393779678.4889,
465167987.589, 529586749.1393), cum_cambio = c(11.7925653553277,
22.3280385744352, 32.3688273394842, 39.6587970741185, 46.8485395080849,
53.33635676136), ones = c(1, 1, 1, 1, 1, 1), clientes = c(1,
2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.040650406504065,
0.0813008130081301, 0.121951219512195, 0.16260162601626, 0.203252032520325,
0.24390243902439), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Viejo Aportante", "Nuevo Aportante", "Viejo Aportante", "Nuevo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(21715L, 27631L,
28368L, 48296L, 292640L, 401076L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 5L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
5L, 4L, 3L, 2L, 1L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
df2 <- structure(list(Codigo_Cliente = c(29460L, 208833L, 494610L, 292653L,
371679L, 54042L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CC_2018 = c(249412694.49,
226519.47, NA, 232072.25, 893861.14, 2305969.41), Sal_Cons_CC_2019 = c(492333714.52,
217220231.86, 140551673.22, 73744015.83, 57995686.81, 54669407.01
), Tipo_Cliente = structure(c(2L, 2L, 1L, 2L, 2L, 2L), .Label = c("Nuevo",
"Viejo"), class = "factor"), diferencia_anual = c(242921020.03,
216993712.39, 140551673.22, 73511943.58, 57101825.67, 52363437.6
), peso_cambio = c(30.7889911838579, 27.5028381525124, 17.8142024395939,
9.31726115143663, 7.23736301995891, 6.63679667747068), cum = c(242921020.03,
459914732.42, 600466405.64, 673978349.22, 731080174.89, 783443612.49
), cum_cambio = c(30.7889911838579, 58.2918293363703, 76.1060317759641,
85.4232929274008, 92.6606559473597, 99.2974526248303), ones = c(1,
1, 1, 1, 1, 1), clientes = c(1, 2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.0369822485207101,
0.0739644970414201, 0.11094674556213, 0.14792899408284, 0.18491124260355,
0.22189349112426), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Nuevo Aportante", "Viejo Aportante", "Viejo Aportante", "Viejo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(29460L, 54042L,
208833L, 292653L, 371679L, 494610L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
1L, 6L, 2L, 4L, 5L, 3L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
You can use data.table package, which has rbindlist function:
# Stack the tables by column position: use.names = FALSE ignores the differing
# column names (e.g. Sal_Cons_CA_2018 vs Sal_Cons_CC_2018), whereas
# use.names = TRUE would try to match columns by name.
df <- data.table::rbindlist(list(df1, df2), use.names = FALSE)
I'm trying to reproduce a data frame and dput is not cooperating.
dput command :
dput(head(data, 10))
dput output :
structure(list(lexptot = c(8.28377505197124, 9.1595012302023,
8.14707583238833, 9.86330744180814, 8.21391453619232, 8.92372556833205,
7.77219149815994, 8.58202430280175, 8.34096828565733, 10.1133857229336
), year = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), dfmfdyr = c(0,
1, 0, 1, 0, 1, 0, 1, 0, 1), dfmfd98 = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1), nh = c(11054L, 11054L, 11061L, 11061L, 11081L, 11081L,
11101L, 11101L, 12021L, 12021L)), .Names = c("lexptot", "year",
"dfmfdyr", "dfmfd98", "nh"), vars = list(nh), drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7, 8:9), group_sizes = c(2L, 2L, 2L, 2L,
2L), biggest_group_size = 2L, labels = structure(list(nh = c(11054L,
11061L, 11081L, 11101L, 12021L)), class = "data.frame", row.names = c(NA,
-5L), .Names = "nh", vars = list(nh)), row.names = c(NA, 10L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Error :
Error in structure(list(lexptot = c(8.28377505197124, 9.1595012302023, :
object 'nh' not found
Why is this happening right from a dput command?
Edit :
Relevant posts, but suggestions did not work.
Why does this dplyr dput not work?
Edit 2 :
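It appears that because the data frame is grouped (class grouped_df), the dput output cannot be evaluated again: the grouping attribute is written as vars = list(nh), which refers to an object named nh that does not exist. The solution is to ungroup the data with ungroup() before calling dput; then everything works.
The issue was that the data frame was grouped by one of its variables, so the output of dput() contained a reference to that variable that R could not resolve. The solution was to ungroup() the data first.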
# ungroup() returns a new object rather than modifying `data` in place, so the
# result has to be assigned back before dput() is called.
data <- ungroup(data)
dput(head(data, 10))
New Data.frame :
structure(list(lexptot = c(8.28377505197124, 9.1595012302023,
8.14707583238833, 9.86330744180814, 8.21391453619232, 8.92372556833205,
7.77219149815994, 8.58202430280175, 8.34096828565733, 10.1133857229336
), year = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), dfmfd98 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), dfmfd = c(0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L)), .Names = c("lexptot", "year", "dfmfd98", "dfmfd"
), class = c("tbl_df", "data.frame"), row.names = c(NA, -10L))