count number of times string appears in a column - r

Can you think about an intuitive way of calculating the number of times the word space appears in a certain column? Or any other solution that is viable.
I basically want to know how many times the space key was pressed. However, some participants mistakenly pressed other keys, and those presses should also count as mistakes. So I was wondering if I should use the "key_resp.rt" column instead and count the number of response times. If you have any idea how to do both, that would be great, as I may need to use both.
I used the following code but the results do not conform to the data.
Data %>% group_by(Participant, Session) %>% summarise(false_start = sum(str_count(key_resp.keys, "space")))
Here is a snippet of my data:
Participant RT Session key_resp.keys key_resp.rt
X 0.431265 1 ["space"] [2.3173399999941466]
X 0.217685 1
X 0.317435 2 ["space","space"] [0.6671900000001187,2.032510000000002] 2020.1.3 4
Y 0.252515 1
Y 0.05127 2 ["space","space","space","space","space","space","space","space","space"] [4.917419999999765,6.151149999999689,6.333714999999771,6.638249999999971,6.833514999999338,7.0362499999992,7.217724999999504,7.38576999999988,7.66913999999997]
dput(droplevels(head(Data_PVT)))
structure(list(Interval_stimulus = c(4.157783411, 4.876139922,
5.67011868, 9.338167417, 9.196342656, 7.62448411), Participant = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "ADH80254", class = "factor"),
RT = c(431.265, 277.99, 253.515, 310.53, 299.165, 539.46),
Session = c(1L, 1L, 1L, 1L, 1L, 1L), date = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "2020-06-12_11h11.47.141", class = "factor"),
key_resp.keys = structure(c(2L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"[\"space\"]"), class = "factor"), key_resp.rt = structure(c(2L,
1L, 1L, 1L, 1L, 1L), .Label = c("", "[2.3173399999941466]"
), class = "factor"), psychopyVersion = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "2020.1.3", class = "factor"),
Trials = 0:5, Reciprocal = c(2.31875992719094, 3.59725169970143,
3.94453977082224, 3.22030077609249, 3.3426370063343, 1.85370555740926
)), row.names = c(NA, 6L), class = "data.frame")
Expected output:
Participant Session false_start
x 1 0
x 2 1
y 1 2
y 2 1
z 1 10
z 2 3

We can use str_count to count the occurrences of "space" in each row, then sum them for each Participant and Session to get the total. For all_false_start, we instead count every word in the column, so presses of any key are included.
library(dplyr)
library(stringr)
# Count key presses per participant and session.
#   false_start     - number of "space" presses recorded in key_resp.keys
#   all_false_start - number of key names of any kind (every word counts)
# na.rm = TRUE keeps a trial whose key_resp.keys is NA from turning the whole
# group total into NA; empty strings already count as 0.
df %>%
  group_by(Participant, Session) %>%
  summarise(
    false_start = sum(str_count(key_resp.keys, "\\bspace\\b"), na.rm = TRUE),
    all_false_start = sum(str_count(key_resp.keys, "\\b\\w+\\b"), na.rm = TRUE)
  ) %>%
  # summarise() only peels off the last grouping level (Session); drop the
  # remaining Participant grouping so later steps see a plain table
  ungroup()

Related

Issues with pivot_wider and unique identifiers because of duplicate values

I'm trying to use pivot_wider to move my dataset from long to wide so I can use it in a different programme.
I have seen the other posts on this topic but the solutions don't address my problem.
I have measurement variable called "rating" which has a value for each "rock" and each test ("gentest", first and second). I have an id variable called "turkcode".
For each individual in the dataset, there are 18 ratings. The problem is that there are 4 ratings for rock #8 and I think this is why the data won't pivot wider the way I want them to.
Here's a subset of the data
structure(list(turkcode = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("100879",
"104655", "108505", "110324", "110600", "112445", "114083", "115814",
"116573", "117411", "117817", "118651", "119324", "121548", "121883",
"121918", "123275", "123718", "125491", "127450", "127825", "128062",
"129061", "131404", "135358", "135594", "135671", "135945", "137951",
"138675", "139469", "140924", "145730", "147222", "148533", "150851",
"153455", "158882", "164468", "166907", "169260", "171463", "172398",
"175565", "177108", "179000", "180270", "183953", "185574", "185880",
"185948", "186371", "187787", "189220", "190014", "192550", "193904",
"195308", "196755", "197493", "198368", "200155", "200297", "201915",
"214519", "215994", "217903", "218771", "219302", "220434", "222740",
"223223", "224721", "225118", "225223", "229856", "229874", "231301",
"232576", "233842", "234215", "237581", "239567", "240609", "241098",
"241423", "242108", "244633", "246055", "251597", "252929", "255252",
"256652", "259936", "274962", "277053", "279422", "280317", "282602",
"283750", "285737", "286259", "287544", "288507", "290503", "291401",
"291835", "292160", "294117", "297863", "298061", "299347", "299499",
"301399", "304875", "305231", "306312", "307410", "308979", "311157",
"311524", "311630", "318956", "318988", "319995", "321405", "324288",
"327086", "327559", "328345", "328401", "330318", "330909", "332723",
"334115", "334517", "335811", "335831", "337145", "338323", "338542",
"338575", "340083", "341182", "343612", "343947", "344554", "346476",
"349874", "350117", "350433", "350972", "351187", "355311", "356717",
"359366", "360048", "360058", "361191", "361971", "362827", "363543",
"367244", "374254", "374965", "376278", "377622", "382139", "382916",
"384586", "385229", "386782", "388951", "389029", "390299", "390662",
"396335", "396732", "398076", "398573", "399276", "399587", "403388",
"406073", "406160", "411977", "412935", "417350", "420060", "421393",
"422944", "424462", "427143", "429291", "430758", "431629", "431638",
"431935", "432218", "433788", "434291", "436681", "437087", "439385",
"439499", "440477", "440834", "441253", "441876", "443826", "444080",
"447597", "452643", "454649", "457055", "457946", "463512", "464079",
"464123", "467897", "468650", "470211", "471115", "471512", "475493",
"476937", "479198", "482871", "484066", "484070", "485462", "486402",
"491701", "491835", "499644", "501833", "502335", "502373", "504800",
"507439", "507946", "507987", "509066", "513078", "515519", "517017",
"517988", "519144", "519210", "519858", "522847", "523683", "525315",
"528577", "532463", "532630", "533028", "539033", "539852", "540690",
"546773", "546916", "549652", "551599", "554198", "556066", "559920",
"560804", "560857", "562080", "562420", "563841", "565668", "565776",
"566509", "569039", "572553", "575364", "576421", "576694", "576877",
"577120", "577155", "577534", "577605", "578463", "578820", "578995",
"580213", "581893", "582433", "582905", "583887", "584569", "585314",
"585566", "587393", "589144", "592284", "594463", "596863", "601837",
"602632", "604254", "605885", "609296", "609963", "610062", "612437",
"612949", "613161", "614372", "614777", "615372", "615384", "616927",
"618118", "620041", "620336", "621634", "622289", "624098", "626163",
"626612", "627019", "627856", "630003", "630255", "634018", "634478",
"635801", "638606", "640012", "641078", "641366", "641436", "641821",
"642076", "642446", "643329", "643942", "644015", "646792", "647254",
"647700", "649516", "650792", "650810", "651229", "652387", "652671",
"654778", "657964", "658894", "660500", "660607", "664469", "666754",
"666796", "668996", "669712", "671682", "673516", "675712", "677835",
"678008", "679262", "680295", "686455", "690471", "691175", "692489",
"694023", "696001", "698716", "700133", "700641", "707812", "707953",
"708010", "708881", "713657", "715255", "715386", "716764", "718936",
"719956", "725348", "727753", "728436", "729588", "730513", "731928",
"732013", "732438", "733366", "733559", "734672", "735174", "735675",
"737044", "737127", "741264", "745262", "748173", "748414", "748943",
"749221", "749963", "750363", "753518", "754512", "754970", "758639",
"760838", "761642", "766250", "770646", "772574", "773054", "775271",
"776762", "778208", "779453", "781378", "781861", "782257", "785763",
"785860", "787011", "790280", "791735", "791903", "792178", "796650",
"796822", "796970", "798621", "802731", "804701", "805606", "807848",
"809142", "810539", "812182", "812321", "814029", "814545", "814774",
"815079", "816572", "824215", "825063", "827763", "829973", "829983",
"830126", "832112", "832666", "833066", "834756", "835270", "835340",
"837413", "837746", "839882", "846097", "847975", "848746", "851745",
"851975", "856622", "858918", "859174", "859182", "859726", "859850",
"862222", "864356", "865028", "869700", "871576", "872256", "873350",
"873597", "875873", "883140", "886308", "886592", "886706", "892144",
"893930", "894959", "896820", "900374", "901373", "902879", "904147",
"905194", "906305", "908049", "908798", "911505", "913314", "915390",
"915833", "919057", "922432", "924120", "925640", "927671", "932006",
"936810", "936916", "938349", "940727", "941945", "942271", "943188",
"944548", "945783", "947164", "948322", "949181", "951414", "952632",
"955090", "956428", "956985", "959916", "960349", "962224", "962980",
"964665", "967160", "967588", "969929", "972543", "972893", "977734",
"978083", "978981", "980427", "980782", "981541", "981850", "982220",
"983781", "985193", "986366", "988934", "989056", "991218", "991914",
"995411", "995630", "995873", "995936", "996309"), class = "factor"),
aid = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("mem",
"noMem"), class = "factor"), gentest = structure(c(1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L), .Label = c("first", "second"), class = "factor"),
rocks = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 8L, 8L, 1L, 1L), .Label = c("R1",
"R2", "R3", "R4", "R5", "R6", "R7", "R8"), class = "factor"),
rating = c(7L, 5L, 2L, 7L, 4L, 2L, 6L, 3L, 3L, 2L, 3L, 3L,
2L, 1L, 3L, 6L, 3L, 2L, 2L, 4L), condition = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("baseline", "category", "property"
), class = "factor"), order = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("after", "before", "none"), class = "factor")), row.names = c(NA,
-20L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
turkcode = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("100879",
"104655", "108505", "110324", "110600", "112445", "114083",
"115814", "116573", "117411", "117817", "118651", "119324",
"121548", "121883", "121918", "123275", "123718", "125491",
"127450", "127825", "128062", "129061", "131404", "135358",
"135594", "135671", "135945", "137951", "138675", "139469",
"140924", "145730", "147222", "148533", "150851", "153455",
"158882", "164468", "166907", "169260", "171463", "172398",
"175565", "177108", "179000", "180270", "183953", "185574",
"185880", "185948", "186371", "187787", "189220", "190014",
"192550", "193904", "195308", "196755", "197493", "198368",
"200155", "200297", "201915", "214519", "215994", "217903",
"218771", "219302", "220434", "222740", "223223", "224721",
"225118", "225223", "229856", "229874", "231301", "232576",
"233842", "234215", "237581", "239567", "240609", "241098",
"241423", "242108", "244633", "246055", "251597", "252929",
"255252", "256652", "259936", "274962", "277053", "279422",
"280317", "282602", "283750", "285737", "286259", "287544",
"288507", "290503", "291401", "291835", "292160", "294117",
"297863", "298061", "299347", "299499", "301399", "304875",
"305231", "306312", "307410", "308979", "311157", "311524",
"311630", "318956", "318988", "319995", "321405", "324288",
"327086", "327559", "328345", "328401", "330318", "330909",
"332723", "334115", "334517", "335811", "335831", "337145",
"338323", "338542", "338575", "340083", "341182", "343612",
"343947", "344554", "346476", "349874", "350117", "350433",
"350972", "351187", "355311", "356717", "359366", "360048",
"360058", "361191", "361971", "362827", "363543", "367244",
"374254", "374965", "376278", "377622", "382139", "382916",
"384586", "385229", "386782", "388951", "389029", "390299",
"390662", "396335", "396732", "398076", "398573", "399276",
"399587", "403388", "406073", "406160", "411977", "412935",
"417350", "420060", "421393", "422944", "424462", "427143",
"429291", "430758", "431629", "431638", "431935", "432218",
"433788", "434291", "436681", "437087", "439385", "439499",
"440477", "440834", "441253", "441876", "443826", "444080",
"447597", "452643", "454649", "457055", "457946", "463512",
"464079", "464123", "467897", "468650", "470211", "471115",
"471512", "475493", "476937", "479198", "482871", "484066",
"484070", "485462", "486402", "491701", "491835", "499644",
"501833", "502335", "502373", "504800", "507439", "507946",
"507987", "509066", "513078", "515519", "517017", "517988",
"519144", "519210", "519858", "522847", "523683", "525315",
"528577", "532463", "532630", "533028", "539033", "539852",
"540690", "546773", "546916", "549652", "551599", "554198",
"556066", "559920", "560804", "560857", "562080", "562420",
"563841", "565668", "565776", "566509", "569039", "572553",
"575364", "576421", "576694", "576877", "577120", "577155",
"577534", "577605", "578463", "578820", "578995", "580213",
"581893", "582433", "582905", "583887", "584569", "585314",
"585566", "587393", "589144", "592284", "594463", "596863",
"601837", "602632", "604254", "605885", "609296", "609963",
"610062", "612437", "612949", "613161", "614372", "614777",
"615372", "615384", "616927", "618118", "620041", "620336",
"621634", "622289", "624098", "626163", "626612", "627019",
"627856", "630003", "630255", "634018", "634478", "635801",
"638606", "640012", "641078", "641366", "641436", "641821",
"642076", "642446", "643329", "643942", "644015", "646792",
"647254", "647700", "649516", "650792", "650810", "651229",
"652387", "652671", "654778", "657964", "658894", "660500",
"660607", "664469", "666754", "666796", "668996", "669712",
"671682", "673516", "675712", "677835", "678008", "679262",
"680295", "686455", "690471", "691175", "692489", "694023",
"696001", "698716", "700133", "700641", "707812", "707953",
"708010", "708881", "713657", "715255", "715386", "716764",
"718936", "719956", "725348", "727753", "728436", "729588",
"730513", "731928", "732013", "732438", "733366", "733559",
"734672", "735174", "735675", "737044", "737127", "741264",
"745262", "748173", "748414", "748943", "749221", "749963",
"750363", "753518", "754512", "754970", "758639", "760838",
"761642", "766250", "770646", "772574", "773054", "775271",
"776762", "778208", "779453", "781378", "781861", "782257",
"785763", "785860", "787011", "790280", "791735", "791903",
"792178", "796650", "796822", "796970", "798621", "802731",
"804701", "805606", "807848", "809142", "810539", "812182",
"812321", "814029", "814545", "814774", "815079", "816572",
"824215", "825063", "827763", "829973", "829983", "830126",
"832112", "832666", "833066", "834756", "835270", "835340",
"837413", "837746", "839882", "846097", "847975", "848746",
"851745", "851975", "856622", "858918", "859174", "859182",
"859726", "859850", "862222", "864356", "865028", "869700",
"871576", "872256", "873350", "873597", "875873", "883140",
"886308", "886592", "886706", "892144", "893930", "894959",
"896820", "900374", "901373", "902879", "904147", "905194",
"906305", "908049", "908798", "911505", "913314", "915390",
"915833", "919057", "922432", "924120", "925640", "927671",
"932006", "936810", "936916", "938349", "940727", "941945",
"942271", "943188", "944548", "945783", "947164", "948322",
"949181", "951414", "952632", "955090", "956428", "956985",
"959916", "960349", "962224", "962980", "964665", "967160",
"967588", "969929", "972543", "972893", "977734", "978083",
"978981", "980427", "980782", "981541", "981850", "982220",
"983781", "985193", "986366", "988934", "989056", "991218",
"991914", "995411", "995630", "995873", "995936", "996309"
), class = "factor"), rocks = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 1L, 1L), .Label = c("R1",
"R2", "R3", "R4", "R5", "R6", "R7", "R8"), class = "factor"),
gentest = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("first",
"second"), class = "factor"), .rows = list(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15:16, 17:18,
19L, 20L)), row.names = c(NA, -18L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Does anyone know how I can modify the second set of ratings for rock #8 so that I can pivot the data wider or even exclude this data from the dataset altogether?
EDIT:
Here is an example of how I'd like the output to look
id <- rep("100879", times = 6)
aid <- rep("mem", times = 6)
test <- rep(c("first", "second"), times = 3)
order <- rep("after", times = 6)
condition <- rep ("cat", times = 6)
R1 <- sample(0:9, 6, replace=T)
R2 <- sample(0:9, 6, replace=T)
R3 <- sample(0:9, 6, replace=T)
R4 <- sample(0:9, 6, replace=T)
R5 <- sample(0:9, 6, replace=T)
R6 <- sample(0:9, 6, replace=T)
R7 <- sample(0:9, 6, replace=T)
R8 <- sample(0:9, 6, replace=T)
df <- cbind(id, aid, test, order, condition, R1, R2, R3, R4, R5, R6, R7, R8)
a data.table suggestion
library( data.table )
# Convert mydata to a data.table by reference.
setDT(mydata)
# Running counter within each id/test/rock combination: 1 for the first
# rating, 2 for a repeated one, and so on.
mydata[, row_id := rowid(turkcode, aid, gentest, condition, order, rocks)]
# Label used as the wide column name: the rock itself for the first rating,
# the rock plus "_<counter>" for any repeat.
mydata[, rocks2 := as.character(rocks)]
mydata[row_id > 1, rocks2 := paste0(rocks2, "_", row_id)]
# Cast to wide: one row per id/test combination, one column per label.
dcast(
  mydata,
  turkcode + aid + gentest + condition + order ~ rocks2,
  value.var = "rating"
)
# turkcode aid gentest condition order R1 R2 R3 R4 R5 R6 R7 R8 R8_2
# 1: 100879 mem first category after 7 2 4 6 3 3 2 3 6
# 2: 100879 mem second category after 5 7 2 3 2 3 1 3 2
# 3: 104655 mem first category after 2 NA NA NA NA NA NA NA NA
# 4: 104655 mem second category after 4 NA NA NA NA NA NA NA NA
Another option using pivot_wider and separate
library(dplyr)
library(tidyr)
# Short version: duplicated ratings are kept together, so R1-R8 end up in
# list format
df %>%
  pivot_wider(
    id_cols = c("turkcode", "aid", "gentest", "condition", "order"),
    names_from = "rocks",
    values_from = "rating",
    values_fn = list(rating = list)
  )
# Clean version: collapse duplicated ratings into one comma-separated string,
# then split R8 into two columns
df %>%
  pivot_wider(
    # id_cols: a set of columns that uniquely identifies each observation.
    # Defaults to all columns in data except those named in names_from and
    # values_from.
    id_cols = c("turkcode", "aid", "gentest", "condition", "order"),
    names_from = "rocks",
    values_from = "rating",
    values_fn = list(rating = function(x) paste(x, collapse = ","))
    # alternatives:
    # values_fn = list(rating = mean)
    # ,values_fill = list(rating = 0)
  ) %>%
  separate(R8, into = c("R8", "R8_1"))
# A tibble: 4 x 14
# Groups: turkcode, gentest [1,118]
turkcode aid gentest condition order R1 R2 R3 R4 R5 R6 R7 R8 R8_1
<fct> <fct> <fct> <fct> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 100879 mem first category after 7 2 4 6 3 3 2 3 6
2 100879 mem second category after 5 7 2 3 2 3 1 3 2
3 104655 mem first category after 2 NA NA NA NA NA NA NA NA
4 104655 mem second category after 4 NA NA NA NA NA NA NA NA

How can I find a subsequent trial based on a condition?

I am using R to manipulate a large dataset (dataset) that consists of 20,000+ rows. In my data, I have three important columns to focus on for this question: Trial_Nr (consisting of 90 trials), seconds (increasing in .02 second increments), and threat (fixation to threat: 1=yes, 0=no, NA). Within each trial, I need to answer: once the participant initially fixates on threat (1), how long does it take for them to stop fixating on threat (0)? So basically, within each trial, I would need to find the first threat=1 and the subsequent threat=0 and subtract the times. I am able to get the first threat with this code:
initalfixthreat <- dataset %>%
group_by(Trial_Nr) %>%
slice(which(threat == '1')[1])
I am stumped on how to get the subsequent threat=0 within that trial number.
Here is an example of the data (sorry don't know how to format it better):
So for Trial_Nr=1, I would be interested in 689.9 seconds- 689.8.
For Trial_Nr=2, I would want 690.04-689.96.
Please let me know if I was unclear and thank you all for your help!
One approach is:
library(dplyr)
# For each trial, time from the first fixation on threat (threat == 1) to the
# first later sample where threat is 0. Rows are assumed to already be in
# time order within each trial.
df %>%
  group_by(Trial_Nr) %>%
  # samples with unknown threat status are ignored
  filter(!is.na(threat)) %>%
  # flag is 1 on every threat sample and -1 on a non-threat sample that
  # directly follows a threat sample (0 or NA otherwise)
  mutate(flag = ifelse(threat == 1, 1, threat - lag(threat))) %>%
  # keep the first threat sample and the first 1 -> 0 transition
  filter(abs(flag) == 1 & !duplicated(flag)) %>%
  # a trial with no 1 -> 0 transition has a single row left: report NA.
  # A scalar if/else with NA_real_ keeps timediff a double in every group.
  summarise(timediff = if (length(seconds) < 2) NA_real_ else diff(seconds))
# A tibble: 2 x 2
Trial_Nr timediff
<int> <dbl>
1 1 0.1
2 2 0.0800
Data:
df <- structure(list(Trial_Nr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L), seconds = c(689.76, 689.78, 689.8, 689.82,
689.84, 689.86, 689.88, 689.9, 689.92, 689.94, 689.96, 689.98,
690, 690.02, 690.04), threat = c(0L, 0L, 1L, 1L, 1L, NA, NA,
0L, 1L, 0L, 1L, NA, NA, 1L, 0L)), class = "data.frame", row.names = c(NA,
-15L))

Creating row in dataframe for each element in vector

I have a vector of numbers:
a <- c(54, 456, 23432, 4868, 34, 245634, 37, 46453, 1342354)
In my already-existent dataframe (head included via dput below), I would like to create a new variable. Each row of the new variable will contain a single element from the vector. So there would be one value (e.g. 54) in each row of the new variable.
structure(list(Phone = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "a", class = "factor"), Frame = structure(c(1L,
3L, 2L, 4L, 6L, 5L), .Label = c("[-4.46225397 -4.14727267 -4.45203785 -4.67251549 -5.13750066 -4.92839463\n -5.03957588 -5.68530479]",
"[-6.14532579 -4.38918589 -4.12275354 -4.19263549 -4.30380823 -4.35621995\n -4.4079389 -4.47339504]",
"[-6.43104195 -4.75506178 -4.2324676 -4.21878988 -4.1635973 -4.11186806\n -4.05023489 -4.08204198]",
"[-7.1528423 -5.46190925 -5.94873845 -6.635839 -6.84179002 -6.85955335\n -6.83714326 -6.87621415]",
"[-7.23901353 -4.61522546 -3.25206619 -3.38407075 -3.63762837 -3.85352927\n -3.94250123 -4.04015791]",
"[-7.34451319 -5.58664694 -4.69929752 -4.621823 -4.51670576 -4.48494125\n -4.39512713 -4.26553646]"
), class = "factor"), Previous = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = "ch", class = "factor"), Following = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "p", class = "factor"), Word = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "juk'ucha-pi", class = "factor"),
Note = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
"[-10.79197258 -7.97949955 -7.10253093 -7.07957825 -6.98695923\n -6.90015207 -6.79672506 -6.85010073",
"[-10.31251047 -7.36552088 -6.91841906 -7.0356884 -7.2222481\n -7.31020053 -7.39699043 -7.5068328 ",
"[-12.00323036 -9.16566481 -9.982616 -11.13564383 -11.48125155\n -11.51106031 -11.47345379 -11.5390189 ",
"[-12.32487451 -9.37498793 -7.8859212 -7.7559107 -7.5795128\n -7.52620857 -7.37549093 -7.15802398",
"[-12.14783486 -7.74483933 -5.45731306 -5.67883075 -6.10432742\n -6.46663209 -6.61593651 -6.77981481"
), Morph_status = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
row.names = c(NA, 6L), class = "data.frame")
When working with data frames, each variable (column) has as many entries as there are rows. What you are describing then is not a data frame and, if I understand your question correctly, the best you can do is go back to general lists:
# Columns of a data frame must all have the same length, so hold the longer
# vector `a` next to the columns in a plain list instead
df <- data.frame(a = 1:3, b = 1:3)
append(as.list(df), list(c = a))
# $a
# [1] 1 2 3
#
# $b
# [1] 1 2 3
#
# $c
# [1] 54 456 23432 4868 34 245634 37 46453 1342354
One other option, as to still have a data frame, would be to fill all the shorter columns with NA's:
library(rowr)
cbind.fill(df, a, fill = NA)
# a b object
# 1 1 1 54
# 2 2 2 456
# 3 3 3 23432
# 4 NA NA 4868
# 5 NA NA 34
# 6 NA NA 245634
# 7 NA NA 37
# 8 NA NA 46453
# 9 NA NA 1342354

R program, ?count, rename "freq" to something else

I am studying this webpage, and cannot figure out how to rename freq to something else, say number of times imbibed
Here is dput
structure(list(name = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("Bill", "Llib"), class = "factor"), drink = structure(c(2L,
3L, 1L, 4L, 2L, 3L, 1L, 4L), .Label = c("cocoa", "coffee", "tea",
"water"), class = "factor"), cost = 1:8), .Names = c("name",
"drink", "cost"), row.names = c(NA, -8L), class = "data.frame")
And this is working code with output. Again, I'd like to rename the freq column. Thanks!
library(plyr)
bevs$cost <- as.integer(bevs$cost)
count(bevs, "name")
Output
name freq
1 Bill 4
2 Llib 4
Are you trying to do this?
# Tabulate by name, then overwrite both column names at once
counts <- setNames(
  count(bevs, "name"),
  c("name", "number of times imbibed")
)
counts
The count() function returns a data.frame. Just rename it like any other data.frame:
# Tabulate by name, then rename only the column currently called "freq";
# a logical index leaves the names untouched if no such column exists
counts <- count(bevs, "name")
is_freq <- names(counts) == "freq"
names(counts)[is_freq] <- "number of times imbibed"
print(counts)
# name number of times imbibed
# 1 Bill 4
# 2 Llib 4

Automate Data Frame Element Division

I have a dataframe, from which I want to obtain percent treated from the dataset // where % treated = Treated / Total visits
eg. % treated Acute Maxillary Sinusitis = 93470/93470 = 100%
dput(droplevels(head(magma)))
structure(list(DIAG_CODE_1 = structure(c(1L, 1L, 2L, 2L, 2L,
2L), .Label = c("4610 SINUSITIS MAXILLARY ACUT", "4619 SINUSITIS ACUTE UNSP"
), class = "factor"), GENDER = structure(c(1L, 1L, 1L, 1L, 1L,
1L), .Label = "FEMALE", class = "factor"), AGE = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "0-2", class = "factor"), Mention_DRGU = c(5460L,
5460L, 17790L, 17790L, 9400L, 9400L), treatment_status = structure(c(1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Total visits", "Treated"), class = "factor"),
diag_class_1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "Acute sinusitis", class = "factor"),
year = c(2007L, 2007L, 2007L, 2007L, 2008L, 2008L)), .Names = c("DIAG_CODE_1",
"GENDER", "AGE", "Mention_DRGU", "treatment_status", "diag_class_1",
"year"), row.names = c(1285L, 1286L, 1407L, 1410L, 1408L, 1411L
), class = "data.frame")
However with 432 rows, it's possible I could calculate that all manually but that would be incredibly time consuming. Isn't that what computers are for :p. If you guys could help me find ways to automate tasks within R that would be greatly appreciated.
Is there a way that R could create a resulting dataframe that would tell me the DIAG_CODE_1, GENDER, AGE, % treated, and the year? I've created (in Excel) what I want the output to look like so you guys can see what I mean.
I will be doing this sort of calculation for other respiratory diseases, so I'm looking to learn now that way I can make my life easier in the long run.
You could use dplyr
library(dplyr)
library(tidyr)
# Percentage treated per diagnosis / gender / age / year.
magma %>%
  # one column per treatment status ("Total visits", "Treated"), filled with
  # the Mention_DRGU counts; pivot_wider() replaces the superseded spread()
  pivot_wider(names_from = treatment_status, values_from = Mention_DRGU) %>%
  mutate(PercentageTreated = 100 * (Treated / `Total visits`)) %>%
  # keep only the identifying columns and the computed percentage
  select(-diag_class_1, -`Total visits`, -Treated)
# DIAG_CODE_1 GENDER AGE year PercentageTreated
#1 4610 SINUSITIS MAXILLARY ACUT FEMALE 0-2 2007 100
#2 4619 SINUSITIS ACUTE UNSP FEMALE 0-2 2007 100
#3 4619 SINUSITIS ACUTE UNSP FEMALE 0-2 2008 100
Try this:
# Reshape to wide: one row per diagnosis/gender/age/class/year, with the
# Mention_DRGU count of each treatment_status in its own column
magma2 <- reshape(
  magma,
  idvar = c("DIAG_CODE_1", "GENDER", "AGE", "diag_class_1", "year"),
  timevar = "treatment_status",
  direction = "wide"
)
# reshape() names the new columns "<value column>.<status>" and orders them by
# the first appearance of each status in the data, so rename by name rather
# than by position; a positional rename swaps numerator and denominator when
# "Total visits" rows come before "Treated" rows
names(magma2)[names(magma2) == "Mention_DRGU.Treated"] <- "Treated"
names(magma2)[names(magma2) == "Mention_DRGU.Total visits"] <- "TotVisits"
# Percentage treated = 100 * treated / total visits
magma2$PercentageTreated <- 100 * as.numeric(as.character(magma2$Treated)) /
  as.numeric(as.character(magma2$TotVisits))
head(magma2)

Resources