Related
Can you think about an intuitive way of calculating the number of times the word space appears in a certain column? Or any other solution that is viable.
I basically want to know how many times the space key was pressed. However, some participants pressed other keys by mistake, and those presses should also be counted as errors. So I was wondering whether I should use the "key_resp.rt" column instead and count the number of response times. If you know how to do both, that would be great, as I may need both counts.
I used the following code but the results do not conform to the data.
Data %>% group_by(Participant, Session) %>% summarise(false_start = sum(str_count(key_resp.keys, "space")))
Here is a snippet of my data:
Participant RT Session key_resp.keys key_resp.rt
X 0.431265 1 ["space"] [2.3173399999941466]
X 0.217685 1
X 0.317435 2 ["space","space"] [0.6671900000001187,2.032510000000002] 2020.1.3 4
Y 0.252515 1
Y 0.05127 2 ["space","space","space","space","space","space","space","space","space"] [4.917419999999765,6.151149999999689,6.333714999999771,6.638249999999971,6.833514999999338,7.0362499999992,7.217724999999504,7.38576999999988,7.66913999999997]
dput(droplevels(head(Data_PVT)))
structure(list(Interval_stimulus = c(4.157783411, 4.876139922,
5.67011868, 9.338167417, 9.196342656, 7.62448411), Participant = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "ADH80254", class = "factor"),
RT = c(431.265, 277.99, 253.515, 310.53, 299.165, 539.46),
Session = c(1L, 1L, 1L, 1L, 1L, 1L), date = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "2020-06-12_11h11.47.141", class = "factor"),
key_resp.keys = structure(c(2L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"[\"space\"]"), class = "factor"), key_resp.rt = structure(c(2L,
1L, 1L, 1L, 1L, 1L), .Label = c("", "[2.3173399999941466]"
), class = "factor"), psychopyVersion = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "2020.1.3", class = "factor"),
Trials = 0:5, Reciprocal = c(2.31875992719094, 3.59725169970143,
3.94453977082224, 3.22030077609249, 3.3426370063343, 1.85370555740926
)), row.names = c(NA, 6L), class = "data.frame")
Expected output:
Participant Session false_start
x 1 0
x 2 1
y 1 2
y 2 1
z 1 10
z 2 3
We can use str_count to count "space" values for each Participant and Session and sum them to get total. For all_false_start we count number of words in it.
library(dplyr)
library(stringr)

# One output row per Participant/Session combination.
#   false_start     - total occurrences of the whole word "space" in
#                     key_resp.keys across the group's rows.
#   all_false_start - total number of word tokens of any kind in
#                     key_resp.keys across the group's rows.
summarise(
  group_by(df, Participant, Session),
  false_start = sum(str_count(key_resp.keys, '\\bspace\\b')),
  all_false_start = sum(str_count(key_resp.keys, '\\b\\w+\\b'))
)
I have a data with three columns like
Inputdf<-structure(list(df1 = structure(c(4L, 5L, 2L, 1L, 3L), .Label = c("P61160,P61158,O15143,O15144,O15145,P59998,O15511",
"P78537,Q6QNY1,Q6QNY0", "Q06323,Q9UL46", "Q92793,Q09472,Q9Y6Q9,Q92831",
"Q92828,Q13227,O15379,O75376,O60907,Q9BZK7"), class = "factor"),
df2 = structure(c(3L, 2L, 5L, 4L, 1L), .Label = c("", "P61158,O15143,O15144",
"Q06323,Q9UL46", "Q6QNY0", "Q92828"), class = "factor"),
df3 = structure(c(5L, 4L, 3L, 2L, 1L), .Label = c("", "O15511",
"Q06323,Q9UL46", "Q6QNY0", "Q92793,Q09472"), class = "factor")), .Names = c("df1",
"df2", "df3"), class = "data.frame", row.names = c(NA, -5L))
I am trying to find shared strings across the columns of this data. For example,
the first row of df1 is Q92793,Q09472,Q9Y6Q9,Q92831.
I then look at df2 and df3 to see whether any of these members appear there; for this example, I build the following row:
df1 df2 df3 Numberdf1 df2 df3
1 0 1 4 0 Q92793,Q09472
df1 1 means the first row of df1
df2 0 means it did not have any similarity
df3 1, means the first row of df3 has similarity with df1 row 1
Numberdf1 is the count of comma-separated strings in that df1 row, which is 4
df2 is 0 because no similar string was found in df2
df3 is Q92793,Q09472, i.e. the strings that were similar, pasted together
The desired output looks like this:
out<- structure(list(df1 = 1:5, df2 = c(0L, 3L, 4L, 2L, 1L), df3 = c(1L,
0L, 2L, 4L, 3L), Numberdf1 = c(4L, 6L, 2L, 7L, 2L), df2.1 = structure(c(1L,
5L, 4L, 2L, 3L), .Label = c("0", "P61158,O15143,O15144", "Q06323,Q9UL46",
"Q6QNY0", "Q92828"), class = "factor"), df3.1 = structure(c(5L,
1L, 4L, 2L, 3L), .Label = c("0", "O15511", "Q06323,Q9UL46", "Q6QNY0",
"Q92793,Q09472"), class = "factor")), .Names = c("df1", "df2",
"df3", "Numberdf1", "df2.1", "df3.1"), class = "data.frame", row.names = c(NA,
-5L))
The below function does not work , for example, use this data as input
Inputdf1<- structure(list(df1 = structure(c(2L, 3L, 1L), .Label = c("Q06323,Q9UL46",
"Q92793,Q09472,Q9Y6Q9,Q92831", "Q92828,Q13227,O15379,O75376,O60907,Q9BZK7"
), class = "factor"), df2 = structure(1:3, .Label = c("P25788,P25789",
"Q92828, O60907, O75376", "Q9UL46, Q06323"), class = "factor"),
df3 = structure(c(2L, 1L, 3L), .Label = c("Q92831, Q92793, Q09472",
"Q9BZK7, Q92828, O75376, O60907", "Q9UL46, Q06323"), class = "factor")), .Names = c("df1",
"df2", "df3"), class = "data.frame", row.names = c(NA, -3L))
This works for your example:
# Convert every column from factor to character, then split each cell on
# commas into a character vector of IDs, so each column becomes a list with one
# vector per row.
# strsplit() is vectorised and always returns a list. The previous sapply()
# wrapper would silently simplify to a matrix (or plain vector) whenever every
# cell in a column held the same number of IDs, breaking the assignment below.
# Optional whitespace after a comma is dropped so "A, B" and "A,B" split alike.
Inputdf[] <- lapply(Inputdf, as.character)
Inputdf[] <- lapply(Inputdf, function(col) strsplit(col, ",\\s*"))

# TRUE when an intersection contains at least one ID.
not.empty <- function(x) length(x) > 0

out <- data.frame()
# seq_len() gives an empty sequence for a zero-row input; 1:nrow() would
# iterate over c(1, 0) and fail on the first subscript.
for (r in seq_len(nrow(Inputdf))) {
  # IDs that row r of df1 shares with each row of df2 / df3.
  df2.intersect <- lapply(Inputdf$df2, intersect, Inputdf$df1[[r]])
  df3.intersect <- lapply(Inputdf$df3, intersect, Inputdf$df1[[r]])

  out[r, "df1"] <- r
  # Index of the first df2 / df3 row with any shared ID, or 0 if none.
  out[r, "df2"] <- Position(not.empty, df2.intersect, nomatch = 0)
  out[r, "df3"] <- Position(not.empty, df3.intersect, nomatch = 0)
  # Number of IDs in row r of df1.
  out[r, "Numberdf1"] <- length(Inputdf$df1[[r]])
  # Shared IDs from that first matching row, comma-joined ("0" if none).
  out[r, "df2.1"] <- paste(Find(not.empty, df2.intersect, nomatch = 0),
                           collapse = ",")
  out[r, "df3.1"] <- paste(Find(not.empty, df3.intersect, nomatch = 0),
                           collapse = ",")
}
out
# df1 df2 df3 Numberdf1 df2.1 df3.1
# 1 1 0 1 4 0 Q92793,Q09472
# 2 2 3 0 6 Q92828 0
# 3 3 4 2 3 Q6QNY0 Q6QNY0
# 4 4 2 4 7 P61158,O15143,O15144 O15511
# 5 5 1 3 2 Q06323,Q9UL46 Q06323,Q9UL46
Note: Find and Position identify the first match only. If there are potentially multiple matches, use which.
EDIT
Version accounting for multiple matches
# Convert factors to character, then split each cell on a comma followed by
# optional whitespace, so each column becomes a list with one character vector
# of IDs per row.
# strsplit() always returns a list; sapply() could collapse the result to a
# matrix or vector when every cell has the same number of IDs.
Inputdf[] <- lapply(Inputdf, as.character)
Inputdf[] <- lapply(Inputdf, function(col) strsplit(col, ",\\s*"))

# TRUE when an intersection contains at least one ID.
not.empty <- function(x) length(x) > 0

out <- data.frame()
# seq_len() is safe for a zero-row input, unlike 1:nrow().
for (r in seq_len(nrow(Inputdf))) {
  # IDs that row r of df1 shares with each row of df2 / df3.
  df2.intersect <- lapply(Inputdf$df2, intersect, Inputdf$df1[[r]])
  df3.intersect <- lapply(Inputdf$df3, intersect, Inputdf$df1[[r]])

  out[r, "df1"] <- r
  # Comma-joined indices of ALL df2 / df3 rows sharing at least one ID.
  # vapply() guarantees a logical vector whatever the input length.
  out[r, "df2"] <- paste(which(vapply(df2.intersect, not.empty, logical(1))),
                         collapse = ",")
  out[r, "df3"] <- paste(which(vapply(df3.intersect, not.empty, logical(1))),
                         collapse = ",")
  # Number of IDs in row r of df1.
  out[r, "Numberdf1"] <- length(Inputdf$df1[[r]])
  # Every distinct shared ID across all matching rows, comma-joined.
  out[r, "df2.1"] <- paste(unique(unlist(df2.intersect)), collapse = ",")
  out[r, "df3.1"] <- paste(unique(unlist(df3.intersect)), collapse = ",")
}
# Rows with no match produce empty strings above; show them as "0" instead.
# Skipped when the input had no rows and `out` is still empty.
if (nrow(out) > 0) {
  out[out == ""] <- "0"
}
I'm trying to get the data from column one that matches with column 2 but only on the "B" values. Need to somehow make the true values a list.
Need this to repeat for 50,000 rows. Around 37,000 of them are true.
I'm incredibly new to this so any help would be nice.
Data <- data.frame(
X = sample(1:10),
Y = sample(c("B", "W"), 10, replace = TRUE)
)
Count <- 1
If(data[count,2] == "B") {
List <- list(data[count,1]
Count <- count + 1
#I'm not sure what to use to repeat I just put
Repeat
} else {
Count <- count + 1
Repeat
}
End result should be a list() of only column one data.
In this if rows 1-5 had "B" I want the column one numbers from that.
Not sure if I understood correctly what you're looking for, but from the comments I would assume that this might help:
# Flag the cells of the second column that equal "B", pull the first-column
# values at those positions, and return them as a one-column data frame
# named "selected".
is_b <- Data[2] == "B"
setNames(data.frame(Data[1][is_b]), "selected")
# selected
#1 2
#2 5
#3 7
#4 6
No loop needed.
data
Data <- structure(list(X = c(10L, 4L, 9L, 8L, 3L, 2L, 5L, 1L, 7L, 6L),
Y = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L),
.Label = c("B", "W"), class = "factor")),
.Names = c("X", "Y"), row.names = c(NA, -10L),
class = "data.frame")
So I am trying to write a function with dplyr, without a loop, and here is something I do not know how to do.
Say we have TV stations (x, y, z) and months (2, 3). If I group by these, we get
this output, along with a summarised numeric value:
TV months value
x 2 52
y 2 87
z 2 65
x 3 180
y 3 36
z 3 99
This is for evaluated Brand.
Then I will have many brands, which I need to filter to keep only those whose value is >= 0.8 * the value of the evaluated brand and <= 1.2 * the value of the evaluated brand.
So, for example, from the table below I would only want to keep the first two rows, and this should be done for all month & TV combinations.
brand TV MONTH value
sdg x 2 60
sdfg x 2 55
shs x 2 120
sdg x 2 11
sdga x 2 5000
As @akrun said, you need to use a combination of merging and subsetting. Here's a base R solution.
# Attach each brand row to the evaluated brand's value for the same TV/month.
# After the merge, value.x is the candidate brand's value and value.y is the
# evaluated brand's value.
m <- merge(df, data, by.x = c("TV", "MONTH"), by.y = c("TV", "months"))
# Rows whose value lies within 80%-120% of the evaluated brand's value.
within_band <- with(m, value.x >= value.y * 0.8 & value.x <= value.y * 1.2)
# Keep those rows and drop the fifth column (value.y) from the result.
m[within_band, -5]
# TV MONTH brand value.x
#1 x 2 sdg 60
#2 x 2 sdfg 55
Data
data <- structure(list(TV = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("x",
"y", "z"), class = "factor"), months = c(2L, 2L, 2L, 3L, 3L,
3L), value = c(52L, 87L, 65L, 180L, 36L, 99L)), .Names = c("TV",
"months", "value"), class = "data.frame", row.names = c(NA, -6L
))
df <- structure(list(brand = structure(c(2L, 1L, 4L, 2L, 3L), .Label = c("sdfg",
"sdg", "sdga", "shs"), class = "factor"), TV = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "x", class = "factor"), MONTH = c(2L,
2L, 2L, 2L, 2L), value = c(60L, 55L, 120L, 11L, 5000L)), .Names = c("brand",
"TV", "MONTH", "value"), class = "data.frame", row.names = c(NA,
-5L))
I have following data and code:
dd
grp categ condition value
1 A X P 2
2 B X P 5
3 A Y P 9
4 B Y P 6
5 A X Q 4
6 B X Q 5
7 A Y Q 8
8 B Y Q 2
>
>
dput(dd)
structure(list(grp = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("A", "B"), class = "factor"), categ = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("X", "Y"), class = "factor"),
condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("P",
"Q"), class = "factor"), value = c(2, 5, 9, 6, 4, 5, 8, 2
)), .Names = c("grp", "categ", "condition", "value"), out.attrs = structure(list(
dim = structure(c(2L, 2L, 2L), .Names = c("grp", "categ",
"condition")), dimnames = structure(list(grp = c("grp=A",
"grp=B"), categ = c("categ=X", "categ=Y"), condition = c("condition=P",
"condition=Q")), .Names = c("grp", "categ", "condition"))), .Names = c("dim",
"dimnames")), row.names = c(NA, -8L), class = "data.frame")
ggplot(dd, aes(grp,value, fill=condition))+geom_bar(stat='identity')+facet_grid(~categ)
How can I convert this bar chart to pie chart? I want 4 pies here with their sizes corresponding to heights of respective bars here. I tried following but they did not work:
ggplot(dd, aes(grp,value, fill=condition))+geom_bar(stat='identity')+facet_grid(~categ)+coord_polar()
ggplot(dd, aes(grp,value, fill=condition))+geom_bar(stat='identity')+facet_grid(~categ)+coord_polar('y')
I also tried to make pie chart similar to Pie charts in ggplot2 with variable pie sizes but I am not able to manage with my data. Thanks for your help.
Using the same idea as in the link you posted, you could add a column size to your dataframe that would be the sum of the values for each group, and use that as the width argument:
library(dplyr)

# size = total of `value` within each categ/grp pair; it is used below as the
# bar width.
dd <- mutate(group_by(dd, categ, grp), size = sum(value))

# One panel per grp (rows) x categ (columns). Bars are stacked to a full
# proportion (position = "fill") and drawn in polar coordinates with y as
# the angle.
ggplot(dd, aes(x = size / 2, y = value, fill = condition, width = size)) +
  geom_bar(position = "fill", stat = "identity") +
  facet_grid(grp ~ categ) +
  coord_polar("y")
You want the group and category both to be variables for the grid, and not inside any plot. Here are two different layouts. X ought to be any single item, string, or something else.
# Shared plot: a single dummy x position, bars stacked by condition, drawn in
# polar coordinates with x as the angle.
pie_base <- ggplot(dd, aes(x = factor(1), y = value, fill = condition)) +
  geom_bar(stat = "identity") +
  coord_polar("x")

# Layout 1: all grp/categ combinations side by side in one row.
pie_base + facet_grid(~ grp + categ)

# Layout 2: grp as rows, categ as columns.
pie_base + facet_grid(grp ~ categ)
Something strange happened with the top opening here; maybe it's just my interface. This should be enough to get you going, though!