Place 1 heatmap on another with transparency in R - r

I'm new to R and have the following challenge;
I want to create a visualization that basically combines two kinds of 'heatmaps' in order to visualize at what times there are truly dark skies (for astronomy). For this I want to have a heatmap that visualizes the brightness of the moon based on the moonrise and moonset times and the phase of the moon. On top of this we can then plot a band-like heatmap, with some transparency, for the time the sun is up.
I'm not sure if this is going to work visually or if I need to find some other solution; however, this seems like a good challenge to get into R some more.
But I could use some pointers, as I'm already stuck loading the matrix of size 24 (hours) x 30 (days) with all 720 values. When trying to create a basic data.frame from the vectors, I get the error that the number of rows is inconsistent.
Furthermore I have some heatmap examples working already, but I'm not sure how to combine 2 of them in the same plot like I described.
As an illustration the current 'heatmap' as it is in excel
And some data:
MOON
moon <- structure(list(X1.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("0%", "100%"), class = "factor"), X2.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("0%", "98%"), class = "factor"),
X3.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L
), .Label = c("0%", "94%"), class = "factor"), X4.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("0%", "89%"), class = "factor"),
X5.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("0%", "82%"), class = "factor"), X6.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("0%", "74%"), class = "factor"),
X7.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("0%", "65%"), class = "factor"), X8.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0%", "56%"), class = "factor"),
X9.9.12 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "47%"), class = "factor"), X10.9.12 = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "37%"), class = "factor"),
X11.9.12 = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "28%"), class = "factor"), X12.9.12 = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "20%"), class = "factor"),
X13.9.12 = structure(c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "12%"), class = "factor"), X14.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "6%"), class = "factor"),
X15.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("0%", "2%"), class = "factor"), X16.9.12 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "0%", class = "factor"),
X17.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("0%", "1%"), class = "factor")), .Names = c("X1.9.12",
"X2.9.12", "X3.9.12", "X4.9.12", "X5.9.12", "X6.9.12", "X7.9.12",
"X8.9.12", "X9.9.12", "X10.9.12", "X11.9.12", "X12.9.12", "X13.9.12",
"X14.9.12", "X15.9.12", "X16.9.12", "X17.9.12"), class = "data.frame", row.names = c("0:00:00",
"1:00:00", "2:00:00", "3:00:00", "4:00:00", "5:00:00", "6:00:00",
"7:00:00", "8:00:00", "9:00:00", "10:00:00", "11:00:00", "12:00:00",
"13:00:00", "14:00:00", "15:00:00", "16:00:00", "17:00:00", "18:00:00",
"19:00:00", "20:00:00", "21:00:00", "22:00:00", "23:00:00"))
SUN
September
Day Sunrise Sunset
1 6:52 20:26
2 6:54 20:24
3 6:56 20:22
4 6:57 20:20
5 6:59 20:17
6 7:00 20:15
7 7:02 20:13
8 7:04 20:10
9 7:05 20:08
10 7:07 20:06
11 7:08 20:05
12 7:09 20:02
13 7:11 20:00
14 7:13 19:58
15 7:14 19:55
16 7:16 19:53
17 7:17 19:51
18 7:19 19:48
19 7:21 19:46
20 7:22 19:44
21 7:25 19:40
22 7:26 19:38
23 7:28 19:35
24 7:30 19:33
25 7:31 19:31
26 7:33 19:28
27 7:35 19:26
28 7:36 19:24
29 7:38 19:21
30 7:40 19:19

So from what I understood, there are basically two questions:
Data organization
The easiest would be, if you'd have all data in one data.frame in long format. I.e. for each combination of time and date you have one row, with additional columns for the moon and sun intensity.
So we start with melting and fixing the moon data:
# Reshape the wide moon table (one column per day, one row per hour) into
# long format: one row per (time, date) combination.
library(reshape2)

# Keep the hour labels, which so far only exist as row names.
moon[["time"]] <- rownames(moon)
moon <- melt(
  moon,
  id.vars = "time",
  variable.name = "date",
  value.name = "moon"
)

# Strip the "X" from the former column names, e.g. "X1.9.12" -> "1.9.12".
moon[["date"]] <- sub("X", "", moon[["date"]], fixed = TRUE)

# "37%" -> 0.37, then invert so that a 100% moon becomes 0 and a 0% moon
# becomes 1.
moon[["moon"]] <- 1 - as.numeric(sub("%", "", moon[["moon"]], fixed = TRUE)) / 100
Now we bring the sun data into a comparable form by at least giving the days the same identifier as the moon dates:
# Append ".9.12" to the day number so it matches the moon date labels
# (e.g. 1 -> "1.9.12").
sun$Day <- paste0(sun$Day, ".9.12")
The next step is to merge the data by date (moon) and Day (sun), and to create a sun-intensity column comparable to the moon-intensity column we already have. This can be done by converting the times to a date-time class and comparing Sunrise and Sunset with the actual time:
# Combine the moon and sun tables and derive a 0/1 "sun is up" flag per hour.
# Join on the shared day identifier ("date" in moon, "Day" in sun).
mdf <- merge(moon, sun, by.x = "date", by.y = "Day")

# strptime() fills in the current date when only a clock time is given.
# Parsing in UTC keeps the comparison independent of the local time zone and
# of any daylight-saving transition on the day the script is run.
# as.POSIXct() avoids storing list-based POSIXlt values in the data frame.
mdf$time.tmp <- as.POSIXct(strptime(mdf$time, format = "%H:%M", tz = "UTC"))

# Sunrise and sunset are rounded to the nearest full hour so they line up
# with the hourly grid of the moon data.
mdf$Sunrise <- as.POSIXct(
  round(strptime(mdf$Sunrise, format = "%H:%M", tz = "UTC"), units = "hours")
)
mdf$Sunset <- as.POSIXct(
  round(strptime(mdf$Sunset, format = "%H:%M", tz = "UTC"), units = "hours")
)

# 1 while the hour lies between (rounded) sunrise and sunset, otherwise 0.
mdf$sun <- as.numeric(mdf$Sunrise <= mdf$time.tmp & mdf$time.tmp <= mdf$Sunset)

# Drop the helper columns; keep only what the plot needs.
mdf <- mdf[c("date", "time", "moon", "sun")]
mdf[5:10, ]
date time moon sun
1.9.12 4:00:00 0 0
1.9.12 5:00:00 0 0
1.9.12 6:00:00 0 0
1.9.12 7:00:00 0 1
1.9.12 8:00:00 1 1
1.9.12 9:00:00 1 1
Plotting
Adding multiple layers with different transparencies begs literally for ggplot2. In order to use this in a proper way, there is one more data manipulation necessary, which ensures the proper order on the axes: date and time have to be converted to factors with factor levels ordered not lexically, but by time:
# Turn date and time into factors whose levels follow chronological order
# (not alphabetical order), so the plot axes are laid out correctly.
# No temporaries are created inside within(): every variable assigned there
# would become a column of mdf.
mdf <- within(mdf, {
  # Days in ascending calendar order.
  date <- factor(
    date,
    levels = unique(date)[order(as.Date(unique(date), "%d.%m.%y"))]
  )
  # Hours in descending order. The ordering is computed on unique(time),
  # because its indices are used to subset unique(time); ordering the full
  # column would yield indices beyond length(unique(time)) and only give the
  # right levels by accident of row order. UTC keeps the parse independent of
  # local daylight-saving rules.
  time <- factor(
    time,
    levels = unique(time)[
      order(
        strptime(unique(time), format = "%H:%M:%S", tz = "UTC"),
        decreasing = TRUE
      )
    ]
  )
})
This can now be plotted:
library(ggplot2)

# Two tile layers drawn on top of each other on the same date x time grid;
# the opacity of each layer is driven by its own column (sun, moon).
ggplot(mdf, aes(date, time)) +
  geom_tile(mapping = aes(alpha = sun), fill = "goldenrod1") +
  geom_tile(mapping = aes(alpha = moon), fill = "dodgerblue3") +
  # One alpha scale, titled "moon", with opacities limited to [0, 0.5].
  scale_alpha_continuous(name = "moon", range = c(0, 0.5)) +
  theme_bw() +
  # Rotate the date labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Which gives you the following result

Related

Form groups using block random assignment on two covariates

I often have groups of people who differ in their nationality and their status. They have to work in groups, and I would like to use block random assignment to create groups of a maximum of 5 individuals. Each group should have at least one person who is "foreign" and one who is "female". I have found the library randomizr which is supposedly able to do block random assignments, but my code does not work as intended.
An example dataset would be:
structure(list(Student = c("Susan", "Ciara", "Carl",
"Paula", "Emil", "Tammy", "Logan", "Anna", "Victor",
"Felix", "Federica", "Jesus", "Jens", "Samira", "Berit", "Yi",
"Lea", "Gordon", "Boris", "Silvester", "Celine", "Thomas", "Eduardo",
"RoY", "Marlene", "Amelie", "Claudius", "Herbert", "Cynthia", "Melanie",
"Leander", "Leona", "Tobias", "Leander", "Peter",
"Lilly", "Roxy", "Joachim"), Nationality = structure(c(2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L), levels = c("Non-foreign", "Foreign"), class = "factor"),
Gender = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L), levels = c("female",
"male"), class = "factor")), class = "data.frame", row.names = c(NA,
-38L))
UPDATE: I have carefully read the vignette for the randomizr package again. I found that it is possible to create blocks with more than one covariate. I am now looking to see if I can assign these blocks to the students to get block-randomized groups. I need to test if the code below works as intended.
# One block label per student, combining both covariates
# (e.g. "Foreign_female").
blocks <- paste(data$Nationality, data$Gender, sep = "_")
# Assign every student to one of 6 arms with block_ra(), using the combined
# label as the blocking variable.
Z <- block_ra(blocks = blocks, num_arms = 6)
# Cross-tabulate students against their assigned arm.
table(data[["Student"]], Z)

Stacked barplot using ggplot2 - data visualisation

I have very little experience with R and am trying to make a stacked barplot using ggplot2.
I have 2 groups - control and experimental, and 2 choices - red and green. I'm not sure how to organise my data.
There were 80 animals in my trial (control n=40, experimental n=40) and they were given the choice of red and green substrate, I noted which substrate they chose, and that's the data I'm trying to plot.
I would essentially want 'Experimental' and 'Control' on the x-axis, and the number of choices on the y-axis (e.g. Control, Red n=20; Control, Green n=12; etc.).
Any help would be appreciated!
Edited to add:
This is the graph it's outputting
This is the code I'm using (including suggested adjustments):
# Simulated data: 40 "control" rows followed by 40 "experimental" rows, each
# with a substrate drawn at random (with replacement) from "red"/"green".
df <- data.frame(group = rep(c("control", "experimental"), each = 40),
substrate = sample (c("red","green"), 80, TRUE))
# Plot attempt as posted: substrate is mapped to both y and fill, and
# stat = "identity" asks geom_bar() to use the y values as given instead of
# counting rows.
# NOTE(review): the unnamed scale_fill_manual() values are presumably matched
# to the substrate levels in order ("green", "red"), which would swap the
# colours - confirm.
ggplot(df, aes(x = group, y = substrate, fill = substrate)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("red", "green"))
This is the output:
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control", "experimental"
), class = "factor"), substrate = structure(c(1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("green",
"red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
Output from dput(behaviour) - the original data frame:
structure(list(group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Experimental"
), class = "factor"), substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Green",
"Red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
Your data:
behaviour=structure(list(group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Experimental"
), class = "factor"), substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Green",
"Red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
We can tabulate your data:
# Count the animals for every group x substrate combination.
table(behaviour[["group"]], behaviour[["substrate"]])
Green Red
Control 10 30
Experimental 27 13
With geom_bar() you should specify either fill or y, not both. In your case, specify only the fill, and geom_bar() will do the counting for you:
# Stacked bars: one bar per group, split by substrate. No y aesthetic is
# supplied, so geom_bar() does the counting.
ggplot(data = behaviour, mapping = aes(x = group, fill = substrate)) +
  geom_bar() +
  scale_fill_manual(values = c("#29c7ac", "#c02739"))
You could have your data like this, with one row for each observation (i.e. each animal), with the group and the substrate recorded for each:
# One row per animal: 40 control rows (10 green, 30 red) followed by
# 40 experimental rows (27 green, 13 red).
df <- data.frame(
  group = rep(c("control", "experimental"), times = c(40, 40)),
  substrate = rep(
    rep(c("green", "red"), times = 2),
    times = c(10, 30, 27, 13)
  )
)
Now define your plot using ggplot, specifying group as your x axis, and ..count.. as your y axis. Use geom_bar to get the stacked bars you are looking for, and finally use scale_fill_manual to set the colours:
library(ggplot2)

# Stacked bar chart: group on the x axis, the computed row count (..count..)
# on the y axis, bars split and coloured by substrate, each segment outlined
# in black.
# NOTE(review): newer ggplot2 releases spell ..count.. as after_stat(count).
ggplot(data = df, mapping = aes(group, ..count.., fill = substrate)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = c("green", "red"))

Difference between densityPlot and geom_density

I am plotting the same data with both geom_density and base R's densityPlot:
head(df)
min_dist iteration
<dbl> <fct>
1 -79277 1
2 -68987 1
3 -98661 1
4 -98145 1
5 -222408 1
6 -217409 1
geom_density:
# ggplot2 version: density curve of min_dist plus a rug of the observations,
# with the x scale limited to [-1e4, 1e4].
# NOTE(review): xlim() sets scale limits, which presumably removes the
# observations outside that range before the density is estimated - confirm
# against the ggplot2 documentation.
ggplot(df) + geom_density(aes(min_dist)) + xlim(c(-1e4, 1e4)) + geom_rug(aes(min_dist))
densityPlot:
# densityPlot() version over the same column, with the x axis limited to the
# same [-1e4, 1e4] range.
densityPlot(df$min_dist, xlim = c(-1e4, 1e4))
And they are strikingly different!
I was under the impression that geom_density was a wrapper for stat_density.
Is this just a matter of different adjust values?
df <- structure(list(min_dist = c(-79277, -68987, -98661, -98145, -222408,
-217409, -10759, -10363, 48034, 48525, 617038, 617069, -19656,
-17396, -479, -333, -48721, -47691, -19761, 380, 31429, 33953,
-9220, -61971, -609, -560, 6228, 13082, -217137, -216639, 99268,
102365, -119574, -53795, -329407, -111401, 1018733, -559898,
-1346, -15, -71577, -2883, 19490, -12321, -12240, -8470, -29804,
28724, 40673, -57818, -41421, -39126, 136783, 165727, -52989,
-51058, 58995, -12226, 8057, 44217, 13280, 16828, -80410, 11324,
975, 2630, 14250, 17108, -29341, -27033, -79277, -68987, -98661,
-98145, -222408, -217409, -10759, -10363, 48034, 48525, 617038,
617069, -19656, -17396, -479, -333, -48721, -47691, -19761, 380,
31429, 33953, -9220, -61971, -609, -560, 6228, 13082, -217137,
-216639, 99268, 102365, -119574, -53795, -329407, -111401, 1018733,
-559898, -1346, -15, -71577, -2883, 19490, -12321, -12240, -8470,
-29804, 28724, 40673, -57818, -41421, -39126, 136783, 165727,
-52989, -51058, 58995, -12226, 8057, 44217, 13280, 16828, -80410,
11324, 975, 2630, 14250, 17108, -29341, -27033), iteration = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor")), row.names = c(NA, -140L), class = c("tbl_df",
"tbl", "data.frame"))

Check Dataframe by Row, over multiple columns and Code 1 for positive and 0 for negative

I have a set of columns, all coded as factors. The values are coded as 1 for positive and 0 for negative. Samples on rows, and scores for each on the columns.
I want to find out, sample wise, if there are any positives. If there is at least one positive, I want to generate a new column in the same database which says 1, as in this sample was positive for at least one, or 0 as in this sample was negative for all.
dat3 <- structure(list(A = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L), .Label = c("1", "0"), class = "factor"),
B = structure(c(1L,1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
C = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c("nd","0", "1"), class = "factor"),
D = structure(c(1L, 1L, 1L,2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,2L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
E = structure(c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0","1"), class = "factor")),
.Names = c("A", "B", "C", "D", "E"), class = "data.frame", row.names = c(NA, 24L))
I tried and achieved the result I wanted by using if and if-else statements, but they are really tedious and I don't think that's the best way to do it. I've been trying the apply function, but I haven't had much success.
The result I'm expecting is
dat3$result <- c(0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1)
The 'dat3' columns are all factors; they can be converted to numeric and then passed to rowSums to create a binary column:
# Flag each sample (row) with 1 if any score column is positive, else 0.
# Factor columns are converted through their labels (as.character), not their
# internal codes. Non-numeric labels such as "nd" become NA (with a coercion
# warning) and are ignored by na.rm = TRUE.
# vapply() replaces sapply() so the result shape is declared up front: every
# column must yield exactly nrow(dat3) integers, otherwise it fails loudly
# instead of silently returning a list.
dat3$result <- as.integer(
  rowSums(
    vapply(
      dat3,
      function(x) as.integer(as.character(x)),
      integer(nrow(dat3))
    ),
    na.rm = TRUE
  ) > 0
)
Or convert to a logical matrix and then do the rowSums
# Compare every cell with "1" to get a logical matrix, count the TRUEs per
# row, and report 1 for rows with at least one hit.
as.integer(rowSums(dat3 == "1") >= 1)
#[1] 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1

Standard evaluation with mutate_ to calculate percentages by group

I am trying to use standard evaluation with dplyr to calculate percents as a function of two grouping variables. The problem is in my mutate_ statement.
Here is a dataset:
structure(list(
var1 = structure(c(2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L
),
.Label = c("No", "Yes"), class = "factor"),
var2 = structure(c(2L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L
),
.Label = c("Female", "Male"), class = "factor")),
.Names = c("var1", "var2"), row.names = c(NA, -100L), class = "data.frame")
Here is the code I am working with:
# Question's attempt: count the rows per (var1, var2) group, then add a
# percentage column through standard-evaluation mutate_().
# The post reports that this version fails with: argument "nm" is missing.
for_plots = function(data, var1, var2){
grouped_data = data %>% group_by_(var1, var2) %>%
summarise_(n_in_group = ~n()) %>%
# setNames() receives only the list here; no names vector (`nm`) is supplied.
mutate_(.dots = setNames(list(
interp(quote(n_in_group / sum(n_in_group, na.rm = TRUE) * 100),
# NOTE(review): n_in_group is neither a parameter nor a local variable of
# this function; as.name("n_in_group") may have been intended - confirm.
n_in_group = as.name(n_in_group)))
))
return(grouped_data)
}
When I run the code, I receive an error:
Error in setNames(list(interp(quote(n_in_group/sum(n_in_group, na.rm = TRUE) * :
argument "nm" is missing, with no default
Any thoughts?
Here is some code based on @Frank's response:
#' Count rows per group and express each count as a percentage
#'
#' @param data A data frame.
#' @param var1,var2 Grouping columns, passed unchanged to `group_by_()`.
#' @return The summarised table with columns `n_in_group` (rows per group) and
#'   `percent` (`n_in_group` as a percentage of the `n_in_group` total).
#'   NOTE(review): the total is taken within whatever grouping
#'   `summarise_()` leaves in place (presumably `var1`) - confirm.
for_plots <- function(data, var1, var2) {
  by_group <- group_by_(data, var1, var2)
  counts <- summarise_(by_group, n_in_group = ~n())
  mutate(counts, percent = (n_in_group / sum(n_in_group, na.rm = TRUE)) * 100)
}

Resources