I have survey data that has a binary 1, 0 (indicating peak or off-peak) variable with the related peak or off-peak numbers in two separate columns.
structure(list(q9_jul_2019 = c(1, 0, 1, 0, 1, 0), q9_aug_2019 = c(1,
0, 1, 0, 1, 0), q9_sep_2019 = c(1, 0, 1, 0, 1, 0), q9_oct_2019 = c(0,
0, 1, 0, 1, 0), q9_nov_2019 = c(0, 0, 1, 0, 1, 0), q9_dec_2019 = c(0,
0, 1, 0, 0, 0), q9_jan_2020 = c(0, 0, 1, 0, 0, 0), q9_feb_2020 = c(0,
1, 0, 1, 0, 0), q9_mar_2020 = c(1, 1, 0, 1, 0, 0), q9_apr_2020 = c(1,
1, 1, 1, 0, 1), q9_may_2020 = c(0, 1, 0, 0, 0, 0), q9_jun_2020 = c(0,
0, 0, 0, 0, 0), q15 = c(1, 10, 30, 0, 2, 0), q22 = c(0, 10, 6,
0, 0, 0)), row.names = c(NA, 6L), class = "data.frame")
I have created new monthly columns that have the associated visitation numbers in that column but I'm sure there must be a neater way to do it using across(). I haven't been able to make it work though, so at the moment I'm stuck at the following:
survey <- survey %>%
mutate(visitation_jul_19 = if_else(q9_jul_2019 == 1, q15, q22),
visitation_aug_19 = if_else(q9_aug_2019 == 1, q15, q22),
visitation_sep_19 = if_else(q9_sep_2019 == 1, q15, q22),
visitation_oct_19 = if_else(q9_oct_2019 == 1, q15, q22),
visitation_nov_19 = if_else(q9_nov_2019 == 1, q15, q22),
visitation_dec_19 = if_else(q9_dec_2019 == 1, q15, q22),
visitation_jan_20 = if_else(q9_jan_2020 == 1, q15, q22),
visitation_feb_20 = if_else(q9_feb_2020 == 1, q15, q22),
visitation_mar_20 = if_else(q9_mar_2020 == 1, q15, q22),
visitation_apr_20 = if_else(q9_apr_2020 == 1, q15, q22),
visitation_may_20 = if_else(q9_may_2020 == 1, q15, q22),
visitation_jun_20 = if_else(q9_jun_2020 == 1, q15, q22))
You may try
library(dplyr)
survey %>%
mutate(across(q9_jul_2019:q9_jun_2020, ~ ifelse(.x == 1, q15, q22)))
q9_jul_2019 q9_aug_2019 q9_sep_2019 q9_oct_2019 q9_nov_2019 q9_dec_2019 q9_jan_2020 q9_feb_2020 q9_mar_2020 q9_apr_2020
1 1 1 1 0 0 0 0 0 1 1
2 10 10 10 10 10 10 10 10 10 10
3 30 30 30 30 30 30 30 6 6 30
4 0 0 0 0 0 0 0 0 0 0
5 2 2 2 2 2 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0
q9_may_2020 q9_jun_2020 q15 q22
1 0 0 1 0
2 10 10 10 10
3 6 6 30 6
4 0 0 0 0
5 0 0 2 0
6 0 0 0 0
Related
I would like to do something very specific. I have a vast set of data, which, in summary, looks more or less like this, with values 0, 1 and 2:
I need to create a situation variable so that it contains the value 0, 1 and 2.
The value 0 for cases that contain only 0's and 1's in the entire line.
The value 1 for the case where the value 2 appears, but at some point 1 appears before it.
The value 2 for the case where the value 2 appears, but at some point 0 appears before it.
So it's something close to:
structure(list(X1 = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1), X2 = c(1,
1, 1, 1, 0, 0, 0, 0, 0, 2), X3 = c(0, 1, 1, 1, 1, 0, 0, 1, 0,
0), X4 = c(0, 1, 1, 0, 1, 1, 0, 0, 0, 0), X5 = c(2, 1, 1, 0,
2, 1, 1, 0, 0, 0), X6 = c(2, 1, 1, 0, 2, 1, 1, 0, 0, 0), X7 = c(2,
1, 1, 1, 2, 1, 1, 2, 0, 0), X8 = c(0, 1, 1, 1, 2, 1, 2, 2, 2,
0)), class = "data.frame", row.names = c(NA, 10L))
I wrote a score function and applied it over all the rows of your dataframe.
score <- function(x) {
a <- which(x == 2)
ifelse(length(a) > 0, ifelse(a[1] >=2, 2 - x[a[1] - 1], 1), 0)
}
df <- structure(list(X1 = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1),
X2 = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 2),
X3 = c(0, 1, 1, 1, 1, 0, 0, 1, 0, 0),
X4 = c(0, 1, 1, 0, 1, 1, 0, 0, 0, 0),
X5 = c(2, 1, 1, 0, 2, 1, 1, 0, 0, 0),
X6 = c(2, 1, 1, 0, 2, 1, 1, 0, 0, 0),
X7 = c(2, 1, 1, 1, 2, 1, 1, 2, 0, 0),
X8 = c(0, 1, 1, 1, 2, 1, 2, 2, 2, 0)),
class = "data.frame", row.names = c(NA, 10L))
df$situation <- sapply(1:nrow(df), function(i) score(as.numeric(df[i,])))
df
Here's a tidyverse approach.
I'll first concatenate all columns together, then use grepl() to look for 12 or 02.
library(tidyverse)
df %>% rowwise() %>%
mutate(concat = paste(c_across(everything()), collapse = "")) %>%
ungroup() %>%
mutate(situation = case_when(
!grepl(2, concat) ~ 0,
grepl("12", concat) ~ 1,
grepl("02", concat) ~ 2
)) %>%
select(-concat)
Output
# A tibble: 10 x 9
X1 X2 X3 X4 X5 X6 X7 X8 situation
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 0 0 2 2 2 0 2
2 1 1 1 1 1 1 1 1 0
3 1 1 1 1 1 1 1 1 0
4 1 1 1 0 0 0 1 1 0
5 1 0 1 1 2 2 2 2 1
6 1 0 0 1 1 1 1 1 0
7 1 0 0 0 1 1 1 2 1
8 1 0 1 0 0 0 2 2 2
9 0 0 0 0 0 0 0 2 2
10 1 2 0 0 0 0 0 0 1
Note that this solution assumes that:
2 will not appear in the first column
1 or 2 in the situation is defined by the number immediately before 2 in your dataset
There will not be a case of 12 and 02 happening in the same row
Suppose I have the following string:
l1 = c(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1)
and I only want to keep the "FIRST new 1", that is, my desire outcome of the above strong is:
l1 = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
I tried to shift and subtract the lists, whatever is not 1, set to 0; but this way doesn't work.
You may try (base R way)
x <- c(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1)
y <- rle(x)
z<- cumsum(y$lengths)[y$values == 0] + 1
w <- rep(0, length(x))
w[z] <- 1
w
[1] 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1
dplyr way
library(dplyr)
library(xts)
library(data.table)
x <- data.frame(
l1 = c(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1)
)
x %>%
mutate(y = rleid(l1)) %>%
group_by(y) %>%
mutate(l1 = ifelse((y %% 2) == first(l1) & row_number(y)>1, 0, l1)) %>%
ungroup %>%
select(-y) %>%
pull(l1)
[1] 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1
Clumsy way
bool IsNewOneAppeared = 0
for(int i;i<c.length;i++)
{
if(IsNewOneAppeared )
c[i]= 0;
else if(c[i] equal 1)
{
keep 1;
IsNewOneAppeared =1;
}
}
I have a paneldata dataframe structure, something like this:
df <- data.frame("id" = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
"Status_2014" = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
"Status_2015" = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0),
"Status_2016" = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
I want to generate a new dummy variable, that takes the value 1, if the rows contains 1 in any of the three columns or otherwise 0 if not. It should end up like this:
df <- data.frame("id" = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
"Status_2014" = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
"Status_2015" = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0),
"Status_2016" = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
"Final_status" = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0))
Can anyone help me achieve this?
We can use if_any on the columns that starts_with 'Status', to check for any 1 value in a row and it returns TRUE if there is one or else FALSE which is coerced to binary with as.integer/+
library(dplyr)
df %>%
mutate(Final_status = +(if_any(starts_with('Status'), ~ . ==1)))
-outptu
id Status_2014 Status_2015 Status_2016 Final_status
1 1 1 0 0 1
2 1 1 0 0 1
3 1 1 0 0 1
4 1 1 0 0 1
5 2 0 1 0 1
6 2 0 1 0 1
7 2 0 1 0 1
8 2 0 1 0 1
9 3 0 0 0 0
10 3 0 0 0 0
11 3 0 0 0 0
12 3 0 0 0 0
Or using rowSums from base R
df$Final_status <- +(rowSums(df[-1] > 0) > 0)
You write an if condition to define the variable as 1 or 0, and inside this condition the most straight forward ways would be a dplyr pipe.
I don't have the dplyr syntax in my head, to long not used, but dplyr is what you want.
https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf
best greetings
I have somating like a binary dataframe
> dput(head(dat))
structure(list(CDR3.aa = c("CALWEVQELGKKIKVF", "CAATVGGWGKLQF",
"CACDPLYGGITGGFNTDKLIF", "CACDTLLPTSLGDMAKLIF", "CALGELSSDGGGAIF",
"CALSNTGGFKTIF"), TCR_CS001_T1 = c(1, 1, 1, 1, 1, 0), TCR_CS001_T2 = c(0,
1, 1, 1, 1, 0), TCR_CS002 = c(1, 0, 0, 0, 0, 0), TCR_HC002 = c(0,
0, 0, 0, 0, 1), TCR_HC003 = c(1, 0, 0, 0, 0, 1)), row.names = c(NA,
-6L), .internal.selfref = <pointer: 0x0000023f7a101ef0>, class = c("immunr_public_repertoire",
"data.table", "data.frame"))
That shows if an amin acide exists in a sample we see 1 and if absent shown by 0
I want to replace 1 and 0 by amino acid itself
How I can do that please?
If CDR3.aa is the amino acid column you can do :
dplyr :
library(dplyr)
dat %>% mutate(across(-CDR3.aa, ~ifelse(. == 1, CDR3.aa, .)))
# CDR3.aa TCR_CS001_T1 TCR_CS001_T2 TCR_CS002
#1: CALWEVQELGKKIKVF CALWEVQELGKKIKVF 0 CALWEVQELGKKIKVF
#2: CAATVGGWGKLQF CAATVGGWGKLQF CAATVGGWGKLQF 0
#3: CACDPLYGGITGGFNTDKLIF CACDPLYGGITGGFNTDKLIF CACDPLYGGITGGFNTDKLIF 0
#4: CACDTLLPTSLGDMAKLIF CACDTLLPTSLGDMAKLIF CACDTLLPTSLGDMAKLIF 0
#5: CALGELSSDGGGAIF CALGELSSDGGGAIF CALGELSSDGGGAIF 0
#6: CALSNTGGFKTIF 0 0 0
# TCR_HC002 TCR_HC003
#1: 0 CALWEVQELGKKIKVF
#2: 0 0
#3: 0 0
#4: 0 0
#5: 0 0
#6: CALSNTGGFKTIF CALSNTGGFKTIF
data.table :
library(data.table)
dat[, (names(dat)[-1]) := lapply(.SD, function(x) ifelse(x == 1, CDR3.aa, x)), .SDcols = -1]
I am new to plotly and not very good with R. I am trying to do stack plots and ended up with a very cumbersome code, that I am sure could be simplify using RColorbrewer and perhaps ggplot2 to group my stacked bar plots, but I am unsure on how to do it.
Below is the data I used, which is in a data.frame called data2
Nation glider radar AUV ROV USV corer towed_eq Seismic_eq Drill_rig Manned_sub Other clean
1 Belgium 0 0 1 1 1 3 0 0 0 0 0 6
2 Bulgaria 0 0 0 0 0 0 1 0 0 1 0 2
3 Croatia 0 2 1 2 0 0 0 0 0 0 0 5
4 Cyprus 3 0 0 0 0 0 0 0 0 0 0 3
5 Estonia 0 0 0 1 0 0 0 0 0 0 0 1
6 Finland 1 0 0 0 0 0 0 0 0 0 0 1
7 France 11 2 3 1 0 1 1 3 0 1 0 23
8 Germany 18 3 3 4 0 0 1 4 2 1 0 36
9 Greece 1 0 0 3 0 0 0 0 0 0 0 4
10 Ireland 0 0 0 2 0 0 0 0 0 0 0 2
11 Italy 10 8 3 2 4 0 0 1 0 0 0 28
12 Malta 0 2 0 0 0 0 0 0 0 0 0 2
13 Netherlands 0 2 0 0 0 0 0 0 0 0 0 2
14 Norway 17 3 1 3 0 1 3 1 0 0 1 30
15 Poland 0 0 0 1 0 0 0 0 0 0 0 1
16 Portugal 0 3 6 6 4 2 1 0 0 2 1 25
17 Romania 0 0 0 1 0 0 0 0 0 0 0 1
18 Slovenia 0 1 0 0 0 0 0 0 0 0 0 1
19 Spain 12 17 2 1 0 0 0 2 0 0 0 34
20 Sweden 0 2 1 3 0 0 0 0 0 0 0 6
21 Turkey 0 0 0 0 0 0 0 0 0 2 0 2
22 United Kingdom 0 0 13 4 1 11 4 2 1 0 4 40
23 Unknown 5 0 0 0 0 0 0 0 0 0 0 5
And this is the code I used
fig <- plot_ly(data2, x = ~Nation, y = ~glider, type = 'bar', name = 'Glider')
fig <- fig %>% add_trace(y = ~radar, name = 'Radar', marker=list(color='rgb(26, 118, 255)'))
fig <- fig %>% add_trace(y = ~AUV, name = 'AUV',marker=list(color='rgb(255, 128, 0)'))
fig <- fig %>% add_trace(y = ~ROV, name = 'ROV',marker=list(color='rgb(204, 0, 0)'))
fig <- fig %>% add_trace(y = ~USV, name = 'USV',marker=list(color='rgb(51, 255, 153)'))
fig <- fig %>% add_trace(y = ~corer, name = 'Corer',marker=list(color='rgb(204, 0, 204)'))
fig <- fig %>% add_trace(y = ~towed_eq, name = 'Towed equipment',marker=list(color='rgb(255, 255, 51)'))
fig <- fig %>% add_trace(y = ~Seismic_eq, name = 'Seismic equipment',marker=list(color='rgb(255, 204, 229)'))
fig <- fig %>% add_trace(y = ~Drill_rig, name = 'Drill rig',marker=list(color='rgb(102, 255, 255)'))
fig <- fig %>% add_trace(y = ~Manned_sub, name = 'Manned submersible',marker=list(color='rgb(128, 255, 0)'))
fig <- fig %>% add_trace(y = ~Other, name = 'Other equipment',marker=list(color='rgb(153, 153, 0)'))
fig <- fig %>% layout(xaxis = list(title = "",tickfont = list(size = 14)), yaxis = list(title = 'Number of assets',tickfont = list(size = 14)), barmode = 'stack')
fig
Is there an easier way to code this by using Rcolorbrewer instead of coding each color? and is it possible to group my stacked barplots Group1 (glider, auv, rov, usv), Group 2 (corer,towed_ew, seismic_eq, drill_rig) and Group 3 (radar, manned_sub, Other)?stack_plot
You can try this approach by melting the data:
library(dplyr)
library(plotly)
library(tidyr)
library(RColorBrewer)
#Data
data <- structure(list(Nation = c("Belgium", "Bulgaria", "Croatia", "Cyprus",
"Estonia", "Finland", "France", "Germany", "Greece", "Ireland",
"Italy", "Malta", "Netherlands", "Norway", "Poland", "Portugal",
"Romania", "Slovenia", "Spain", "Sweden", "Turkey", "United Kingdom",
"Unknown"), glider = c(0, 0, 0, 3, 0, 1, 11, 18, 1, 0, 10, 0,
0, 17, 0, 0, 0, 0, 12, 0, 0, 0, 5), radar = c(0, 0, 2, 0, 0,
0, 2, 3, 0, 0, 8, 2, 2, 3, 0, 3, 0, 1, 17, 2, 0, 0, 0), AUV = c(1,
0, 1, 0, 0, 0, 3, 3, 0, 0, 3, 0, 0, 1, 0, 6, 0, 0, 2, 1, 0, 13,
0), ROV = c(1, 0, 2, 0, 1, 0, 1, 4, 3, 2, 2, 0, 0, 3, 1, 6, 1,
0, 1, 3, 0, 4, 0), USV = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 0), corer = c(3, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 11, 0), towed_eq = c(0,
1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 4,
0), Seismic_eq = c(0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 2, 0, 0, 2, 0), Drill_rig = c(0, 0, 0, 0, 0, 0, 0,
2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), Manned_sub = c(0,
1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0,
0), Other = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 0, 0, 0, 4, 0), clean = c(6, 2, 5, 3, 1, 1, 23, 36, 4,
2, 28, 2, 2, 30, 1, 25, 1, 1, 34, 6, 2, 40, 5)), row.names = c(NA,
-23L), class = "data.frame")
Now the code:
#First reshape
df2 <- pivot_longer(data,cols = -Nation)
#Plot
p <- plot_ly(df2, x = df2$Nation,
y = df2$value,
type = 'bar',
name = df2$name,
text = df2$value,
color = df2$name,
colors = brewer.pal(length(unique(df2$name)),
"Paired"))%>%
layout(barmode = 'stack',hoverlabel = list(bgcolor= 'white') ,bargap = 0.5) %>%
layout(xaxis = list(categoryorder = 'array',
categoryarray = df2$Nation), showlegend = T)
The output: