I can do it using group by with only one Column but I want the Count for every Column in the Dataframe. See example:
Dataframe with similar values in each Column:
df =
C1 C2 C3
1 Positiv Negativ Neutral
2 NA Neutral Positiv
3 Positiv NA Negativ
4 Negativ Positiv NA
I thought about something like this:
df %>% group_by(names(df)) %>% count()
desired Output: Count Values in each Column
C1 C2 C3
1 Positive 2 1 1
2 Negative 1 1 1
3 Neutral 0 1 1
4 NA 1 1 1
Try this with some tidyverse functions and reshaping your data to long and wide:
library(dplyr)
library(tidyr)
#Code
# Reshape to long form (one row per cell), tally every column/value pair,
# then spread the tallies back out with one column per original column.
# count() replaces group_by() + summarise(n()) and returns an ungrouped
# result, so no grouping is carried into pivot_wider().
new <- df %>%
  pivot_longer(everything()) %>%
  count(name, value, name = "N") %>%
  pivot_wider(names_from = name, values_from = N, values_fill = 0)
Output:
# A tibble: 4 x 4
value C1 C2 C3
<chr> <int> <int> <int>
1 Negativ 1 1 1
2 Positiv 2 1 1
3 NA 1 1 1
4 Neutral 0 1 1
Some data used:
#Data
df <- structure(list(C1 = c("Positiv", NA, "Positiv", "Negativ"), C2 = c("Negativ",
"Neutral", NA, "Positiv"), C3 = c("Neutral", "Positiv", "Negativ",
NA)), class = "data.frame", row.names = c("1", "2", "3", "4"))
We can use base R with table
# For each column, tabulate a factor with a fixed set of levels so that a
# value absent from a column still gets a 0 count; useNA = "always" adds the
# <NA> row.
sapply(df, function(x) table(factor(x,
levels = c('Negativ', 'Positiv', 'Neutral')), useNA = "always"))
# C1 C2 C3
#Negativ 1 1 1
#Positiv 2 1 1
#Neutral 0 1 1
#<NA> 1 1 1
Or using a vectorized approach
# Cross-tabulate every cell value against the column index of that cell
# (col(df) is flattened in the same column-major order as unlist(df)).
# useNA = 'always' also adds an <NA> column for the index; [, -4] drops it.
table(unlist(df), c(col(df)), useNA = 'always')[, -4]
# 1 2 3
# Negativ 1 1 1
# Neutral 0 1 1
# Positiv 2 1 1
# <NA> 1 1 1
data
df <- structure(list(C1 = c("Positiv", NA, "Positiv", "Negativ"), C2 = c("Negativ",
"Neutral", NA, "Positiv"), C3 = c("Neutral", "Positiv", "Negativ",
NA)), class = "data.frame", row.names = c("1", "2", "3", "4"))
Related
I have a data frame of the following form:
a <- data.frame(list(X1=c("stn", "s1", "stn", "s2"),
X2=c("var1", "1", "var4", "2"),
X3=c("var2", "2", "var3", "3"),
X4=c("NA", "NA", "var2", "2")))
X1 X2 X3 X4
1 stn var1 var2 NA
2 s1 1 2 NA
3 stn var4 var3 var2
4 s2 2 3 2
How can I get the result:
b <- data.frame(list(stn=c("s1", "s2"),
var1=c(1, NA),
var2=c(2, 2),
var3=c(NA, 3),
var4=c(NA, 2)))
stn var1 var2 var3 var4
1 s1 1 2 NA NA
2 s2 NA 2 3 2
A (mostly) base R solution: split the data.frame at each header row, call janitor::row_to_names(1) on each piece, recombine the pieces, and remove the unwanted column using subset
# Start a new chunk at every header row (X1 == "stn"), promote the first row
# of each chunk to column names, stack the chunks, and drop the column that
# ends up named "NA". The input is the question's data frame `a`.
do.call(dplyr::bind_rows,
  a |>
    split(cumsum(a$X1 == "stn")) |>
    lapply(\(x) x |>
      janitor::row_to_names(1))
) |>
  subset(, -`NA`)
stn var1 var2 var4 var3
1 s1 1 2 <NA> <NA>
2 s2 <NA> 2 2 3
A dplyr solution:
library(dplyr)
a %>%
  # A new group starts at each header row (X1 == "stn"), so any number of
  # header/value blocks of any length is handled, not just two blocks of two.
  group_by(grp = cumsum(X1 == "stn")) %>%
  # Within each group, use the first row as column names for the other rows.
  group_map(~ setNames(.x[-1, ], .x[1, ])) %>%
  bind_rows() %>%
  select(-`NA`) %>%
  # Every column except the station id is converted from text to numeric;
  # selecting by exclusion does not depend on the column order after binding.
  mutate(across(-stn, as.numeric))
# # A tibble: 2 × 5
# stn var1 var2 var4 var3
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 s1 1 2 NA NA
# 2 s2 NA 2 2 3
How can I replace a column that contains only NA values with the values of the next column in R? This has to be done without hard-coding the column names.
The column that contained only NA values should then be removed.
library(tidyverse)
df1 <- structure(list(GID = c("1", "2", "3", "4", "5", "NG1", "MG2", "MG3", "NG4"),
ColA = c(NA, NA, NA, NA, NA, NA, NA, NA, NA),
ColB = c("2", "4", "4", "5", "5", "", "1", "1", "")),
row.names = c(NA, -9L),
class = "data.frame")
df1 %>%
mutate(across(everything(), ~str_replace(., "^$", "N")),
GID = GID %>% str_remove("N"))
#> GID ColA ColB
#> 1 1 NA 2
#> 2 2 NA 4
#> 3 3 NA 4
#> 4 4 NA 5
#> 5 5 NA 5
#> 6 G1 NA N
#> 7 MG2 NA 1
#> 8 MG3 NA 1
#> 9 G4 NA N
Expected output:
#> GID ColA
#> 1 1 2
#> 2 2 4
#> 3 3 4
#> 4 4 5
#> 5 5 5
#> 6 G1 N
#> 7 MG2 1
#> 8 MG3 1
#> 9 G4 N
I guess you already have answer to the first part of your question, here is an alternative way using replace. To drop columns that have all NA in them you can use select with where.
library(dplyr)
df1 %>%
  # Replace empty strings with "N" in every column, then strip the first "N"
  # from GID. everything() is spelled out because calling across() without
  # .cols is deprecated as of dplyr 1.1.0.
  mutate(across(everything(), ~ replace(.x, .x == "", "N")),
         GID = sub("N", "", GID)) %>%
  # Drop the columns that are entirely NA.
  select(-where(~ all(is.na(.x)))) %>%
  # Reuse the original leading column names for the columns that remain.
  rename_with(~ names(df1)[seq_along(.x)])
# GID ColA
#1 1 2
#2 2 4
#3 3 4
#4 4 5
#5 5 5
#6 G1 N
#7 MG2 1
#8 MG3 1
#9 G4 N
I have a dataframe like this. A small sample actually the df is bigger:
LOW 1 4 NA
MID 3 4 4
HIG 2 5 4
And would like to get the difference for LOW and HIG with MID so the ending df would be like this:
LOW 2 0 NA
MID 3 4 4
HIG 1 1 0
So you're getting: LOW = 3 - 1 = 2 and HIG = 3 - 2 = 1. I can do it via VBA macros but want to scale with R.
It can be done with mutate_if/mutate_at
library(dplyr)
df1 %>%
  # across(where(is.numeric), ...) is the current form of the superseded
  # mutate_if(). For every numeric column, rows other than 'MID' become the
  # absolute difference from the 'MID' value; the 'MID' row is kept as is.
  mutate(across(where(is.numeric),
                ~ case_when(grp != "MID" ~ abs(.x - .x[grp == "MID"]),
                            TRUE ~ .x)))
# grp v1 v2 v3
#1 LOW 2 0 NA
#2 MID 3 4 4
#3 HIG 1 1 0
Or in base R
# i1 flags the reference ('MID') row.
i1 <- df1$grp == 'MID'
# Overwrite every non-MID row with its absolute difference from the MID row.
# Each MID value is repeated once per non-MID row (each = sum(!i1)) so the
# vector lines up column by column with the data frame it is subtracted from.
df1[!i1, -1] <- abs(df1[!i1, -1] - rep(unlist(df1[i1, -1]), each = sum(!i1)))
data
df1 <- structure(list(grp = c("LOW", "MID", "HIG"), v1 = c(1L, 3L, 2L
), v2 = c(4L, 4L, 5L), v3 = c(NA, 4L, 4L)), class = "data.frame", row.names = c(NA,
-3L))
You can change the 'LOW', 'HIG' rows after subtracting by 'MID' :
# Overwrite the value columns (everything but the first column) of the LOW
# row, then of the HIG row, with the absolute distance to the MID row.
df1[which(df1$grp == "LOW"), -1] <-
  abs(df1[which(df1$grp == "LOW"), -1] - df1[which(df1$grp == "MID"), -1])
df1[which(df1$grp == "HIG"), -1] <-
  abs(df1[which(df1$grp == "HIG"), -1] - df1[which(df1$grp == "MID"), -1])
df1
# grp v1 v2 v3
#1 LOW 2 0 NA
#2 MID 3 4 4
#3 HIG 1 1 0
I have a table with several columns, I would like to make a column by combining 'R1,R2 and R3' columns in a table.
DF:
ID R1 T1 R2 T2 R3 T3
rs1 A 1 NA . NA 0
rs21 NA 0 C 1 C 1
rs32 A 1 A 1 A 0
rs25 NA 2 NA 0 A 0
Desired output:
ID R1 T1 R2 T2 R3 T3 New_R
rs1 A 1 NA . NA 0 A
rs21 NA 0 C 1 C 1 C
rs32 A 1 A 1 A 0 A
rs25 NA 2 NA 0 A 0 A
We can use tidyverse
library(tidyverse)
DF %>%
  # For each row, gather the R* columns, drop NAs, keep the distinct values
  # and paste them into one string. The argument must be spelled `collapse`:
  # the misspelled `collape` was not matched to str_c()'s `collapse`
  # parameter, so rows with more than one distinct value were never joined
  # into the single string that pmap_chr() requires.
  mutate(New_R = pmap_chr(select(., starts_with("R")), ~ c(...) %>%
    na.omit() %>%
    unique() %>%
    str_c(collapse = "")))
#. ID R1 T1 R2 T2 R3 T3 New_R
#1 rs1 A 1 <NA> . <NA> 0 A
#2 rs21 <NA> 0 C 1 C 1 C
#3 rs32 A 1 A 1 A 0 A
#4 rs25 <NA> 2 <NA> 0 A 0 A
If there is only one non-NA element per row, we can use coalesce
# Splice the R* columns into coalesce() as separate arguments; for each row
# it returns the first non-NA value among them.
DF %>%
mutate(New_R = coalesce(!!! select(., starts_with("R"))))
Or in base R
# Parallel minimum over the columns named R<digits>, ignoring NAs. Note this
# returns the smallest string in each row, not the first non-NA one; the two
# coincide when a row holds only one distinct value.
DF$New_R <- do.call(pmin, c(DF[grep("^R\\d+", names(DF))], na.rm = TRUE))
data
DF <- structure(list(ID = c("rs1", "rs21", "rs32", "rs25"), R1 = c("A",
NA, "A", NA), T1 = c(1L, 0L, 1L, 2L), R2 = c(NA, "C", "A", NA
), T2 = c(".", "1", "1", "0"), R3 = c(NA, "C", "A", "A"), T3 = c(0L,
1L, 0L, 0L)), class = "data.frame", row.names = c(NA, -4L))
you can use the ifelse function in a nested way:
# Row by row: take R1 when it is not NA, otherwise R2, otherwise R3,
# otherwise NA.
DF$New_R <- ifelse(!is.na(DF$R1), DF$R1,
ifelse(!is.na(DF$R2), DF$R2,
ifelse(!is.na(DF$R3), DF$R3, NA)))
ifelse takes three arguments: a condition, the value to use where the condition holds, and the value to use where it does not. It is vectorized, so when applied to data frame columns it treats each row separately. In my example it picks the first non-NA value found.
We can use apply row-wise, removing NA values and keeping only the unique values.
# Columns to combine.
cols <- paste0("R", 1:3)
# The question's data frame is named `DF`; R is case-sensitive, so a bare
# `df` would not refer to it. For each row, drop NAs, keep the distinct
# values and paste them into one string.
DF$New_R <- apply(DF[cols], 1, function(x)
  paste0(unique(na.omit(x)), collapse = ""))
DF
# ID R1 T1 R2 T2 R3 T3 New_R
#1 rs1 A 1 <NA> . <NA> 0 A
#2 rs21 <NA> 0 C 1 C 1 C
#3 rs32 A 1 A 1 A 0 A
#4 rs25 <NA> 2 <NA> 0 A 0 A
I have a data frame that looks like this:
cat df1 df2 df3
1 1 NA 1 NA
2 1 NA 2 NA
3 1 NA 3 NA
4 2 1 NA NA
5 2 2 NA NA
6 2 3 NA NA
I want to populate df3 so that when cat = 1, df3 = df2 and when cat = 2, df3 = df1. However I am getting a few different error messages.
My current code looks like this:
df$df3[df$cat == 1] <- df$df2
df$df3[df$cat == 2] <- df$df1
Try this code:
# Fill df3 from df2 on the cat == 1 rows and from df1 on the cat == 2 rows.
# The same row filter must appear on both sides of each assignment so that
# the copied values come from the rows being filled (the second line
# previously read df1 from the cat == 1 rows).
df[df$cat == 1, "df3"] <- df[df$cat == 1, "df2"]
df[df$cat == 2, "df3"] <- df[df$cat == 2, "df1"]
The output:
df
cat df1 df2 df3
1 1 1 1 1
2 2 1 2 1
3 3 1 3 NA
4 4 2 NA NA
5 5 2 NA NA
6 5 2 NA NA
You can try
ifelse(df$cat == 1, df$df2, df$df1)
[1] 1 2 3 1 2 3
# saving
df$df3 <- ifelse(df$cat == 1, df$df2, df$df1)
# if there are other values than 1 and 2 you can try a nested ifelse
# that is setting other values to NA
ifelse(df$cat == 1, df$df2, ifelse(df$cat == 2, df$df1, NA))
# or you can try a tidyverse solution.
library(tidyverse)
df %>%
mutate(df3=case_when(cat == 1 ~ df2,
cat == 2 ~ df1))
cat df1 df2 df3
1 1 NA 1 1
2 1 NA 2 2
3 1 NA 3 3
4 2 1 NA 1
5 2 2 NA 2
6 2 3 NA 3
# data
df <- structure(list(cat = c(1L, 1L, 1L, 2L, 2L, 2L), df1 = c(NA, NA,
NA, 1L, 2L, 3L), df2 = c(1L, 2L, 3L, NA, NA, NA), df3 = c(NA,
NA, NA, NA, NA, NA)), .Names = c("cat", "df1", "df2", "df3"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))