Related
This question already has answers here:
Count number of rows per group and add result to original data frame
(11 answers)
Closed last year.
I have the following data frame:
# Example data: one row per observation with its category label and value.
# The column is spelled `category` so that grouping by `category` finds it.
df <- data.frame(
  category = c("a", "b", "b", "b", "b", "a", "c"),
  value = c(1, 5, 3, 6, 7, 4, 6)
)
and I want to record the number of occurrences of each category so the output would be:
# Desired result: the input columns plus the number of rows sharing each
# category ("a" occurs twice, "b" four times, "c" once).
df <- data.frame(
  category = c("a", "b", "b", "b", "b", "a", "c"),
  value = c(1, 5, 3, 6, 7, 4, 6),
  category_count = c(2, 4, 4, 4, 4, 2, 1)
)
Is there a simple way to do this?
# Attach data.table.
library(data.table)
# Turn df into a data.table in place.
setDT(df)
# .N is the number of rows in the current group; := adds it as a new column.
df[, category_count := .N, by = category]
With dplyr:
library(dplyr)
# Count the rows in each category, keep every original row, then drop the
# grouping so later verbs operate on the whole table again.
df %>%
  group_by(category) %>%
  mutate(category_count = n()) %>%
  ungroup()
# A tibble: 7 × 3
category value category_count
<chr> <dbl> <int>
1 a 1 2
2 b 5 4
3 b 3 4
4 b 6 4
5 b 7 4
6 a 4 2
7 c 6 1
base
df <- data.frame(
  catergory = c("a", "b", "b", "b", "b", "a", "c"),
  value = c(1, 5, 3, 6, 7, 4, 6),
  category_count = c(2, 4, 4, 4, 4, 2, 1)
)
# ave() applies FUN within each group and returns a vector aligned with the
# original rows, so length() gives every row the size of its group.
# seq_len() is used instead of seq(nrow(df)) because it stays empty for a
# zero-row data frame, where seq(0) would produce c(1, 0).
df$res <- ave(seq_len(nrow(df)), df$catergory, FUN = length)
df
#> catergory value category_count res
#> 1 a 1 2 2
#> 2 b 5 4 4
#> 3 b 3 4 4
#> 4 b 6 4 4
#> 5 b 7 4 4
#> 6 a 4 2 2
#> 7 c 6 1 1
Created on 2022-02-08 by the reprex package (v2.0.1)
This question already has answers here:
Subtracting values group-wise by the average of each group in R
(4 answers)
Closed 2 years ago.
Example:
So lets say I have this data frame.
# Example data: a grouping factor and two numeric columns.
x <- data.frame(
  factor = factor(c("a", "a", "b", "b", "c", "c")),
  value1 = c(1, 3, 2, 4, 5, 3),
  value2 = c(7, 9, 3, 4, 9, 3)
)
factor value1 value2
1 a 1 7
2 a 3 9
3 b 2 3
4 b 4 4
5 c 5 9
6 c 3 3
I know how to get the mean per factor, I use this method:
# Mean of value1 and value2 within each level of x$factor.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
aggregate(
  x[, c("value1", "value2")],
  by = list(x$factor),
  FUN = mean,
  na.rm = TRUE
)
This give me the following output:
Group.1 value1 value2
1 a 2 8.0
2 b 3 3.5
3 c 4 6.0
How do I now subtract from each value in the original data frame the corresponding mean of its factor? The actual dataset I am using is big, so I need a clean way to do it. I have managed to do it, but only with complicated for loops.
So the output that I want would be:
factor value1 value2
1 a -1 -1.0
2 a 1 1.0
3 b -1 -0.5
4 b 1 0.5
5 c 1 3.0
6 c -1 -3.0
Any help would be great. Thanks.
A dplyr solution
library(dplyr)
# Subtract the per-factor mean from value1 and value2; the result stays
# grouped by factor.
x %>%
  group_by(factor) %>%
  mutate(across(c(value1, value2), ~ .x - mean(.x)))
Output
# A tibble: 6 x 3
# Groups: factor [3]
factor value1 value2
<fct> <dbl> <dbl>
1 a -1 -1
2 a 1 1
3 b -1 -0.5
4 b 1 0.5
5 c 1 3
6 c -1 -3
You can try this dplyr approach:
library(dplyr)
# Data
x <- data.frame(
  factor = factor(c("a", "a", "b", "b", "c", "c")),
  value1 = c(1, 3, 2, 4, 5, 3),
  value2 = c(7, 9, 3, 4, 9, 3)
)
# Code: overwrite each value column with its deviation from the group mean.
x <- x %>%
  group_by(factor) %>%
  mutate(
    value1 = value1 - mean(value1),
    value2 = value2 - mean(value2)
  )
Output:
# A tibble: 6 x 3
# Groups: factor [3]
factor value1 value2
<fct> <dbl> <dbl>
1 a -1 -1
2 a 1 1
3 b -1 -0.5
4 b 1 0.5
5 c 1 3
6 c -1 -3
Here is a solution with data.table
library("data.table")
setDT(x)
# Columns to centre.
cols <- c("value1", "value2")
# For each group of `factor`, subtract the column mean from every column in
# .SD (restricted to `cols`).
x[, lapply(.SD, function(v) v - mean(v)), by = factor, .SDcols = cols]
or
library("data.table")
setDT(x)
# Per group, sweep() subtracts (its default FUN is "-") the column means of
# .SD from the columns of .SD; .SDcols picks the 2nd and 3rd columns of x.
x[, sweep(.SD, MARGIN = 2, STATS = colMeans(.SD)), by = factor, .SDcols = 2:3]
This question already has an answer here:
R code to assign a sequence based off of multiple variables [duplicate]
(1 answer)
Closed 3 years ago.
I have the following kind of data and I need the output shown in the second data frame:
# Input columns.
a <- c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2)
b <- c(1, 1, 1, 2, 3, 3, 3, 3, 4, 5, 6)
d <- c(1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7)
# Build the data frame directly; going through cbind() first creates a matrix,
# which would force all columns to a single common type.
df <- data.frame(a, b, d)
# Expected result column, appended to the input for comparison.
output <- c(1, 1, 1, 2, 1, 1, 1, 1, 2, 3, 4)
df_output <- data.frame(df, output)
I have tried cumsum and I am not able to get the desired results. Please guide. Regards, Enthu.
The output should restart at 1 whenever the value in column a changes.
Within each group of a, rows that share the same value of b should get the same number, starting from 1.
For example, at the 5th record column a changes and column b is 3, so the output resets to 1. Rows 5, 6, 7 and 8 all have the same value in column b, so they all get 1, and every later change in b should increment the output by 1.
We can group by column 'a' and then create the new column by matching 'b' against its unique values
library(dplyr)
# Within each value of a, number the distinct values of b in the order in
# which they first appear.
df2 <- df %>%
  group_by(a) %>%
  mutate(out = match(b, unique(b)))
df2
# A tibble: 11 x 4
# Groups: a [2]
# a b d out
# <dbl> <dbl> <dbl> <int>
# 1 1 1 1 1
# 2 1 1 2 1
# 3 1 1 3 1
# 4 1 2 4 2
# 5 2 3 1 1
# 6 2 3 2 1
# 7 2 3 3 1
# 8 2 3 4 1
# 9 2 4 5 2
#10 2 5 6 3
#11 2 6 7 4
Or another option is to coerce a factor variable to integer
# Within each value of a, convert b to a factor and use its integer codes.
df %>%
  group_by(a) %>%
  mutate(out = as.integer(factor(b)))
data
# Assemble the three input vectors into a data frame with columns a, b, d.
df <- data.frame(a = a, b = b, d = d)
This question already has answers here:
Aggregate / summarize multiple variables per group (e.g. sum, mean)
(10 answers)
Closed 6 years ago.
I have this dataset
# Group identifier and three numeric measurement columns.
id <- c(1, 1, 1, 2, 2, 3)
v1 <- c(3, 4, 5, 2, 4, 5)
v2 <- c(3, 1, 2, 1, 4, 5)
v3 <- c(2, 1, 2, 3, 3, 4)
mydata <- data.frame(id, v1, v2, v3)
> mydata
id v1 v2 v3
1 1 3 3 2
2 1 4 1 1
3 1 5 2 2
4 2 2 1 3
5 2 4 4 3
6 3 5 5 4
and grouped data by id
# Group the rows of mydata by id for the per-group summaries that follow.
groupdata <- mydata %>% group_by(id)
using summarize function can get a specific column mean value by id:
# One row per id holding the mean of v1 for that id.
groupdata %>% summarise(mean = mean(v1))
# A tibble: 3 × 2
id mean
<dbl> <dbl>
1 1 4
2 2 3
3 3 5
What I am trying to do is loop over each column and summarize it:
# Collect the column names of mydata as character strings.
colnames <- names(mydata)
# For each name, store a per-group summary in a variable called "<name>_mean".
for(i in colnames){
# NOTE: i holds the column name as a string, so mean(i) is the mean of a
# character value (NA) rather than the mean of the column it names.
assign(paste(i,"mean", sep = "_"), summarize(groupdata, mean = mean(i)))
}
but i got this
> v1_mean
# A tibble: 3 × 2
id mean
<dbl> <lgl>
1 1 NA
2 2 NA
3 3 NA
I found that you can't pass column names into summarize function as the parameter, is there any suggestions to improve the loop function?
Sorry, I misunderstood. Give this a shot.
library(dplyr)
# Replace every non-grouping column with its per-id mean, then collapse the
# now-identical rows within each id down to one.
grouped_mean <- mydata %>%
  group_by(id) %>%
  mutate_all(mean) %>%
  distinct(.keep_all = TRUE)
> grouped_mean
Source: local data frame [3 x 4]
Groups: id [3]
id v1 v2 v3
<dbl> <dbl> <dbl> <dbl>
1 1 4 2.0 1.666667
2 2 3 2.5 3.000000
3 3 5 5.0 4.000000
Per @jdobres' comment, you can skip a step with summarise_all
# Summarise every non-grouping column to its per-id mean in one step.
grouped_mean <- mydata %>%
  group_by(id) %>%
  summarise_all(mean)
> grouped_mean
# A tibble: 3 × 4
id v1 v2 v3
<dbl> <dbl> <dbl> <dbl>
1 1 4 2.0 1.666667
2 2 3 2.5 3.000000
3 3 5 5.0 4.000000
I think @Nick meant apply(mydata, 2, mean), which results in:
id v1 v2 v3
1.666667 3.833333 2.666667 2.500000
I have a data frame with 3 columns
# Two grouping columns (ID1, ID2) and the value to minimise.
df <- data.frame(
  ID1 = rep(c(1, 2), each = 4),
  ID2 = rep(1:2, times = 4),
  value = 1:8
)
I need to recover the min for each group (ID1, ID2) and the position(row.name) of this min in the original table.
Using group_by and summarise, I have obtained the min but I can't see a way to obtain the position as summarise gets rid of the columns not summarised and not used for group.
df <- data.frame(
  ID1 = rep(c(1, 2), each = 4),
  ID2 = rep(1:2, times = 4),
  value = 1:8
)
# Combine the two ids into a single "ID1.ID2" group key.
df$X <- paste0(df$ID1, ".", df$ID2)
# Minimum value per key; summarise() keeps only the key and the summary.
df <- summarise(group_by(df, X), Objective = min(value))
Any ideas on how to solve this to get the following?
X Objective Position
1 1.1 1 1
2 1.2 2 2
3 2.1 5 5
4 2.2 6 6
Thanks in advance
If I understand correctly, and since you're already using dplyr, you could do it like this:
library(dplyr)
library(tidyr)
# Merge ID1 and ID2 into X, record each row's original position, then keep
# the row with the smallest value within each X.
df %>%
  unite(X, ID1:ID2, sep = ".") %>%
  mutate(Position = row_number()) %>%
  group_by(X) %>%
  slice(which.min(value))
#Source: local data frame [4 x 3]
#Groups: X
#
# X value Position
#1 1.1 1 1
#2 1.2 2 2
#3 2.1 5 5
#4 2.2 6 6
Or alternatively (only dplyr) - I'd rather use this one:
# Record each row's original position, then keep the row with the smallest
# value within each (ID1, ID2) pair.
df %>%
  mutate(Position = row_number()) %>%
  group_by(ID1, ID2) %>%
  slice(which.min(value))
#Source: local data frame [4 x 4]
#Groups: ID1, ID2
#
# ID1 ID2 value Position
#1 1 1 1 1
#2 1 2 2 2
#3 2 1 5 5
#4 2 2 6 6
data
# Example data used by the dplyr/tidyr snippets.
df <- data.frame(
  ID1 = rep(1:2, each = 4),
  ID2 = rep(1:2, times = 4),
  value = seq_len(8)
)
Here's how I would approach this using data.table (rn would be your row number).
library(data.table)
# Convert in place, keeping the row names as column rn.
setDT(df, keep.rownames = TRUE)
# For each (ID1, ID2) pair, keep the row of .SD with the smallest value.
df[, .SD[which.min(value)], by = list(ID1, ID2)]
# ID1 ID2 rn value
# 1: 1 1 1 1
# 2: 1 2 2 2
# 3: 2 1 5 5
# 4: 2 2 6 6
Another option is ordering and then picking the unique values
# Sort df by value in place, then keep the first row of each (ID1, ID2) pair.
setorder(df, value)
unique(df, by = c("ID1", "ID2"))
# ID1 ID2 rn value
# 1: 1 1 1 1
# 2: 1 2 2 2
# 3: 2 1 5 5
# 4: 2 2 6 6
Neither approach requires creating the X column
Or using base R
# Sort by value so the smallest value of each group comes first.
df <- df[order(df[["value"]]), ]
# Keep only the first occurrence of each combination of the first two columns;
# the surviving row names are the positions in the original table.
df[!duplicated(df[, c(1, 2)]), ]
# ID1 ID2 value
# 1 1 1 1
# 2 1 2 2
# 5 2 1 5
# 6 2 2 6
data
# Example data used by the data.table and base R snippets.
df <- data.frame(
  ID1 = rep(c(1, 2), times = c(4, 4)),
  ID2 = rep(1:2, times = 4),
  value = seq_len(8)
)
Using Aggregate:
Data:
df <- data.frame(
  ID1 = c(rep(1, 4), rep(2, 4)),
  ID2 = rep(1:2, 4),
  value = 1:8
)
# Group key of the form "ID1.ID2".
df[["X"]] <- paste0(df$ID1, ".", df$ID2)
# rn is the row number, stored as an integer so it compares numerically
# (row.names() returns character strings, for which "10" sorts before "2").
df$rn <- seq_len(nrow(df))
# Keep only the key, the row number and the value, in that order.
df <- df[c("X", "rn", "value")]
#> df
# X rn value
#1 1.1 1 1
#2 1.2 2 2
#3 1.1 3 3
#4 1.2 4 4
#5 2.1 5 5
#6 2.2 6 6
#7 2.1 7 7
#8 2.2 8 8
Aggregate step:
# Find the minimum value per group with aggregate(), then merge back onto df
# to pick up the rn of the row that actually holds that minimum. Taking min()
# of every column independently would report the smallest rn in the group,
# which is only correct when the minimum happens to sit in the group's first
# row. If several rows tie for the minimum, all of them are returned.
df2 <- merge(aggregate(value ~ X, data = df, FUN = min), df)
#> df2
# X value rn
#1 1.1 1 1
#2 1.2 2 2
#3 2.1 5 5
#4 2.2 6 6