I am trying to do an inventory calculation in R which requires a row wise calculation for each Mat-Plant combination. Here's a test data set -
# Test data set: one row per Mat / Plant / Day combination.
df <- data.frame(
  Mat = c(rep("A", 6L), rep("B", 2L)),
  Plant = c("P1", "P1", "P1", "P2", "P2", "P2", "P1", "P1"),
  Day = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L),
  UU = c(0L, 10L, 0L, 0L, 0L, 120L, 10L, 0L),
  CumDailyFcst = c(11L, 22L, 33L, 0L, 5L, 10L, 20L, 50L),
  stringsAsFactors = FALSE
)
Mat Plant Day UU CumDailyFcst
1 A P1 1 0 11
2 A P1 2 10 22
3 A P1 3 0 33
4 A P2 1 0 0
5 A P2 2 0 5
6 A P2 3 120 10
7 B P1 1 10 20
8 B P1 2 0 50
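I need a new field "EffectiveFcst" such that when Day = 1 then EffectiveFcst = CumDailyFcst, and for the following days (within the same Mat-Plant combination) EffectiveFcst = CumDailyFcst - previous day's CumDailyFcst + max(previous day's EffectiveFcst - previous day's UU, 0).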
Here's the desired output -
Mat Plant Day UU CumDailyFcst EffectiveFcst
1 A P1 1 0 11 11
2 A P1 2 10 22 22
3 A P1 3 0 33 23
4 A P2 1 0 0 0
5 A P2 2 0 5 5
6 A P2 3 120 10 10
7 B P1 1 10 20 20
8 B P1 2 0 50 40
I am currently using a for loop but the actual table is >300K rows so hoping to do this with tidyverse for more elegant and faster approach. Tried the following but didn't work out -
# Attempt: seed EffectiveFcst on the first row of every Mat/Plant group, then
# try to derive the later rows from the lagged columns.
group_by(df, Mat, Plant) %>%
# First row of each group gets CumDailyFcst, every other row gets 0.
mutate(EffectiveFcst = ifelse(row_number()==1, CumDailyFcst, 0)) %>%
# NOTE(review): lag(EffectiveFcst) reads the column produced by the previous
# mutate() (seed on row 1, 0 elsewhere), not the values being computed here,
# so nothing is carried forward from row to row. max() also collapses its
# arguments to a single value per group instead of a per-row maximum.
mutate(EffectiveFcst = ifelse(row_number() > 1, CumDailyFcst - lag(CumDailyFcst, default = 0) + max(lag(EffectiveFcst, default = 0) - lag(UU, default = 0), 0), EffectiveFcst)) %>%
print(n = nrow(.))
We can use accumulate from purrr
library(tidyverse)
# EffectiveFcst is a running value within each Mat/Plant group:
#   day 1:  EffectiveFcst = CumDailyFcst
#   day t:  EffectiveFcst = (CumDailyFcst[t] - CumDailyFcst[t - 1]) +
#                           max(EffectiveFcst[t - 1] - UU[t - 1], 0)
# Because of the floor at zero, every row depends on the previous row's
# already-computed result, so the value has to be carried forward with
# accumulate2() rather than derived from lagged input columns alone.
# Rows are assumed to be ordered by Day within each group.
df %>%
  group_by(Mat, Plant) %>%
  mutate(
    # unlist() keeps the result an atomic vector even on purrr versions where
    # accumulate2() returns a list.
    EffectiveFcst = unlist(accumulate2(
      diff(CumDailyFcst),  # forecast increment for days 2..n
      head(UU, -1),        # UU of the preceding day
      function(prev_eff, fcst_increment, prev_uu) {
        fcst_increment + max(prev_eff - prev_uu, 0)
      },
      .init = as.numeric(first(CumDailyFcst))
    ))
  )
# A tibble: 8 x 6
# Groups: Mat, Plant [3]
# Mat Plant Day UU CumDailyFcst EffectiveFcst
# <chr> <chr> <int> <int> <int> <dbl>
#1 A P1 1 0 11 11
#2 A P1 2 10 22 22
#3 A P1 3 0 33 23
#4 A P2 1 0 0 0
#5 A P2 2 0 5 5
#6 A P2 3 120 10 10
#7 B P1 1 10 20 20
#8 B P1 2 0 50 40
Related
C1:
AMT A B C D
1 13 0 1 0 0
2 17 0 0 1 0
3 19 0 0 0 1
4 1 0 0 1 0
5 9 0 1 0 0
How can I count the frequency of AMT by A, B, C and D?
# Cross-product of the AMT column with the indicator columns (columns 2 to 5).
C2 <- crossprod(as.matrix(C1[1]), as.matrix(C1[2:5]))
It gives me the total sum of AMT per column.
My desired output combines A, B, C and D into one column (since they are binary) and then counts the frequency by type, i.e.
AMT GROUP N
1 1 A 1
2 9 B 1
3 13 B 1
4 17 C 1
5 19 D 1
...
AMT IS NOT LIMITED TO 1 9 13 17 ... RANGE FROM 0-30
# Attempted frequency count; group_by() is called without any grouping columns here.
res <- C1 %>% group_by( ) %>% summarise(Freq=n())
library(tidyverse)
# Reshape the indicator columns A:D to long form, keep the rows where the
# indicator is 1, and total AMT per indicator name.
C1 %>%
  tidyr::pivot_longer(
    cols = A:D,
    names_to = "Names",
    values_to = "Values"
  ) %>%
  group_by(Names) %>%
  filter(Values == 1) %>%
  # summarise() already returns exactly Names and AMT, so no select() is
  # needed afterwards (Values no longer exists at that point).
  summarise(AMT = sum(AMT))
Output:
Names AMT
<chr> <dbl>
1 B 22
2 C 18
3 D 19
You can use max.col to get the column name which has value 1 in it.
library(dplyr)
# For every row, take the name of the column (among all columns after the
# first) that holds the row maximum and store it as GROUP; N is a constant 1.
res <- C1 %>%
  transmute(
    AMT,
    GROUP = names(.)[-1][max.col(.[-1])],
    N = 1
  ) %>%
  arrange(AMT)
res
# AMT GROUP N
#4 1 C 1
#5 9 B 1
#1 13 B 1
#2 17 C 1
#3 19 D 1
data
# Sample data: AMT plus four 0/1 indicator columns.
C1 <- data.frame(
  AMT = c(13L, 17L, 19L, 1L, 9L),
  A = rep(0L, 5L),
  B = c(1L, 0L, 0L, 0L, 1L),
  C = c(0L, 1L, 0L, 1L, 0L),
  D = c(0L, 0L, 1L, 0L, 0L)
)
I have dataframe something like:
myData <- User X Y Similar
A 1 4 100
A 1 2 100
A 1 1 100
A 3 2 80
A 2 1 20
A 2 4 100
B 3 1 50
B 4 2 90
B 1 3 100
To something like this:
myData <- User X Y Similar
A 1 4 0
A 1 2 0
A 1 1 0
A 3 2 80
A 2 1 20
A 2 4 100
B 3 1 50
B 4 2 90
B 1 3 0
Question
I want to set the value in the Similar column to 0 under a condition: X equals 1 and Similar equals 100. How can I do that in R?
Thanks
We create a logical vector based on 'X' and 'Similar', and use it as an index to replace the matching values of 'Similar' with 0.
# Flag the rows where X is 1 and Similar is 100, then overwrite Similar with 0
# on exactly those rows.
i1 <- myData$X == 1 & myData$Similar == 100
myData$Similar <- replace(myData$Similar, i1, 0)
-output
myData
# User X Y Similar
#1 A 1 4 0
#2 A 1 2 0
#3 A 1 1 0
#4 A 3 2 80
#5 A 2 1 20
#6 A 2 4 100
#7 B 3 1 50
#8 B 4 2 90
#9 B 1 3 0
data
# Sample data for the conditional replacement above.
myData <- data.frame(
  User = c(rep("A", 6L), rep("B", 3L)),
  X = c(1L, 1L, 1L, 3L, 2L, 2L, 3L, 4L, 1L),
  Y = c(4L, 2L, 1L, 2L, 1L, 4L, 1L, 2L, 3L),
  Similar = c(100L, 100L, 100L, 80L, 20L, 100L, 50L, 90L, 100L),
  stringsAsFactors = FALSE
)
I want to sum the "value" column by group1 and by group2.
group2 can range from 1 to 5.
If there is no entry for group2, the sum should be 0.
Data:
group1 group2 value
a 1 100
a 2 200
a 3 300
b 1 10
b 2 20
I am using
# Sum value for every group1 / group2 combination present in data.
with(data, aggregate(value, by = list(group1, group2), FUN = sum))
which gives
group1 group2 value
a 1 100
a 2 200
a 3 300
b 1 10
b 2 20
However, the result should look like
group1 group2 value
a 1 100
a 2 200
a 3 300
a 4 0
a 5 0
b 1 10
b 2 20
b 3 0
b 4 0
b 5 0
How can i address this using the aggregate function in R?
Thank you!
We can use complete from tidyr to complete missing combinations.
library(dplyr)
library(tidyr)
# Sum value for every group1 / group2 pair that is present in the data.
df %>%
group_by(group1, group2) %>%
summarise(value = sum(value)) %>%
# Add the group2 values from 1:5 that are missing, with value filled as 0.
# NOTE(review): the output below shows 1:5 for each group1 separately, so this
# step presumably relies on the summarised result still being grouped by
# group1 - confirm against the installed dplyr version.
complete(group2 = 1:5, fill = list(value = 0))
# group1 group2 value
# <fct> <int> <dbl>
# 1 a 1 100
# 2 a 2 200
# 3 a 3 300
# 4 a 4 0
# 5 a 5 0
# 6 b 1 10
# 7 b 2 20
# 8 b 3 0
# 9 b 4 0
#10 b 5 0
data
# Sample data; group1 is a factor with levels "a" and "b".
df <- data.frame(
  group1 = factor(c("a", "a", "a", "b", "b"), levels = c("a", "b")),
  group2 = c(1L, 2L, 3L, 1L, 2L),
  value = c(100L, 200L, 300L, 10L, 20L)
)
You need of course to tell R that "group 2 can range from 1 to 5". Best you merge it with an expand.grid accordingly and use with.
# Build every group1 x group2 combination with a value of 0, outer-merge it
# with the observed rows, and sum value per combination so that combinations
# without observations come out as 0.
local({
  zero_grid <- expand.grid(group1 = c("a", "b"), group2 = 1:5, value = 0)
  padded <- merge(zero_grid, data, all = TRUE)
  aggregate(padded$value, by = list(padded$group1, padded$group2), FUN = sum)
})
# Group.1 Group.2 x
# 1 a 1 100
# 2 b 1 10
# 3 a 2 200
# 4 b 2 20
# 5 a 3 300
# 6 b 3 0
# 7 a 4 0
# 8 b 4 0
# 9 a 5 0
# 10 b 5 0
Data:
# Sample data; group1 is kept as character.
data <- data.frame(
  group1 = c("a", "a", "a", "b", "b"),
  group2 = c(1L, 2L, 3L, 1L, 2L),
  value = c(100L, 200L, 300L, 10L, 20L),
  stringsAsFactors = FALSE
)
The Result field should contain (Sum of Mod values for a STATE where category not equal to 0). Remaining values should be 0.
I am trying to achieve the same using for loop but not getting the desired result. Appreciate your help in getting this resolved.
Here is some SampleData
State Category Mod Result
1 A 0 5 45
2 A 1 10 0
3 A 2 15 0
4 A 3 10 0
5 A 4 10 0
6 B 0 11 60
7 B 1 12 0
8 B 2 14 0
9 B 3 16 0
10 B 4 18 0
This is what I am trying via for loop:
# Attempt to fill Result row by row.
# NOTE(review): if SampleData is a data frame, `i in SampleData` iterates over
# its columns, not over row numbers; seq_len(nrow(SampleData)) would give row
# indices.
for(i in SampleData) {
SampleData$Result[i] <- ifelse(SampleData$Category[i] == "0",
# NOTE(review): `:` binds tighter than `+`, so i:i+5 is (i:i) + 5, i.e. the
# single index i + 5, not the range i:(i + 5).
sum(SampleData$Mod[i:i+5]),
0)
}
You could try
# Per State, total Mod over the rows whose Category is not 0; show that total
# on the Category == 0 rows and 0 everywhere else. The Category mask is applied
# to Mod before ave() splits it by State, because inside FUN only the per-State
# subset of Mod is available while Category would still be the full column.
transform(df, Result = ifelse(Category == 0,
                              ave(Mod * (Category != 0), State, FUN = sum),
                              0))
# State Category Mod Result
#1 A 0 5 45
#2 A 1 10 0
#3 A 2 15 0
#4 A 3 10 0
#5 A 4 10 0
#6 B 0 11 60
#7 B 1 12 0
#8 B 2 14 0
#9 B 3 16 0
#10 B 4 18 0
ave(Mod * (Category != 0), State, FUN = sum) sets Mod to 0 on the rows where Category == 0 is TRUE and then calculates the sum of what is left for each value of State, i.e. the sum of Mod over the rows where Category is not 0.
data
# Sample data; row names are the character strings "1" to "10".
df <- data.frame(
  State = rep(c("A", "B"), each = 5L),
  Category = c(0L, 1L, 2L, 3L, 4L, 0L, 1L, 2L, 3L, 4L),
  Mod = c(5L, 10L, 15L, 10L, 10L, 11L, 12L, 14L, 16L, 18L),
  row.names = as.character(1:10),
  stringsAsFactors = FALSE
)
A dplyr version, if you like:
library(dplyr)
# tibble() is the current constructor; data_frame() is deprecated.
SampleData <- tibble(
  State = c("A", "A", "A", "A", "A", "B", "B", "B", "B", "B"),
  Category = c(0L, 1L, 2L, 3L, 4L, 0L, 1L, 2L, 3L, 4L),
  Mod = c(5L, 10L, 15L, 10L, 10L, 11L, 12L, 14L, 16L, 18L)
)
# Per State, put the total of Mod over the non-zero categories on the
# Category == 0 row(s) and 0 on every other row. Selecting by Category, rather
# than dropping the first row, keeps the result correct when the Category == 0
# row is not the first row of its State.
SampleData %>%
  group_by(State) %>%
  mutate(Result = ifelse(Category == 0, sum(Mod[Category != 0]), 0))
#> # A tibble: 10 x 4
#> # Groups: State [2]
#> State Category Mod Result
#> <chr> <int> <int> <dbl>
#> 1 A 0 5 45
#> 2 A 1 10 0
#> 3 A 2 15 0
#> 4 A 3 10 0
#> 5 A 4 10 0
#> 6 B 0 11 60
#> 7 B 1 12 0
#> 8 B 2 14 0
#> 9 B 3 16 0
#> 10 B 4 18 0
Input Data
ID value
a 10
a 12
a 18
a 13
b 23
b 25
b 33
c 17
c 23
c 27
The output data should look like this:
ID value Diff
a 10 0
a 12 2
a 18 8
a 13 3
b 23 0
b 25 2
b 33 10
c 17 0
c 23 6
c 27 10
I got this code from the net:
library(data.table)
DT <- as.data.table(dat)
# Per ID, DIFTIME is 0 for the first row and the difference between each pair
# of consecutive dates after that.
DT[, `:=`(DIFTIME, c(0, diff(as.Date(DATETIME)))), by = "ID"]
but this only creates the difference between two successive rows, not the difference from the first row of each group.
# Sample data: eight observations across three IDs, each with a timestamp and
# a value.
dat <- structure(
  list(
    ID = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L),
    DATETIME = structure(
      c(1328346000, 1328479200, 1331024400, 1331025400,
        1328086800, 1328184000, 1336287600, 1336424400),
      class = c("POSIXct", "POSIXt"), tzone = ""
    ),
    VALUE = c(300L, 150L, 650L, 450L, 855L, 240L, 340L, 240L)
  ),
  class = "data.frame",
  # Every column has eight elements, so the compact row names must describe
  # eight rows; a smaller count makes nrow() disagree with the column lengths.
  row.names = c(NA, -8L)
)
You could also use dplyr, where df is the original data
library(dplyr)
# Within each ID, subtract the group's first value from every value.
df %>%
  group_by(ID) %>%
  mutate(Diff = value - first(value))
# ID value Diff
# 1 a 10 0
# 2 a 12 2
# 3 a 18 8
# 4 a 13 3
# 5 b 23 0
# 6 b 25 2
# 7 b 33 10
# 8 c 17 0
# 9 c 23 6
# 10 c 27 10
using data.table
# Convert df to a data.table by reference, then add Diff per ID as the offset
# of each value from the first value of that ID.
setDT(df)
df[, Diff := value - value[1L], by = "ID"]
df
# ID value Diff
#1: a 10 0
#2: a 12 2
#3: a 18 8
#4: a 13 3
#5: b 23 0
#6: b 25 2
#7: b 33 10
#8: c 17 0
#9: c 23 6
#10: c 27 10
data
# Sample data: ID and value, matching the input table shown above.
df <- data.frame(
  ID = rep(c("a", "b", "c"), times = c(4L, 3L, 3L)),
  value = c(10L, 12L, 18L, 13L, 23L, 25L, 33L, 17L, 23L, 27L),
  stringsAsFactors = FALSE
)
You can do it in base R using the ave function.
# Base R: per ID, offset each value from the first value of that ID.
dat$Diff <- with(dat, ave(value, ID, FUN = function(v) v - v[1]))