lag function in ifelse - r

I have a dataframe df with a structure like this:
val1 val2 val3
1 12 NA
2 14 NA
3 54 54
1 35 4
2 3 5
3 7 NA
4 8 NA
5 9 NA
Expected value:
val1 val2 val3 val4
1 12 NA 12
2 14 NA 12
3 54 54 54
1 35 4 35
2 3 5 3
3 7 NA 3
4 8 NA 3
5 9 NA 3
Problem:
I need a new column val4 with the following condition
df$val4 <- ifelse(df$val1 == 1, df$val2, ifelse(is.na(df$val3), lag(df$val4), df$val2))
This leads to
Error in hasTsp(x) : attempt to set an attribute on NULL
Condition:
val4 is equal to value of val2 when val1 is equal 1 (val3 does not matter)
val4 is equal to its previous value when val3 is NA (except when val1 is equal to 1)
P.S: I know I can use for loop here, but that would be very slow!

We can use data.table with zoo. Convert the 'data.frame' to 'data.table' (setDT(df)), create the 'val4' by multiplying the 'val2' with a vector of 1's and NA (NA^is.na(val3) - returns NA for NA elements in 'val3' while the non-NA is changed to 1), then for 1 in 'val1', we assign 'val4' to 'val2', and replace the NA elements with the previous non-NA elements with na.locf
library(data.table)
library(zoo)
# Build val4 in three in-place steps on the data.table:
#  1. val2 where val3 is present, NA otherwise (NA^TRUE is NA, NA^FALSE is 1)
#  2. rows with val1 == 1 always take val2, regardless of val3
#  3. carry the last non-NA value forward; na.rm = FALSE keeps a leading NA
#     instead of dropping it, so the result always has one value per row
setDT(df)[, val4 := val2 * NA^is.na(val3)
  ][val1 == 1, val4 := val2
  ][, val4 := na.locf(val4, na.rm = FALSE)][]
# val1 val2 val3 val4
#1: 1 12 NA 12
#2: 2 14 NA 12
#3: 3 54 54 54
#4: 1 35 4 35
#5: 2 3 5 3
#6: 3 7 NA 3
#7: 4 8 NA 3
#8: 5 9 NA 3
More code explanation
`is.na` returns a `logical` vector
setDT(df)[, is.na(val3)]
#[1] TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
To change the TRUE values to NA and all other values to 1
setDT(df)[, NA^is.na(val3)]
#[1] NA NA 1 1 1 NA NA NA
Multiply by 'val2'
setDT(df)[, val2 * NA^is.na(val3)]
#[1] NA NA 54 35 3 NA NA NA
and the rest is just assignment based on the logical condition in 'i'
data
df <- structure(list(val1 = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L), val2 = c(12L,
14L, 54L, 35L, 3L, 7L, 8L, 9L), val3 = c(NA, NA, 54L, 4L, 5L,
NA, NA, NA)), .Names = c("val1", "val2", "val3"), class = "data.frame",
row.names = c(NA, -8L))

Related

How to find rows with same values in two columns?

It's a little hard to explain, but I'm trying to compare the column "cpf" from two different data frames. I want to identify when the value in the two "cpf" columns from (df1) and (df2) is equal (these values can be in different rows). After that, I want to update the NA values if these are available from the other data frame
df1
cpf x y
1 21 NA NA
2 32 NA NA
3 43 NA NA
4 54 NA NA
5 65 NA NA
df2
cpf x y
1 54 5 10
2 0 NA NA
3 65 3 2
4 0 NA NA
5 0 NA NA
I want the following result
df3
cpf x y
1 21 NA NA
2 32 NA NA
3 43 NA NA
4 54 5 10
5 65 3 2
We could do a join on 'cpf' and use fcoalesce
library(data.table)
# Update join: for each df2 row, find the df1 rows with the same 'cpf' and
# overwrite x/y in place. fcoalesce() keeps df1's value when it is not NA and
# otherwise takes the value from df2 (the i.-prefixed columns).
setDT(df1)[df2, c('x', 'y') := .(fcoalesce(x, i.x),
fcoalesce(y, i.y)), on = .(cpf)]
-output
df1
# cpf x y
#1: 21 NA NA
#2: 32 NA NA
#3: 43 NA NA
#4: 54 5 10
#5: 65 3 2
Or using coalesce from dplyr after a left_join
library(dplyr)
# Left-join df2 onto df1 by 'cpf', then keep df1's value for each column and
# fall back to df2's value where df1 has NA.
df1 %>%
  left_join(df2, by = "cpf") %>%
  transmute(
    cpf,
    x = coalesce(x.x, x.y),
    y = coalesce(y.x, y.y)
  )
# cpf x y
#1 21 NA NA
#2 32 NA NA
#3 43 NA NA
#4 54 5 10
#5 65 3 2
In base R, can use match
# For every row of df1, look up the position of its 'cpf' in df2 (NA when the
# key is absent). Indexing both sides from the same lookup keeps each df1 row
# paired with its own df2 row even when the shared keys appear in a different
# order in the two data frames.
pos <- match(df1$cpf, df2$cpf)
found <- !is.na(pos)
if (any(found)) {
  df1[found, -1] <- df2[pos[found], -1]
}
data
df1 <- structure(list(cpf = c(21L, 32L, 43L, 54L, 65L), x = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), y = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), row.names = c("1",
"2", "3", "4", "5"), class = "data.frame")
df2 <- structure(list(cpf = c(54L, 0L, 65L, 0L, 0L), x = c(5L, NA, 3L,
NA, NA), y = c(10L, NA, 2L, NA, NA)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5"))
# Join df2 onto df1 by 'cpf'; df2's columns get the ".new" suffix. Each column
# keeps df1's value when present and is filled from df2 only where df1 is NA,
# so existing df1 values are never discarded.
df1 %>%
  left_join(df2, by = "cpf", suffix = c("", ".new")) %>%
  mutate(x = coalesce(x, x.new), y = coalesce(y, y.new)) %>%
  select(cpf, x, y)
Output:
cpf x y
1 21 NA NA
2 32 NA NA
3 43 NA NA
4 54 5 10
5 65 3 2
Another base R option using merge
# Left merge on 'cpf'. With suffixes c(".x", ""), df1's x/y are renamed to
# x.x/y.x and df2's keep the plain names, so the final [names(df1)] selection
# returns df2's values for matched keys and NA for unmatched ones.
# NOTE(review): non-NA values already present in df1 would be discarded here;
# that is harmless for this example, where df1's x and y are all NA.
merge(df1,
df2,
by = "cpf",
all.x = TRUE,
suffixes = c(".x", "")
)[names(df1)]
gives
cpf x y
1 21 NA NA
2 32 NA NA
3 43 NA NA
4 54 5 10
5 65 3 2

Add rows to a data-frame based on values in one of the columns

Currently the data-frame looks something like this:
Scenario Month A B C
1 1 -0.593186301 1.045550808 -0.593816304
1 2 0.178626141 2.043084432 0.111370583
1 3 1.205779717 -0.324083723 -1.397716949
2 1 0.933615199 0.052647056 -0.656486153
2 2 1.647291688 -1.065793671 0.799040546
2 3 1.613663101 -1.955567231 -1.817457972
3 1 -0.621991775 1.634069402 -1.404981646
3 2 -1.899326887 -0.836322394 -1.826351541
3 3 0.164235141 -1.160701812 1.238246459
I'd like to add rows on top of the row where Month = 1 as below. I know dplyr has an add_rows function but I'd like to add rows based on a condition. Any help is hugely appreciated.
Scenario Month A B C
0
1 1 -0.593186301 1.045550808 -0.593816304
1 2 0.178626141 2.043084432 0.111370583
1 3 1.205779717 -0.324083723 -1.397716949
0
2 1 0.933615199 0.052647056 -0.656486153
2 2 1.647291688 -1.065793671 0.799040546
2 3 1.613663101 -1.955567231 -1.817457972
0
3 1 -0.621991775 1.634069402 -1.404981646
3 2 -1.899326887 -0.836322394 -1.826351541
3 3 0.164235141 -1.160701812 1.238246459
A solution using tidyverse.
library(tidyverse)
# Cut the data into one piece per Scenario, put a one-row tibble holding
# Scenario = 0 in front of each piece (bind_rows fills the remaining columns
# with NA), and stack the pieces back together.
dat2 <- dat %>%
  split(f = .$Scenario) %>%
  map(function(piece) bind_rows(tibble(Scenario = 0), piece)) %>%
  bind_rows()
dat2
# # A tibble: 12 x 5
# Scenario Month A B C
# <dbl> <int> <dbl> <dbl> <dbl>
# 1 0 NA NA NA NA
# 2 1 1 -0.593 1.05 -0.594
# 3 1 2 0.179 2.04 0.111
# 4 1 3 1.21 -0.324 -1.40
# 5 0 NA NA NA NA
# 6 2 1 0.934 0.0526 -0.656
# 7 2 2 1.65 -1.07 0.799
# 8 2 3 1.61 -1.96 -1.82
# 9 0 NA NA NA NA
# 10 3 1 -0.622 1.63 -1.40
# 11 3 2 -1.90 -0.836 -1.83
# 12 3 3 0.164 -1.16 1.24
DATA
dat <- read.table(text = "Scenario Month A B C
1 1 -0.593186301 1.045550808 -0.593816304
1 2 0.178626141 2.043084432 0.111370583
1 3 1.205779717 -0.324083723 -1.397716949
2 1 0.933615199 0.052647056 -0.656486153
2 2 1.647291688 -1.065793671 0.799040546
2 3 1.613663101 -1.955567231 -1.817457972
3 1 -0.621991775 1.634069402 -1.404981646
3 2 -1.899326887 -0.836322394 -1.826351541
3 3 0.164235141 -1.160701812 1.238246459 ",
header = TRUE)
Somehow add_row doesn't take multiple values to its .before parameter.
One way is to split the dataframe wherever Month = 1 and then for each dataframe add a row using add_row above Month = 1.
library(tidyverse)
# cumsum(df$Month == 1) increases by one at every Month == 1 row, so split()
# starts a new chunk at each such row; add_row() then inserts a Scenario = 0
# row (other columns NA) just before the Month == 1 row of each chunk, and
# map_df() row-binds the chunks.
map_df(split(df, cumsum(df$Month == 1)),
~ add_row(., Scenario = 0, .before = which(.$Month == 1)))
# Scenario Month A B C
#1 0 NA NA NA NA
#2 1 1 -0.5931863 1.04555081 -0.5938163
#3 1 2 0.1786261 2.04308443 0.1113706
#4 1 3 1.2057797 -0.32408372 -1.3977169
#5 0 NA NA NA NA
#6 2 1 0.9336152 0.05264706 -0.6564862
#7 2 2 1.6472917 -1.06579367 0.7990405
#8 2 3 1.6136631 -1.95556723 -1.8174580
#9 0 NA NA NA NA
#10 3 1 -0.6219918 1.63406940 -1.4049816
#11 3 2 -1.8993269 -0.83632239 -1.8263515
#12 3 3 0.1642351 -1.16070181 1.2382465
Here is one option with data.table
library(data.table)
# Within each Scenario, index .SD with c(.N + 1, 1..N): position .N + 1 is out
# of range and yields an all-NA row, which therefore comes first in the group.
# The grouping column is still filled in on that row, so the first occurrence
# of every Scenario value is then overwritten with 0.
setDT(df1)[, .SD[c(.N+1, seq_len(.N))], Scenario][
!duplicated(Scenario), Scenario := 0][]
# Scenario Month A B C
# 1: 0 NA NA NA NA
# 2: 1 1 -0.5931863 1.04555081 -0.5938163
# 3: 1 2 0.1786261 2.04308443 0.1113706
# 4: 1 3 1.2057797 -0.32408372 -1.3977169
# 5: 0 NA NA NA NA
# 6: 2 1 0.9336152 0.05264706 -0.6564862
# 7: 2 2 1.6472917 -1.06579367 0.7990405
# 8: 2 3 1.6136631 -1.95556723 -1.8174580
# 9: 0 NA NA NA NA
#10: 3 1 -0.6219918 1.63406940 -1.4049816
#11: 3 2 -1.8993269 -0.83632239 -1.8263515
#12: 3 3 0.1642351 -1.16070181 1.2382465
Or as @chinsoon12 mentioned in the comments
# Per Scenario group, stack a one-row list holding Scenario = 0 on top of the
# group's rows (Scenario repeated .N times plus .SD); fill = TRUE pads the
# missing columns of that first row with NA. by = adds its own grouping column
# in front, which the trailing [, -1L] drops.
setDT(df1)[, rbindlist(.(.(Scenario=0L), c(.(Scenario=rep(Scenario, .N)),
.SD)), use.names=TRUE, fill=TRUE), by=.(Scenario)][, -1L]
data
df1 <- structure(list(Scenario = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L
), Month = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), A = c(-0.593186301,
0.178626141, 1.205779717, 0.933615199, 1.647291688, 1.613663101,
-0.621991775, -1.899326887, 0.164235141), B = c(1.045550808,
2.043084432, -0.324083723, 0.052647056, -1.065793671, -1.955567231,
1.634069402, -0.836322394, -1.160701812), C = c(-0.593816304,
0.111370583, -1.397716949, -0.656486153, 0.799040546, -1.817457972,
-1.404981646, -1.826351541, 1.238246459)), class = "data.frame",
row.names = c(NA,
-9L))
Here's a simple way (without loops) using base R -
# Repeat every Month == 1 row twice (all other rows once); the first copy of
# each repeated row becomes the inserted "Scenario = 0" row.
row_idx <- rep(seq_len(nrow(df)), (df$Month == 1) + 1)
df1 <- df[row_idx, ]
# The inserted rows are identified from the repetition index itself, so rows
# that merely happen to be identical in the data, or that already contain NA
# in Scenario, are left untouched.
inserted <- duplicated(row_idx, fromLast = TRUE)
df1[inserted, ] <- NA
df1$Scenario[inserted] <- 0
df1
Scenario Month A B C
1 0 NA NA NA NA
1.1 1 1 -0.5931863 1.04555081 -0.5938163
2 1 2 0.1786261 2.04308443 0.1113706
3 1 3 1.2057797 -0.32408372 -1.3977169
4 0 NA NA NA NA
4.1 2 1 0.9336152 0.05264706 -0.6564862
5 2 2 1.6472917 -1.06579367 0.7990405
6 2 3 1.6136631 -1.95556723 -1.8174580
7 0 NA NA NA NA
7.1 3 1 -0.6219918 1.63406940 -1.4049816
8 3 2 -1.8993269 -0.83632239 -1.8263515
9 3 3 0.1642351 -1.16070181 1.2382465
Data -
df <- structure(list(Scenario = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L
), Month = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), A = c(-0.593186301,
0.178626141, 1.205779717, 0.933615199, 1.647291688, 1.613663101,
-0.621991775, -1.899326887, 0.164235141), B = c(1.045550808,
2.043084432, -0.324083723, 0.052647056, -1.065793671, -1.955567231,
1.634069402, -0.836322394, -1.160701812), C = c(-0.593816304,
0.111370583, -1.397716949, -0.656486153, 0.799040546, -1.817457972,
-1.404981646, -1.826351541, 1.238246459)), class = "data.frame", row.names = c(NA,
-9L))

Set value of a column to NA based on conditions in R

I have a data frame, a reproducible example is as follows:
structure(list(subscriberid = c(1177460837L, 1177460837L, 1177460837L,
1146526049L, 1146526049L, 1146526049L), variable = c("3134",
"4550", "4550", "5160", "2530", "2530"), value = c(1, 2, 2, 1,
2, 2), gender = c(2, 2, 2, 1, 2, 2), cwe = c(NA, 50L, 50L, NA,
30L, 30L), hw = c(NA, 48L, 48L, NA, 26L, 26L), resp = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), cna = c(3L, 1L, 1L, 3L, 1L, 1L)), .Names = c("subscriberid",
"variable", "value", "gender", "cwe", "hw", "resp", "cna"), row.names = c(4L,
5L, 6L, 9L, 10L, 11L), class = "data.frame")
The actual data frame looks like this:
subscriberid variable value gender cwe hw resp cna
4 1177460837 3134 1 2 NA NA NA 3
5 1177460837 4550 2 2 50 48 NA 1
6 1177460837 4550 2 2 50 48 NA 1
9 1146526049 5160 1 1 NA NA NA 3
10 1146526049 2530 2 2 30 26 NA 1
11 1146526049 2530 2 2 30 26 NA 1
In the above df, row 5 and 6 are exactly the same. From row 5, I want to remove 48 and row 6 I want to remove 50. Essentially, I want to retain only one age in a row and set the other to NA. I tried using a for loop but that sets column values in the column that I refer in both the rows to NA.
for (i in 1:nrow(test)) {
test$hw[i] <- ifelse(!is.na(test$cwe[i]) & !is.na(test$hw[i]), NA, test$hw[i])
}
I am trying to set an if condition to identify if both the rows are same, then I want to iteratively remove one of the values from the first row and remove the other from the second.
The desired output is as follows:
subscriberid variable value gender cwe hw resp cna
4 1177460837 3134 1 2 NA NA NA 3
5 1177460837 4550 2 2 50 NA NA 1
6 1177460837 4550 2 2 NA 48 NA 1
9 1146526049 5160 1 1 NA NA NA 3
10 1146526049 2530 2 2 30 NA NA 1
11 1146526049 2530 2 2 NA 26 NA 1
You can use a combination of which() and duplicated() to receive duplicated rows.
Because you need to change values twice of the rows, you have to create a copy of the dataframe. Note that this will only work if the identical rows are always consecutive.
# Positions of rows that repeat an earlier row, computed once on the
# untouched data so both updates below see the same duplicates.
dup_rows <- which(duplicated(df))
dfNA <- df
# Blank 'cwe' on each duplicate and 'hw' on the row just before it.
dfNA$cwe[dup_rows] <- NA
dfNA$hw[dup_rows - 1] <- NA
dfNA
# subscriberid variable value gender cwe hw resp cna
#4 1177460837 3134 1 2 NA NA NA 3
#5 1177460837 4550 2 2 50 NA NA 1
#6 1177460837 4550 2 2 NA 48 NA 1
#9 1146526049 5160 1 1 NA NA NA 3
#10 1146526049 2530 2 2 30 NA NA 1
#11 1146526049 2530 2 2 NA 26 NA 1
A possible solution :
# Flag each row (from the second one on) that is identical to the row before
# it. N.B.: do.call("paste", c(DF, sep = "\r")) is used internally by the
# "duplicated.data.frame" function to build one comparable string per row.
rowStrings <- do.call("paste", c(DF, sep = "\r"))
currRowIsEqualToPrev <- rowStrings[-1] == rowStrings[-length(rowStrings)]
# c(flags, FALSE) marks the first row of each identical pair and
# c(FALSE, flags) the second one: blank 'hw' on the first row and 'cwe' on the
# second, which is the layout asked for in the desired output.
DF[c(currRowIsEqualToPrev, FALSE), 'hw'] <- NA
DF[c(FALSE, currRowIsEqualToPrev), 'cwe'] <- NA
DF
# subscriberid variable value gender cwe hw resp cna
#4 1177460837 3134 1 2 NA NA NA 3
#5 1177460837 4550 2 2 50 NA NA 1
#6 1177460837 4550 2 2 NA 48 NA 1
#9 1146526049 5160 1 1 NA NA NA 3
#10 1146526049 2530 2 2 30 NA NA 1
#11 1146526049 2530 2 2 NA 26 NA 1
Using lead and lag from dplyr package:
library(dplyr)
# Within each (subscriberid, variable) group, keep 'cwe' only where the next
# row has the same value and 'hw' only where the previous row has the same
# value; every other position becomes NA.
# NOTE(review): lead()/lag() are NA at the group edges, so the comparison is
# NA there and if_else() yields NA. A group with a single row therefore ends
# up NA in both columns even if it held values. That is harmless for this
# example, where the single-row groups are already NA.
df1 %>%
group_by(subscriberid, variable) %>%
mutate(cwe = if_else(lead(cwe) == cwe, cwe, NA_integer_),
hw = if_else(lag(hw) == hw, hw, NA_integer_)) %>%
ungroup()
# # A tibble: 6 x 8
# subscriberid variable value gender cwe hw resp cna
# <int> <int> <int> <int> <int> <int> <lgl> <int>
# 1 1177460837 3134 1 2 NA NA NA 3
# 2 1177460837 4550 2 2 50 NA NA 1
# 3 1177460837 4550 2 2 NA 48 NA 1
# 4 1146526049 5160 1 1 NA NA NA 3
# 5 1146526049 2530 2 2 30 NA NA 1
# 6 1146526049 2530 2 2 NA 26 NA 1
I took a shot at it. This relies on using group_by from dplyr to find duplicate rows. This method assumes that rows can reliably be identified as identical by using the subscriberid, variable, value, gender, resp, and cna columns alone.
Because it is operating within groups only, it will work even if a preceding non-identical row contains the same value for cwe (I did check this, but I would also confirm it for yourself if I were you).
library(dplyr)
# Rows sharing all six grouping columns are treated as duplicates of each
# other. na_if(x, y) turns x into NA wherever it equals y, so 'cwe' is blanked
# when it repeats the previous row's value and 'hw' when it repeats the next
# row's value within the group.
ndf <- df %>%
group_by(subscriberid, variable, value, gender, resp, cna) %>%
mutate(cwe = na_if(cwe, lag(cwe)),
hw = na_if(hw, lead(hw))) %>%
ungroup()
Output:
# A tibble: 6 x 8
subscriberid variable value gender cwe hw resp cna
<int> <chr> <dbl> <dbl> <int> <int> <int> <int>
1 1177460837 3134 1. 2. NA NA NA 3
2 1177460837 4550 2. 2. 50 NA NA 1
3 1177460837 4550 2. 2. NA 48 NA 1
4 1146526049 5160 1. 1. NA NA NA 3
5 1146526049 2530 2. 2. 30 NA NA 1
6 1146526049 2530 2. 2. NA 26 NA 1

lapply alternative to for loop to append to data frame

I have a data frame:
df<-structure(list(chrom = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
pos = c(10L, 200L, 134L, 400L, 600L, 1000L, 20L, 33L, 40L,
45L, 50L, 55L, 100L, 123L)), .Names = c("chrom", "pos"), row.names = c(NA, -14L), class = "data.frame")
> head(df)
chrom pos
1 1 10
2 1 200
3 1 134
4 1 400
5 1 600
6 1 1000
And I want to calculate pos[i+1] - pos[i] on the same chromosome (chrom)
By using a for loop over each chrom level, and another over each row I get the expected results:
for (c in levels(df$chrom)){
df_chrom<-filter(df, chrom == c)
df_chrom<-arrange(df_chrom, df_chrom$pos)
for (i in 1:nrow(df_chrom)){
dist<-(df_chrom$pos[i+1] - df_chrom$pos[i])
logdist<-log10(dist)
cat(c, i, df_chrom$pos[i], dist, logdist, "\n")
}
}
However, I want to save this to a data frame, and think that lapply or apply is the right way to go about this. I can't work out how to make the pos[i+1] - pos[i] calculation though (seeing as lapply works on each row/column).
Any pointers would be appreciated
Here's the output from my solution:
chrom index pos dist log10dist
1 1 10 124 2.093422
1 2 134 66 1.819544
1 3 200 200 2.30103
1 4 400 200 2.30103
1 5 600 400 2.60206
1 6 1000 NA NA
2 1 20 13 1.113943
2 2 33 NA NA
3 1 40 5 0.69897
3 2 45 NA NA
4 1 50 5 0.69897
4 2 55 45 1.653213
4 3 100 23 1.361728
4 4 123 NA NA
We could do this using a group by difference. Convert the 'data.frame' to 'data.table' (setDT(df)), grouped by 'chrom', order the 'pos', get the difference of 'pos' (diff) and also log of the difference
library(data.table)
# Rows are first sorted by 'pos' (the i argument), then processed per 'chrom':
# v1 holds the gaps between consecutive positions, padded with a trailing NA
# because diff() returns one element fewer than its input.
setDT(df)[order(pos), {v1 <- diff(pos)
.(index = seq_len(.N), pos = pos,
dist = c(v1, NA), logdiff = c(log10(v1), NA))}
, by = chrom]
# chrom index pos dist logdiff
# 1: 1 1 10 124 2.093422
# 2: 1 2 134 66 1.819544
# 3: 1 3 200 200 2.301030
# 4: 1 4 400 200 2.301030
# 5: 1 5 600 400 2.602060
# 6: 1 6 1000 NA NA
# 7: 2 1 20 13 1.113943
# 8: 2 2 33 NA NA
# 9: 3 1 40 5 0.698970
#10: 3 2 45 NA NA
#11: 4 1 50 5 0.698970
#12: 4 2 55 45 1.653213
#13: 4 3 100 23 1.361728
#14: 4 4 123 NA NA
Upon running the OP's code the output printed are
#1 1 10 124 2.093422
#1 2 134 66 1.819544
#1 3 200 200 2.30103
#1 4 400 200 2.30103
#1 5 600 400 2.60206
#1 6 1000 NA NA
#2 1 20 13 1.113943
#2 2 33 NA NA
#3 1 40 5 0.69897
#3 2 45 NA NA
#4 1 50 5 0.69897
#4 2 55 45 1.653213
#4 3 100 23 1.361728
#4 4 123 NA NA
We split df by df$chrom (Note that we reorder both df and df$chrom before splitting). Then we go through each of the subgroups (the subgroups are called a in this example) using lapply. On the pos column of each subgroup, we calculate difference (diff) of consecutive elements and take log10. Since diff decreases the number of elements by 1, we add a NA to the end. Finally, we rbind all the subgroups together using do.call.
# Sort once by chromosome and position, then split the sorted rows by
# chromosome. drop = TRUE skips factor levels that have no rows, which would
# otherwise produce empty groups that cannot be padded with a single NA.
ord <- order(df$chrom, df$pos)
sorted <- df[ord, ]
do.call(rbind, lapply(split(sorted, sorted$chrom, drop = TRUE), function(a) {
  # diff() returns one element fewer than its input, hence the trailing NA
  gap <- c(diff(a$pos), NA)
  # 'dist' is the raw gap and 'log10dist' its log10
  data.frame(a, dist = gap, log10dist = log10(gap))
}))
# chrom pos dist log10dist
#1.1 1 10 124 2.093422
#1.3 1 134 66 1.819544
#1.2 1 200 200 2.301030
#1.4 1 400 200 2.301030
#1.5 1 600 400 2.602060
#1.6 1 1000 NA NA
#2.7 2 20 13 1.113943
#2.8 2 33 NA NA
#3.9 3 40 5 0.698970
#3.10 3 45 NA NA
#4.11 4 50 5 0.698970
#4.12 4 55 45 1.653213
#4.13 4 100 23 1.361728
#4.14 4 123 NA NA

How can I use merge so that I have data for all times?

I'm trying to reshape my data so that every entity has a value for every possible time (month). Here's what I have:
Class Value month
A 10 1
A 12 3
A 9 12
B 11 1
B 10 8
From the data above, I want to get the following data;
Class Value month
A 10 1
A NA 2
A 12 3
A NA 4
....
A 9 12
B 11 1
B NA 2
....
B 10 8
B NA 9
....
B NA 12
So I want every Class to have a row for each month from 1 to 12.
How can I do this? I'm right now trying it with merge function, but appreciate any other ways to approach.
We can use tidyverse
library(tidyverse)
# Expand to every Class x month combination. Months are spelled out as 1:12 so
# the full year is produced even if the data never contains month 1 or 12.
# Rows created by complete() get NA in Value.
df1 %>%
  complete(Class, month = 1:12) %>%
  select(all_of(names(df1))) %>% # restore the original column order
  as.data.frame() # only if a plain 'data.frame' is needed
In base R using merge (where df is your data):
# One row per Class x month combination. unique() works whether Class is a
# factor or a character column (levels() would return NULL for the latter).
res <- expand.grid(Class = unique(df$Class), month = 1:12,
                   stringsAsFactors = FALSE)
# all.y = TRUE keeps every combination; months absent from df get NA in Value.
filled <- merge(df, res, by = c("Class", "month"), all.y = TRUE)
# Pick the columns by name rather than by position.
filled[, c("Class", "Value", "month")]
# Class Value month
# 1 A 10 1
# 2 A NA 2
# 3 A 12 3
# 4 A NA 4
# 5 A NA 5
# 6 A NA 6
# 7 A NA 7
# 8 A NA 8
# 9 A NA 9
# 10 A NA 10
# 11 A NA 11
# 12 A 9 12
# 13 B 11 1
# 14 B NA 2
# 15 B NA 3
# 16 B NA 4
# 17 B NA 5
# 18 B NA 6
# 19 B NA 7
# 20 B 10 8
# 21 B NA 9
# 22 B NA 10
# 23 B NA 11
# 24 B NA 12
df <- structure(list(Class = structure(c(1L, 1L, 1L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), Value = c(10L, 12L, 9L, 11L, 10L), month = c(1L,
3L, 12L, 1L, 8L)), .Names = c("Class", "Value", "month"), class = "data.frame", row.names = c(NA,
-5L))
To add to @akrun's answer, if you want to replace the NA values with 0, you can do the following:
library(dplyr)
library(tidyr)
# Expand to every Class x month combination (months 1 to 12), then replace
# every NA in Value with 0 using tidyr's dedicated helper instead of a manual
# ifelse() over the column.
df1 %>%
  complete(Class, month = 1:12) %>%
  replace_na(list(Value = 0))

Resources