Merge two rows in a dataframe in R

I am trying to merge rows in my data.frame based on <NA> value.
Here is my data frame.
new <- data.frame (
Location = c(rep("Loc 1", 4), rep("Loc 2", 4)),
Place = c("Powder Springs_Original", "Bridge_Other County", "Airport", "County1", "City 4 - Duplicated", "South", "County2", "Formal place"),
Val1 = c(109, 123, NA, 117, 143, NA, 151, 142),
Val2 = c(102, 115, NA, 45, 135, NA, 144, 125),
Val3 = c(99, 112, NA, 26, 127, NA, 140, 132),
Val4 = c(90, 103, NA, 57, 125, NA, 135, 201))
I am expecting something like,
Location Place Val1 Val2 Val3 Val4
Loc 1 Powder Springs - Original 109 102 99 90
Loc 1 Bridge _ Other County 123 115 112 103
Loc 1 Airport County1 117 45 26 57
Loc 2 City 4 - Duplicated 143 135 127 125
Loc 2 South County2 151 144 140 135
Loc 2 Formal place 142 125 132 201
I want to remove the NA rows and merge them with the next row. The Location for these rows is the same. Can someone please help me here?
Thanks in advance.

First off, you shouldn't be using new as your variable name since it's a built-in R function. Second, you could do something like this:
# Find the rows whose Val columns are all NA
na_rows <- which(apply(new[paste0("Val", 1:4)], 1, function(x) all(is.na(x))))
# Set correct place names
new$Place <- as.character(new$Place)
new$Place[na_rows + 1] <- paste(new$Place[na_rows], new$Place[na_rows + 1])
# Remove NAs
new <- new[-na_rows, ]
# Location Place Val1 Val2 Val3 Val4
# 1 Loc 1 Powder Springs_Original 109 102 99 90
# 2 Loc 1 Bridge_Other County 123 115 112 103
# 4 Loc 1 Airport County1 117 45 26 57
# 5 Loc 2 City 4 - Duplicated 143 135 127 125
# 7 Loc 2 South County2 151 144 140 135
# 8 Loc 2 Formal place 142 125 132 201

(edited as the initial answer was incomplete)
nu <- data.frame (
Location = c(rep("Loc 1", 4), rep("Loc 2", 4)),
Place = c("Powder Springs_Original", "Bridge_Other County", "Airport", "County1", "City 4 - Duplicated", "South", "County2", "Formal place"),
Val1 = c(109, 123, NA, 117, 143, NA, 151, 142),
Val2 = c(102, 115, NA, 45, 135, NA, 144, 125),
Val3 = c(99, 112, NA, 26, 127, NA, 140, 132),
Val4 = c(90, 103, NA, 57, 125, NA, 135, 201), stringsAsFactors=FALSE)
# notice stringsAsFactors = FALSE
# if there were justice in the world it would be FALSE by default (and since R 4.0.0, it is)
# in any case, nu$Place should be character rather than factor so in real data
# you may need to do nu$Place <- as.character(nu$Place)
ic <- which(!complete.cases(nu))
nu$Place[ic + 1] <- paste(nu$Place[ic], nu$Place[ic + 1])  # merge the NA row's Place into the following row
nu <- nu[-ic,]
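For the sample data this leaves the merged labels on the surviving rows, matching the expected output in the question:
nu$Place
# "Powder Springs_Original" "Bridge_Other County" "Airport County1"
# "City 4 - Duplicated" "South County2" "Formal place"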
Does this do what you need?

Thanks for your help and support. After a lot of trials, I got the required output below. (As suggested by @Robert Krzyzanowski, I renamed my data.frame to Test.)
This is what I did. Please let me know if anything looks off.
> new_DF <- subset(Test, is.na(Test$Val1))
> new_DF
Location Place Val1 Val2 Val3 Val4
3 Loc 1 Airport NA NA NA NA
6 Loc 2 South NA NA NA NA
>
> row.names(new_DF)
[1] "3" "6"
> x.num <- as.numeric(row.names(new_DF))
>
> Test$Place <- as.character(Test$Place)
> Test$Place[x.num + 1] <- paste(Test$Place[x.num], Test$Place[x.num + 1])
> Test <- Test[-x.num, ]
> Test
Location Place Val1 Val2 Val3 Val4
1 Loc 1 Powder Springs_Original 109 102 99 90
2 Loc 1 Bridge_Other County 123 115 112 103
4 Loc 1 Airport County1 117 45 26 57
5 Loc 2 City 4 - Duplicated 143 135 127 125
7 Loc 2 South County2 151 144 140 135
8 Loc 2 Formal place 142 125 132 201
Once again, thank you all for your support and your time for looking into this.

Related

Find the Maximum Value with respect to another within two data frames (VLOOKUP which returns Max Value) in R

I am trying to write a function similar to a VLOOKUP in Excel, but one which returns the maximum value along with the other values in the same row.
The data frames I am dealing with are given below:
dput(Book3)
structure(list(Item = c("ABA", "ABB", "ABC", "ABD", "ABE", "ABF"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L))
dput(Book4)
structure(list(Item = c("ABA", "ABB", "ABC", "ABD", "ABE", "ABF",
"ABA", "ABB", "ABC", "ABD", "ABE", "ABF", "ABA", "ABB", "ABC",
"ABD", "ABE", "ABF"), Max1 = c(12, 68, 27, 17, 74, 76, 78, 93,
94, 98, 46, 90, 5, 58, 67, 64, 34, 97), Additional1 = c(40, 66,
100, 33, 66, 19, 8, 70, 21, 93, 48, 34, 44, 89, 74, 20, 0, 47
), Additional2 = c(39, 31, 85, 58, 0, 2, 57, 28, 31, 32, 15,
22, 93, 41, 57, 81, 95, 46)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -18L))
The Expected output for this is given below:
You are looking for slice_max:
library(dplyr)
Book4 %>%
group_by(Item) %>%
slice_max(Max1)
# Item Max1 Additional1 Additional2
# 1 ABA 78 8 57
# 2 ABB 93 70 28
# 3 ABC 94 21 31
# 4 ABD 98 93 32
# 5 ABE 74 66 0
# 6 ABF 97 47 46
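Note that slice_max() keeps all rows tied for the maximum by default; if you only ever want one row per Item you can be explicit (a small sketch of the same call):
Book4 %>%
  group_by(Item) %>%
  slice_max(Max1, n = 1, with_ties = FALSE)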
Using base R
subset(Book4, Max1 == ave(Max1, Item, FUN = max))
-output
# A tibble: 6 × 4
Item Max1 Additional1 Additional2
<chr> <dbl> <dbl> <dbl>
1 ABE 74 66 0
2 ABA 78 8 57
3 ABB 93 70 28
4 ABC 94 21 31
5 ABD 98 93 32
6 ABF 97 47 46
An alternative base solution that is more resilient to floating-point precision problems (cf. "Why are these numbers not equal?", https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f). It also allows two behavior options if there are duplicate max values:
if you want all of them, use ties.method = "min";
if you want the first (or just one) of them, then ties.method = "first".
Book4[ave(Book4$Max1, Book4$Item, FUN = function(z) rank(-z, ties.method = "first")) == 1,]
# # A tibble: 6 x 4
# Item Max1 Additional1 Additional2
# <chr> <dbl> <dbl> <dbl>
# 1 ABE 74 66 0
# 2 ABA 78 8 57
# 3 ABB 93 70 28
# 4 ABC 94 21 31
# 5 ABD 98 93 32
# 6 ABF 97 47 46
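To see what the two ties.method options do, here is a toy illustration on a hypothetical vector with a duplicated maximum:
z <- c(3, 7, 7, 1)
rank(-z, ties.method = "min")    # 3 1 1 4 -> both tied maxima get rank 1, so both rows are kept
rank(-z, ties.method = "first")  # 3 1 2 4 -> only the first tied maximum gets rank 1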
Using base R aggregate + max + merge
> merge(Book4, aggregate(Max1~Item, data = Book4, max), by = c("Item", "Max1"))
Item Max1 Additional1 Additional2
1 ABA 78 8 57
2 ABB 93 70 28
3 ABC 94 21 31
4 ABD 98 93 32
5 ABE 74 66 0
6 ABF 97 47 46

Sum cells across similar data frames within a list in R, by data frame names

I have a list of data frames that look like this:
df1_BC <- data.frame(name=c("name1", "name2", "name3"),
year1=c(23, 45, 54),
year2=c(54, 23, 79),
year3=c(67, 29, 76))
df2_BC <- data.frame(name=c("name1", "name2", "name3"),
year1=c(93, 32, 56),
year2=c(82, 96, 72),
year3=c(54, 76, 19))
df3_BC <- data.frame(name=c("name1", "name2", "name3"),
year1=c(83, 41, 92),
year2=c(76, 73, 65),
year3=c(63, 62, 95))
df1_BA <- data.frame(name=c("name1", "name2", "name3", "name4"),
year1=c(23, 35, 54, 41),
year2=c(84, 23, 79, 69),
year3=c(97, 29, 76, 0))
df2_BA <- data.frame(name=c("name1", "name2", "name3", "name4"),
year1=c(93, 32, 56, 64),
year2=c(82, 96, 53, 0),
year3=c(54, 76, 19, 3))
df3_BA <- data.frame(name=c("name1", "name2", "name3", "name4"),
year1=c(83, 41, 92, 5),
year2=c(76, 3, 65, 82),
year3=c(3, 62, 95, 6))
list_dfs <- list(df1_BC, df2_BC, df3_BC, df1_BA, df2_BA, df3_BA)
As you can see, dataframes with the same suffix ('BA' or 'BC') have the same columns and number of rows.
What I want to do is to sum across the cells of the two groups of dataframes (the ones with the 'BA' suffix and the ones with the 'BC' suffix).
If I do it on the dataframes alone, without listing them, I get the expected result:
result_BA <- df1_BA[,-1] + df2_BA[,-1] + df3_BA[,-1]
result_BC <- df1_BC[,-1] + df2_BC[,-1] + df3_BC[,-1]
print(result_BA)
year1 year2 year3
1 199 242 154
2 108 122 167
3 202 197 190
4 110 151 9
As you can also see, it is necessary to leave the name column out to do the sum. EDIT: Then I would like to put it back, something like this:
result_BA <- cbind(df1_BA[,-1], result_BA)
This would add the column of names back to each corresponding dataframe in the list.
This is a simplified example from much larger lists, so doing it as a list and matching the dataframes to add up by suffix really simplifies the task.
Thanks!
The list didn't have any names, so we need to construct it with names. One option is to create a named list, split the list by the suffix of the names, and use Reduce to '+' the inner list elements:
list_dfs <- list(df1_BC = df1_BC, df2_BC = df2_BC, df3_BC = df3_BC,
df1_BA = df1_BA, df2_BA = df2_BA, df3_BA = df3_BA)
lapply(split(list_dfs, sub(".*_", "", names(list_dfs))),
\(x) Reduce(`+`, lapply(x, `[`, -1)))
-output
$BA
year1 year2 year3
1 199 242 154
2 108 122 167
3 202 197 190
4 110 151 9
$BC
year1 year2 year3
1 199 212 184
2 118 192 167
3 202 216 190
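To also add the name column back to each summed data frame (the edit in the question), one option is to take it from the first data frame of each suffix group; a sketch along the same lines:
grouped <- split(list_dfs, sub(".*_", "", names(list_dfs)))
summed  <- lapply(grouped, \(x) Reduce(`+`, lapply(x, `[`, -1)))
# cbind the name column of the first data frame in each group onto its summed result
Map(\(grp, tot) cbind(grp[[1]]["name"], tot), grouped, summed)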
Or this may be done with the tidyverse using a group-by approach:
library(dplyr)
library(tidyr)
library(data.table)
list_dfs <- lst(df1_BC, df2_BC, df3_BC, df1_BA, df2_BA, df3_BA)
bind_rows(list_dfs, .id = 'name') %>%
separate(name, into = c("name1", "name2")) %>%
mutate(grp = rowid(name1, name2)) %>%
group_by(name2, grp) %>%
summarise(across(where(is.numeric), sum), .groups = "drop") %>%
select(-grp)
-output
# A tibble: 7 × 4
name2 year1 year2 year3
<chr> <dbl> <dbl> <dbl>
1 BA 199 242 154
2 BA 108 122 167
3 BA 202 197 190
4 BA 110 151 9
5 BC 199 212 184
6 BC 118 192 167
7 BC 202 216 190

Replacing elements in a column of a dataframe by using regular expressions in R

df is a test dataframe and is a subset of my original dataframe which has ~1000000 rows and 21 columns.
df <- data.frame(
Hits = c("# test1", "# Query: tr|A4I9M8|A4I9M8_LEIIN", "# 13135", "tr|E9BQL4|E9BQL4_LEIDB",
"tr|A4I9M8|A4I9M8_LEIIN", "tr|A0A3Q8IUE6|A0A3Q8IUE6_LEIDO", "tr|Q4Q3E9|Q4Q3E9_LEIMA",
"tr|A0A640KX53|A0A640KX53_LEITA", "# test2", "# Query: tr|E9AH01|E9AH01_LEIIN", "# 83771",
"tr|A0A6L0XNG2|A0A6L0XNG2_LEIIN", "tr|E9AH01|E9AH01_LEIIN", "tr|A0A6J8FCW4|A0A6J8FCW4_LEIDO",
"tr|A0A6J8FCW4|A0A6J8FCW4_LEIDO"),
Category1 = c(NA, NA, NA, 0.001, 0.001, 0.002, 0.003, 0.003, NA, NA, NA, 0.023, 0.341, 0.341, 0.569),
Category2 = c(NA, NA, NA, 100, 100, 99, 98, 98, NA, NA, NA, 100, 95, 95, 97),
Category3 = c(NA, NA, NA, 100, 100, 99, 98, 98, NA, NA, NA, 98, 97, 97, 92))
df looks something like this
In the Hits column, the elements which don't start with a # are to be replaced by the portion lying between the first two occurrences of |. The regular expression which I came up with to extract this portion is
^.*?(\\b[A-Z][^|]*).*
The output should look like this
I can't seem to figure out how to replace the elements with the extracted portions. I can think of using conditional loops in this case. But considering the size of the original dataframe, I'm not sure if that would be an efficient way to deal with this as loops tend to be slower in R. Can somebody suggest an alternative way, preferably a vectorized solution to solve this issue?
You can use gsub() inside mutate() to do the job.
library(tidyverse)
# my original answer
df %>% mutate(Hits = gsub("^[^#].+?((?<=\\|).+?(?=\\|)).*", "\\1", Hits, perl = T))
Or
# OP's regex
df %>% mutate(Hits = gsub("^[^#].*?(\\b[A-Z][^\\|]*).*", "\\1", Hits, perl = T))
Both generate the same output.
Output
# A tibble: 15 x 4
Hits Category1 Category2 Category3
<chr> <dbl> <dbl> <dbl>
1 # test1 NA NA NA
2 # Query: tr|A4I9M8|A4I9M8_LEIIN NA NA NA
3 # 13135 NA NA NA
4 E9BQL4 0.001 100 100
5 A4I9M8 0.001 100 100
6 A0A3Q8IUE6 0.002 99 99
7 Q4Q3E9 0.003 98 98
8 A0A640KX53 0.003 98 98
9 # test2 NA NA NA
10 # Query: tr|E9AH01|E9AH01_LEIIN NA NA NA
11 # 83771 NA NA NA
12 A0A6L0XNG2 0.023 100 98
13 E9AH01 0.341 95 97
14 A0A6J8FCW4 0.341 95 97
15 A0A6J8FCW4 0.569 97 92
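If you would rather avoid the regex altogether, a vectorized base R sketch of the same replacement (assuming Hits is a character column; wrap it in as.character() first if it is a factor):
keep <- !startsWith(df$Hits, "#")
# split on "|" and keep the second field, i.e. the part between the first two pipes
df$Hits[keep] <- vapply(strsplit(df$Hits[keep], "|", fixed = TRUE), `[`, character(1), 2)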

How to remove some variables in R with names that are dates

I have a dataset in R where some of the variable names are dates, see a simplified example of the input data below (in Excel):
What I want to do with this data is to remove some of the columns with names that are dates that are older than or equal to a certain date, e.g. 2019-01-31. See a simplified example of the desired output data below (in Excel):
Now, I am able to achieve this by transposing the data, filtering out rows with a date lower than or equal to 31 January 2019 and finally transposing the data back. However I am wondering whether there is a different way to do this using just the column names without pivoting back and forth?
# Example data to copy and paste into R for easy reproduction of problem:
df <- data.frame (id = c("apples", "pears", "grapes", "tomatoes", "carrots", "cucumber", "rabbit", "cat", "dog"),
type = c("fruit", "fruit", "fruit", "veggies", "veggies", "veggies", "pets", "pets", "pets"),
color = c("red", "green", "purple", "red", "orange", "green", "grey", "black", "brown"),
'2019-04-30' = c(353, 91, 270, 2029, 107, 62, 30, 61, 137),
'2019-03-31' = c(349, 90, 267, 2028, 104, 60, 29, 59, 133),
'2019-02-28' = c(345, 89, 264, 2027, 101, 58, 28, 57, 129),
'2019-01-31' = c(341, 88, 261, 2026, 98, 56, 27, 55, 125),
'2018-12-31' = c(337, 87, 258, 2025, 95, 54, 26, 53, 121),
'2018-11-30' = c(333, 86, 255, 2024, 92, 52, 25, 51, 117),
check.names = FALSE)
We can do this in base R. Your dates are conveniently in YYYY-MM-DD format, which means they will be ordered correctly by the >= and <= operators. We can also use a simple regex to preserve any columns that are not in date format:
df[!grepl('\\d{4}-\\d{2}-\\d{2}', colnames(df)) | colnames(df) >= '2019-02-28']
id type color 2019-04-30 2019-03-31 2019-02-28
1 apples fruit red 353 349 345
2 pears fruit green 91 90 89
3 grapes fruit purple 270 267 264
4 tomatoes veggies red 2029 2028 2027
5 carrots veggies orange 107 104 101
6 cucumber veggies green 62 60 58
7 rabbit pets grey 30 29 28
8 cat pets black 61 59 57
9 dog pets brown 137 133 129
The approach is as follows:
extract the column names
transform them to Date where possible, and NA where not date-like
create a boolean vector to filter out the too-old date columns and the non-date (i.e. NA from the step before) columns
Sample Data
## sample data frame
m <- matrix(1, 3, 10)
colnames(m) <- c("a", "b", as.character(seq.Date(as.Date("2021-1-1"), length.out = 8, by = "days")))
(d <- as.data.frame(m))
# a b 2021-01-01 2021-01-02 2021-01-03 2021-01-04 2021-01-05 2021-01-06 2021-01-07 2021-01-08
# 1 1 1 1 1 1 1 1 1 1 1
# 2 1 1 1 1 1 1 1 1 1 1
# 3 1 1 1 1 1 1 1 1 1 1
Filter
r <- vapply(names(d), as.Date, numeric(1), optional = TRUE)
d[, is.na(r) | r <= as.Date("2021-1-3")]
# a b 2021-01-01 2021-01-02 2021-01-03
# 1 1 1 1 1 1
# 2 1 1 1 1 1
# 3 1 1 1 1 1
r <- vapply(names(df), as.Date, numeric(1), optional = TRUE)
df[, is.na(r) | r >= as.Date("2019-1-31")]
# id type color 2019-04-30 2019-03-31 2019-02-28 2019-01-31
# 1 apples fruit red 353 349 345 341
# 2 pears fruit green 91 90 89 88
# 3 grapes fruit purple 270 267 264 261
# 4 tomatoes veggies red 2029 2028 2027 2026
# 5 carrots veggies orange 107 104 101 98
# 6 cucumber veggies green 62 60 58 56
# 7 rabbit pets grey 30 29 28 27
# 8 cat pets black 61 59 57 55
# 9 dog pets brown 137 133 129 125
Description
One can reshape the data to the long format and filter based on the date column.
Data
Same data as provided in the example
df <- data.frame (id = c("apples", "pears", "grapes", "tomatoes", "carrots", "cucumber", "rabbit", "cat", "dog"),
type = c("fruit", "fruit", "fruit", "veggies", "veggies", "veggies", "pets", "pets", "pets"),
color = c("red", "green", "purple", "red", "orange", "green", "grey", "black", "brown"),
'2019-04-30' = c(353, 91, 270, 2029, 107, 62, 30, 61, 137),
'2019-03-31' = c(349, 90, 267, 2028, 104, 60, 29, 59, 133),
'2019-02-28' = c(345, 89, 264, 2027, 101, 58, 28, 57, 129),
'2019-01-31' = c(341, 88, 261, 2026, 98, 56, 27, 55, 125),
'2018-12-31' = c(337, 87, 258, 2025, 95, 54, 26, 53, 121),
'2018-11-30' = c(333, 86, 255, 2024, 92, 52, 25, 51, 117),
check.names = FALSE)
Solution
library(dplyr)
library(tidyr)
df %>%
tidyr::pivot_longer(cols = !c(id, type, color), names_to = 'date', values_to = 'value') %>%
dplyr::mutate(date = as.Date(date, format = '%Y-%m-%d')) %>%
dplyr::filter( date >= as.Date('2019-01-31')) %>%
tidyr::pivot_wider(names_from = 'date', values_from = 'value')
Desired output
id type color `2019-04-30` `2019-03-31` `2019-02-28` `2019-01-31`
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 apples fruit red 353 349 345 341
2 pears fruit green 91 90 89 88
3 grapes fruit purple 270 267 264 261
4 tomatoes veggies red 2029 2028 2027 2026
5 carrots veggies orange 107 104 101 98
6 cucumber veggies green 62 60 58 56
7 rabbit pets grey 30 29 28 27
8 cat pets black 61 59 57 55
9 dog pets brown 137 133 129 125

Splitting one column into two columns using data wrangling with R

I would really appreciate your help in using R for data wrangling. I have data where I want to split one column (variable) into two whenever applicable, conditioned on other variables. For example, as per the sample below, the data represents reaction time measures (RT1 and RT2) of some words (item) that appear at different times of reading (block). I want to see if RT1 and RT2 values in blocks 3, 4, and 5 are correlated with RT1 and RT2 values of the same item at block 1. The target items that appeared in block 1 and re-appeared in subsequent blocks are coded as 'EI' in the column 'condition', whereas items coded as 'E' or 'I' appeared only once.
dput(d1)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253
), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I")), row.names = c(NA, -8L
), class = c("tbl_df", "tbl", "data.frame"))
Where a sample of the data would look like this:
> d1
# A tibble: 8 x 6
RECORDING_SESSION_LABEL RT1 RT2 item block condition
<dbl> <dbl> <dbl> <chr> <dbl> <chr>
1 26 5171 357 foreign 1 EI
2 26 3857 328 detailed 1 E
3 26 3447 122 large 1 EI
4 26 314 39 foreign 3 EI
5 26 460 86 foreign 4 EI
6 26 731 132 large 3 EI
7 26 957 173 large 4 EI
8 26 1253 215 disputable 3 I
In order to present this in a format that R can work with, the target data frame I want to achieve would be similar to the one below (where the highlighted columns should be added). Blank rows in these columns represent items which do not appear repeatedly (condition is not coded as 'EI'); they are therefore irrelevant and should be coded as NA.
dput(d2)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), `RT 1` = c(5171, 3857, 3447, 314, 460, 731, 957,
1253), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I"), `RT 1_at_block1` = c(NA,
NA, NA, 5171, 5171, 3447, 3447, NA), RT2_at_block1 = c(NA, NA,
NA, 357, 357, 122, 122, NA)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"))
And a sample of the data format targeted would look like this:
> d2
# A tibble: 8 x 8
RECORDING_SESSI~ `RT 1` RT2 item block condition `RT 1_at_block1`
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
1 26 5171 357 fore~ 1 EI NA
2 26 3857 328 deta~ 1 E NA
3 26 3447 122 large 1 EI NA
4 26 314 39 fore~ 3 EI 5171
5 26 460 86 fore~ 4 EI 5171
6 26 731 132 large 3 EI 3447
7 26 957 173 large 4 EI 3447
8 26 1253 215 disp~ 3 I NA
# ... with 1 more variable: RT2_at_block1 <dbl>
> head(d2)
# A tibble: 6 x 8
RECORDING_SESSION_LABEL `RT 1` RT2 item block condition `RT 1_at_block1` RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI NA NA
2 26 3857 328 detailed 1 E NA NA
3 26 3447 122 large 1 EI NA NA
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
Thanks in advance for any help.
A possible solution using dplyr:
d1 <- structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26, 26, 26, 26),
RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253),
RT2 = c(357, 328, 122, 39, 86, 132, 173, 215),
item = c("foreign", "detailed", "large", "foreign", "foreign", "large", "large", "disputable"),
block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI", "E", "EI", "EI", "EI", "EI", "EI", "I")),
row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"))
library(dplyr)
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2))
After that, d2 looks like this:
RECORDING_SESSION_LABEL RT1 RT2 item block condition RT1_at_block1 RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI 5171 357
2 26 3857 328 detailed 1 E 3857 328
3 26 3447 122 large 1 EI 3447 122
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
Edit: Adding a mutate if you want to set the values for block 1 to NA:
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2)) %>%
mutate(RT1_at_block1 = ifelse(block == 1, NA, RT1_at_block1),
RT2_at_block1 = ifelse(block == 1, NA, RT2_at_block1))
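The two joins can also be collapsed into one with explicit keys, which avoids the implicit-join messages; a sketch using the same column names:
d2 <- d1 %>%
  left_join(
    d1 %>%
      filter(block == 1) %>%
      select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1, RT2_at_block1 = RT2),
    by = c("RECORDING_SESSION_LABEL", "item")
  ) %>%
  mutate(RT1_at_block1 = ifelse(block == 1, NA, RT1_at_block1),
         RT2_at_block1 = ifelse(block == 1, NA, RT2_at_block1))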
