I have a fairly complex problem and hope someone can help: I want to copy a row value (i.e. the value of Player 1 or Player 2) into two other rows (those of Players 3 and 4) if and only if these players are in the same Treatment, Group and Period AND that player was indeed picked (see column Player.Picked).
I know that with tidyverse I can group_by my columns of interest: Treatment, Group, and Period.
However, I am unsure how to proceed with the condition that Player Picked is fulfilled and then how to extract this value appropriately for the players 3 and 4 in the same treatment, group, period.
The column "extracted.Player 1/2 Value" should be the output. (I have manually provided the first four correct solutions).
Any ideas? Help would be very much appreciated. Thanks a lot in advance!
df
T Player Group Player.Picked Period Player1/2Value extracted.Player1/2Value
1 1 6 1 1 10
1 2 6 1 1 9
1 3 5 2 1 NA -> 4
1 4 6 1 1 NA -> 10
1 5 3 1 1 NA
1 1 5 2 1 8
1 2 1 0 1 7
1 3 6 1 1 NA -> 10
1 4 2 2 1 NA
1 5 2 2 1 NA
1 1 1 0 1 7
1 2 2 2 1 11
1 3 3 1 1 NA
1 4 4 1 1 NA
1 5 4 1 1 NA
1 1 2 2 1 21
1 2 4 1 1 17
1 3 1 0 1 NA
1 4 5 2 1 NA -> 4
1 5 6 1 1 NA
1 1 3 1 1 12
1 2 3 1 1 15
1 3 4 1 1 NA
1 4 1 0 1 NA
1 5 1 0 1 NA
1 1 4 1 1 11
1 2 5 2 1 4
1 3 2 2 1 NA
1 4 3 1 1 NA
1 5 5 2 1 NA
I'm not sure if I understood the required logic; here I'm assuming that Player 5 always picks Player 1 or 2 per Group.
So, here is my go at this using library(data.table):
library(data.table)
# Example data (30 rows): six consecutive blocks of Players 1-5, all with
# T == 1 and Period == 1. Each literal vector below is laid out one block of
# five players per line. Column names containing "/" are kept verbatim via
# check.names = FALSE.
DT <- data.table::data.table(
  T = rep(1L, times = 30L),
  Player = rep(1:5, times = 6L),
  Group = c(
    6L, 6L, 5L, 6L, 3L,
    5L, 1L, 6L, 2L, 2L,
    1L, 2L, 3L, 4L, 4L,
    2L, 4L, 1L, 5L, 6L,
    3L, 3L, 4L, 1L, 1L,
    4L, 5L, 2L, 3L, 5L
  ),
  Player.Picked = c(
    1L, 1L, 2L, 1L, 1L,
    2L, 0L, 1L, 2L, 2L,
    0L, 2L, 1L, 1L, 1L,
    2L, 1L, 0L, 2L, 1L,
    1L, 1L, 1L, 1L, 0L,
    0L, 1L, 2L, 2L, 2L
  ),
  Period = rep(1L, times = 30L),
  # Only Players 1 and 2 carry a value; Players 3-5 are NA.
  `Player1/2Value` = c(
    10L, 9L, NA, NA, NA,
    8L, 7L, NA, NA, NA,
    7L, 11L, NA, NA, NA,
    21L, 17L, NA, NA, NA,
    12L, 15L, NA, NA, NA,
    11L, 4L, NA, NA, NA
  ),
  # The four expected results that were filled in by hand in the question.
  `extracted.Player1/2Value` = c(
    NA, NA, 4L, 10L, NA,
    NA, NA, 10L, NA, NA,
    NA, NA, NA, NA, NA,
    NA, NA, NA, 4L, NA,
    NA, NA, NA, NA, NA,
    NA, NA, NA, NA, NA
  ),
  check.names = FALSE
)
# Order rows by treatment, group, period and player so that each
# (T, Group, Period) block is contiguous and its players are ascending.
setorderv(DT, c("T", "Group", "Period", "Player"))

# One row per (T, Group, Period): the pick recorded on Player 5's row.
Player5PickedDT <- DT[
  Player == 5,
  .(Player5Picked = Player.Picked),
  by = c("T", "Group", "Period")
]

# Attach Player 5's pick to every row of the matching group.
DT <- DT[Player5PickedDT, on = c("T", "Group", "Period")]

# Value of the picked player within each group. Rows where the pick is 0 are
# filtered out, so such groups do not appear in extractedDT at all.
extractedDT <- DT[
  Player5Picked > 0 & Player == Player5Picked,
  .(extractedValue = `Player1/2Value`),
  by = c("T", "Group", "Period")
]

# The helper column is no longer needed.
DT[, Player5Picked := NULL]

# Bring the extracted value back onto all rows of its group; groups that are
# missing from extractedDT get NA.
DT <- extractedDT[DT, on = c("T", "Group", "Period")]

# Keep the extracted value only on the rows of Players 3 and 4.
DT[, extractedValue := fifelse(Player %in% c(3, 4), extractedValue, NA_real_)]

setcolorder(
  DT,
  c(
    "T", "Group", "Period", "Player", "Player.Picked", "Player1/2Value",
    "extracted.Player1/2Value", "extractedValue"
  )
)
DT
The resulting table differs from your expected result (compare extracted.Player1/2Value with extractedValue), but in my eyes it follows the logic you explained:
T Group Period Player Player.Picked Player1/2Value extracted.Player1/2Value extractedValue
1: 1 1 1 1 0 7 NA NA
2: 1 1 1 2 0 7 NA NA
3: 1 1 1 3 0 NA NA NA
4: 1 1 1 4 1 NA NA NA
5: 1 1 1 5 0 NA NA NA
6: 1 2 1 1 2 21 NA NA
7: 1 2 1 2 2 11 NA NA
8: 1 2 1 3 2 NA NA 11
9: 1 2 1 4 2 NA NA 11
10: 1 2 1 5 2 NA NA NA
11: 1 3 1 1 1 12 NA NA
12: 1 3 1 2 1 15 NA NA
13: 1 3 1 3 1 NA NA 12
14: 1 3 1 4 2 NA NA 12
15: 1 3 1 5 1 NA NA NA
16: 1 4 1 1 0 11 NA NA
17: 1 4 1 2 1 17 NA NA
18: 1 4 1 3 1 NA NA 11
19: 1 4 1 4 1 NA NA 11
20: 1 4 1 5 1 NA NA NA
21: 1 5 1 1 2 8 NA NA
22: 1 5 1 2 1 4 NA NA
23: 1 5 1 3 2 NA 4 4
24: 1 5 1 4 2 NA 4 4
25: 1 5 1 5 2 NA NA NA
26: 1 6 1 1 1 10 NA NA
27: 1 6 1 2 1 9 NA NA
28: 1 6 1 3 1 NA 10 10
29: 1 6 1 4 1 NA 10 10
30: 1 6 1 5 1 NA NA NA
T Group Period Player Player.Picked Player1/2Value extracted.Player1/2Value extractedValue
I have an unbalanced data frame with dates, localities and prices. I would like to calculate the price differences between different localities by date. Because my data are unbalanced, I am thinking of creating additional rows (localities) to balance the data so that I can get all price differences.
My data look like:
library(dplyr)
# Reproducible toy data: 21 rows. `date` (1:3) is recycled by data.frame() to
# the length of the other columns; `locality` is drawn before `price`, so the
# random stream is consumed in that order.
set.seed(123)
df <- data.frame(
  date = 1:3,
  locality = rbinom(21, 3, 0.2),
  price = rnorm(21, 50, 20)
)

# Show the data sorted by date, then locality (df itself is left unsorted).
arrange(df, date, locality)
> date locality price
1 1 0 60.07625
2 1 0 35.32994
3 1 0 63.69872
4 1 1 54.76426
5 1 1 66.51080
6 1 1 28.28602
7 1 2 47.09213
8 2 0 26.68910
9 2 1 100.56673
10 2 1 48.88628
11 2 1 48.29153
12 2 2 29.02214
13 2 2 45.68269
14 2 2 43.59887
15 3 0 60.98193
16 3 0 75.89527
17 3 0 43.30174
18 3 0 71.41221
19 3 0 33.62969
20 3 1 34.31236
21 3 1 23.76955
To get balanced data, I am thinking of something like:
> date locality price
1 1 0 60.07625
2 1 0 35.32994
3 1 0 63.69872
4 1 1 54.76426
5 1 1 66.51080
6 1 1 28.28602
7 1 2 47.09213
8 1 2 NA
9 1 2 NA
10 2 0 26.68910
10 2 0 NA
10 2 0 NA
11 2 1 100.56673
12 2 1 48.88628
13 2 1 48.29153
14 2 2 29.02214
15 2 2 45.68269
16 2 2 43.59887
etc...
Finally, to get the price differences between pairs of localities, I am thinking of:
> date diff(price, 0-1) diff(price, 0-2) diff(price, 1-2)
1 1 60.07625-54.76426 60.07625-47.09213 etc...
2 1 35.32994-66.51080 35.32994-NA
3 1 63.69872-28.28602 63.69872-NA
You don't need to balance your data. If you use dcast, it will add the NAs for you.
First transform the data to show individual columns for each locality
library(data.table)
library(tidyverse)
# Convert df to a data.table by reference.
setDT(df)
# Number the observations within each (date, locality) cell: 1, 2, 3, ...
# This gives repeated prices in the same cell distinct row ids.
df[, rid := seq_len(.N), by = .(date, locality)]
# Reshape to wide form: one row per (rid, date), one price column per
# locality. Combinations that do not occur in the data are filled with NA.
df2 <- dcast(
  data = df,
  formula = rid + date ~ locality,
  value.var = "price"
)
# rid date 0 1 2
# 1: 1 1 60.07625 54.76426 47.09213
# 2: 1 2 26.68910 100.56673 29.02214
# 3: 1 3 60.98193 34.31236 NA
# 4: 2 1 35.32994 66.51080 NA
# 5: 2 2 NA 48.88628 45.68269
# 6: 2 3 75.89527 23.76955 NA
# 7: 3 1 63.69872 28.28602 NA
# 8: 3 2 NA 48.29153 43.59887
# 9: 3 3 43.30174 NA NA
# 10: 4 3 71.41221 NA NA
# 11: 5 3 33.62969 NA NA
Then create a data frame to_diff of differences to calculate, and pmap over that to calculate the differences. Here c0_1 corresponds to what you call in your question diff(price, 0-1).
# Build every unordered pair of localities (V1 < V2) and compute, row by row
# of df2, the price difference "locality V1 minus locality V2".
#
# The localities are taken from the data instead of being hard-coded as 0:2:
# with a hard-coded range an additional locality (e.g. 3) would be silently
# ignored, and a locality that never occurs would have no column in df2, so
# the lookup df2[["2"]] would return NULL and the subtraction would collapse
# to a zero-length vector.
localities <- sort(unique(df$locality))
# Explicit V1/V2 names: the filter below and pmap() rely on them.
to_diff <- CJ(V1 = localities, V2 = localities)[V1 < V2]

# df2's price columns are named after the locality values, hence the
# as.character() lookups. The result columns are named c<V1>_<V2>, and the
# first two columns of df2 (rid, date) are appended as identifiers.
pmap(
  to_diff,
  function(V1, V2) df2[[as.character(V1)]] - df2[[as.character(V2)]]
) %>%
  setNames(paste0("c", to_diff[["V1"]], "_", to_diff[["V2"]])) %>%
  bind_cols(df2[, 1:2])
# A tibble: 11 x 5
# c0_1 c0_2 c1_2 rid date
# <dbl> <dbl> <dbl> <int> <int>
# 1 5.31 13.0 7.67 1 1
# 2 -73.9 -2.33 71.5 1 2
# 3 26.7 NA NA 1 3
# 4 -31.2 NA NA 2 1
# 5 NA NA 3.20 2 2
# 6 52.1 NA NA 2 3
# 7 35.4 NA NA 3 1
# 8 NA NA 4.69 3 2
# 9 NA NA NA 3 3
# 10 NA NA NA 4 3
# 11 NA NA NA 5 3
I would greatly appreciate a solution to defined problem below; I think it's a very difficult one.
I join 2 data.frames t1 and t2 using merge(). In the resulting data.frame, which I named "testing", I want to replace the entries of the non-unique rows originating from t1 with "NA" so that only unique rows remain that have the closest distance to t2. The condition is:
min(sqrt((xCor.y - xCor.x)^2 + (yCor.y - yCor.x)^2))
# scroll to end for result I am looking for
This is meant for ~1GB data set, so I have to avoid looping through all data.
# Toy tracking data: t1 holds the objects observed at time 1, t2 those at
# time 2. The coordinates are random and no seed is set, so they differ on
# every run; only the labels and object numbers are fixed.
t1 <- data.frame(
  trackLabel = c(1, 2, 3, 4, 4, 5, 5, 7, 7, 7),
  objNumber = 1:10,
  parentObjNumber = rep(0, 10),
  time = rep(1, 10),
  xCor = runif(10),
  yCor = runif(10)
)
t2 <- data.frame(
  trackLabel = c(1, 2, 2, 4, 4, 4, 6, 7, 7, 7, 7),
  objNumber = 11:21,
  parentObjNumber = c(1, 2, 2, 4, 4, 4, 7, 8, 9, 9, 9),
  time = rep(2, 11),
  xCor = runif(11),
  yCor = runif(11)
)

# Full outer join of each t1 object with its children in t2
# (t1$objNumber == t2$parentObjNumber within the same trackLabel).
# `incomparables` is deliberately not passed: merge() supports it only when
# joining on a single column, and the key columns here contain no NA anyway.
testing <- merge(
  t1, t2,
  by.x = c("trackLabel", "objNumber"),
  by.y = c("trackLabel", "parentObjNumber"),
  all = TRUE
)

# t2's own `objNumber` column clashes with the join key of the same name.
# Depending on the R version, merge() either keeps two columns called
# "objNumber" (and warns about the duplicate) or renames the t2 column to
# "objNumber.y". Normalise both cases so that the join key becomes
# "objNumber1" and the t2 column is called "objNumber".
ind <- colnames(testing) == "objNumber"
colnames(testing)[which(ind)[1L]] <- "objNumber1"
colnames(testing)[colnames(testing) == "objNumber.y"] <- "objNumber"
> t1
trackLabel objNumber parentObjNumber time xCor yCor
1 1 1 0 1 0.25852366 0.360631607
2 2 2 0 1 0.69987607 0.048360258
3 3 3 0 1 0.23047883 0.414221880
4 4 4 0 1 0.58169548 0.718223111
5 4 5 0 1 0.61419336 0.435153774
6 5 6 0 1 0.50028765 0.735970291
7 5 7 0 1 0.41380332 0.097256739
8 7 8 0 1 0.57563080 0.828142024
9 7 9 0 1 0.39512092 0.728903233
10 7 10 0 1 0.16675690 0.284307824
> t2
trackLabel objNumber parentObjNumber time xCor yCor
1 1 11 1 2 0.473735625 0.454637752
2 2 12 2 2 0.623971860 0.517089522
3 2 13 2 2 0.470885840 0.703872484
4 4 14 4 2 0.188280842 0.678683831
5 4 15 4 2 0.198772198 0.160836676
6 4 16 4 2 0.251950005 0.958747183
7 6 17 7 2 0.545521560 0.005505346
8 7 18 8 2 0.477450908 0.819060935
9 7 19 9 2 0.509430458 0.997968108
10 7 20 9 2 0.027918865 0.138014769
11 7 21 9 2 0.568532497 0.911921770
> testing
trackLabel objNumber1 parentObjNumber time.x xCor.x yCor.x objNumber time.y xCor.y yCor.y
1 1 1 0 1 0.25852366 0.360631607 11 2 0.473735625 0.454637752
2 2 2 0 1 0.69987607 0.048360258 12 2 0.623971860 0.517089522
3 2 2 0 1 0.69987607 0.048360258 13 2 0.470885840 0.703872484
4 3 3 0 1 0.23047883 0.414221880 NA NA NA NA
5 4 4 0 1 0.58169548 0.718223111 14 2 0.188280842 0.678683831
6 4 4 0 1 0.58169548 0.718223111 15 2 0.198772198 0.160836676
7 4 4 0 1 0.58169548 0.718223111 16 2 0.251950005 0.958747183
8 4 5 0 1 0.61419336 0.435153774 NA NA NA NA
9 5 6 0 1 0.50028765 0.735970291 NA NA NA NA
10 5 7 0 1 0.41380332 0.097256739 NA NA NA NA
11 6 7 NA NA NA NA 17 2 0.545521560 0.005505346
12 7 8 0 1 0.57563080 0.828142024 18 2 0.477450908 0.819060935
13 7 9 0 1 0.39512092 0.728903233 19 2 0.509430458 0.997968108
14 7 9 0 1 0.39512092 0.728903233 20 2 0.027918865 0.138014769
15 7 9 0 1 0.39512092 0.728903233 21 2 0.568532497 0.911921770
16 7 10 0 1 0.16675690 0.284307824 NA NA NA NA
# and here is what I want to achieve:
> testing[c(3, 6, 7, 13, 14 ), 1:6] <-NA
> testing
trackLabel objNumber1 parentObjNumber time.x xCor.x yCor.x objNumber time.y xCor.y yCor.y
1 1 1 0 1 0.25852366 0.360631607 11 2 0.473735625 0.454637752
2 2 2 0 1 0.69987607 0.048360258 12 2 0.623971860 0.517089522
3 NA NA NA NA NA NA 13 2 0.470885840 0.703872484
4 3 3 0 1 0.23047883 0.414221880 NA NA NA NA
5 4 4 0 1 0.58169548 0.718223111 14 2 0.188280842 0.678683831
6 NA NA NA NA NA NA 15 2 0.198772198 0.160836676
7 NA NA NA NA NA NA 16 2 0.251950005 0.958747183
8 4 5 0 1 0.61419336 0.435153774 NA NA NA NA
9 5 6 0 1 0.50028765 0.735970291 NA NA NA NA
10 5 7 0 1 0.41380332 0.097256739 NA NA NA NA
11 6 7 NA NA NA NA 17 2 0.545521560 0.005505346
12 7 8 0 1 0.57563080 0.828142024 18 2 0.477450908 0.819060935
13 NA NA NA NA NA NA 19 2 0.509430458 0.997968108
14 NA NA NA NA NA NA 20 2 0.027918865 0.138014769
15 7 9 0 1 0.39512092 0.728903233 21 2 0.568532497 0.911921770
16 7 10 0 1 0.16675690 0.284307824 NA NA NA NA