Related
For example, given a table with 100 pairs of x and y values, how can I create the matrix of the joint distribution P[x, y] in R? I tried to solve it, but I have no idea how to start.
For example, let's draw x and y from categorical distributions like
library(extraDistr)
# Draw 100 values for each variable with rcat(), using equal probabilities
# over 6 categories for x and over 4 categories for y, then bind the two
# samples column-wise into the joint sample z.
n_draws <- 100
x <- rcat(n_draws, rep(1 / 6, times = 6))
y <- rcat(n_draws, rep(1 / 4, times = 4))
z <- cbind(x, y)
where z is the joint sample. Then,
library(dplyr)
# Cross-tabulate the (x, y) pairs of z and divide the counts by the number
# of sampled pairs to turn them into relative frequencies.
n_pairs <- nrow(z)
z %>%
  as.data.frame() %>%
  group_by(x, y) %>%
  table() / n_pairs
y
x 1 2 3 4
1 0.02 0.06 0.01 0.02
2 0.04 0.05 0.06 0.08
3 0.06 0.03 0.04 0.03
4 0.02 0.03 0.04 0.05
5 0.04 0.04 0.07 0.02
6 0.07 0.04 0.01 0.07
gives you a sample joint distribution for discrete variables.
With #Alex's data
# Read the x/y pairs from "x.csv" and show the first rows.
dummy <- read.csv("x.csv")
head(dummy)
x y
1 3 3
2 1 3
3 1 3
4 2 2
5 1 4
6 3 4
Apply the same code as above.
# Joint relative frequencies of the (x, y) pairs in `dummy`.
# The counts are normalised by the number of rows of `dummy` itself (not of
# some other object), so the proportions sum to 1 whatever the data size.
dummy %>%
  as.data.frame() %>%
  group_by(x, y) %>%
  table() / nrow(dummy)
y
x 1 2 3 4
1 0.03 0.11 0.20 0.09
2 0.01 0.09 0.16 0.04
3 0.03 0.10 0.11 0.03
Here is an approach using base R. First provide reproducible data using dput()
# Reproducible data: two numeric vectors holding the paired x and y values.
x <- c(3, 1, 1, 2, 1, 3, 2, 3, 2, 2, 2, 3, 3, 1, 1, 2, 1, 2, 1, 1,
2, 3, 1, 2, 3, 2, 2, 2, 3, 1, 2, 2, 2, 1, 1, 3, 3, 1, 2, 3, 1,
1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1,
3, 1, 2, 2, 3, 3, 1, 1, 3, 3, 2, 3, 2, 1, 2, 3, 3, 2, 1, 3, 2,
1, 2, 1, 1, 2, 2, 3, 3, 3, 1, 2, 1, 1, 3, 1, 2, 2)
y <- c(3, 3, 3, 2, 4, 4, 2, 4, 3, 2, 2, 1, 2, 3, 3, 2, 3, 3, 2, 4,
3, 2, 3, 3, 3, 1, 3, 3, 1, 4, 2, 4, 2, 3, 3, 3, 3, 3, 4, 3, 4,
4, 3, 2, 2, 2, 3, 2, 4, 4, 4, 3, 2, 3, 2, 3, 3, 2, 3, 3, 4, 3,
3, 3, 3, 4, 2, 2, 3, 2, 3, 1, 3, 2, 3, 3, 3, 2, 3, 3, 2, 2, 4,
1, 3, 3, 2, 2, 3, 2, 4, 2, 2, 3, 1, 2, 3, 1, 3, 3)
Then tabulate:
# Counts for every (x, y) combination.
tbl <- table(x, y)
tbl
# y
# x 1 2 3 4
# 1 3 11 20 9
# 2 1 9 16 4
# 3 3 10 11 3
# Joint proportions: each count divided by the grand total.
tbl.prp <- tbl / sum(tbl)
tbl.prp
# y
# x 1 2 3 4
# 1 0.03 0.11 0.20 0.09
# 2 0.01 0.09 0.16 0.04
# 3 0.03 0.10 0.11 0.03
# Total number of tabulated pairs.
sum(tbl)
# [1] 100
Here's an example dataset.
# Example data frame literal: 20 rows with numeric columns vector1 and vector2.
structure(list(vector1 = c(1, 4, 4, 2, 1, 3, 2, 3, 4, 5, 3, 5,
5, 1, 4, 2, 4, 5, 2, 5), vector2 = c(4, 2, 3, 5, 3, 5, 2, 2,
3, 3, 4, 1, 4, 1, 2, 1, 2, 1, 1, 2)), class = "data.frame", row.names = c(NA,
-20L))
Basically what I'm trying to do is create a new variable 'Direction' based on differences between these numbers. I want to say something like:
if vector2 == vector1 or vector2 == vector1 +/- 1 then Direction == 'NS'
if vector2 < vector1 - 1 or if vector2 > vector1 + 1 then Direction == 'EW'
Hopefully this makes sense. Thanks!
A similar solution is this (slightly simpler):
Data:
# Example data: two numeric columns of 20 values each.
df <- data.frame(
vector1 = c(1, 4, 4, 2, 1, 3, 2, 3, 4, 5, 3, 5, 5, 1, 4, 2, 4, 5, 2, 5),
vector2 = c(4, 2, 3, 5, 3, 5, 2, 2, 3, 3, 4, 1, 4, 1, 2, 1, 2, 1, 1, 2)
)
Desired new column:
# "NS" when vector1 equals vector2 or is exactly one above/below it,
# "EW" otherwise. The comparison is evaluated inside `df` so that both
# columns are found there; a bare `vector2` would not exist as an object.
df$direction <- with(
  df,
  ifelse(
    vector1 == vector2 | vector1 == vector2 + 1 | vector1 == vector2 - 1,
    "NS",
    "EW"
  )
)
Outcome:
df
vector1 vector2 direction
1 1 4 EW
2 4 2 EW
3 4 3 NS
4 2 5 EW
5 1 3 EW
6 3 5 EW
7 2 2 NS
8 3 2 NS
9 4 3 NS
10 5 3 EW
11 3 4 NS
12 5 1 EW
13 5 4 NS
14 1 1 NS
15 4 2 EW
16 2 1 NS
17 4 2 EW
18 5 1 EW
19 2 1 NS
20 5 2 EW
you can try this
# Example data: 20 rows with the two numeric columns to compare.
df <- data.frame(
  vector1 = c(1, 4, 4, 2, 1, 3, 2, 3, 4, 5, 3, 5, 5, 1, 4, 2, 4, 5, 2, 5),
  vector2 = c(4, 2, 3, 5, 3, 5, 2, 2, 3, 3, 4, 1, 4, 1, 2, 1, 2, 1, 1, 2)
)
# "NS" when vector2 equals vector1 or is exactly one above/below it, "EW"
# when it is more than one away, and NA when neither test holds.
df$direction <- with(df, {
  is_adjacent <- (vector2 == vector1) |
    (vector2 == (vector1 + 1)) |
    (vector2 == (vector1 - 1))
  is_distant <- vector2 < (vector1 - 1) | (vector2 > (vector1 + 1))
  ifelse(is_adjacent, "NS", ifelse(is_distant, "EW", NA))
})
I am learning R (focused on the tidyverse packages) and am hoping that someone could help with the following problem that has me stumped.
I have a data-set that looks similar to the following:
library("tibble")
# Respondent data: one row per respondent, 16 responses coded 1 or 2.
# Built with tribble(); frame_data() is its deprecated older name and takes
# the same row-wise layout.
# Row D ends in 1 (r16) so that it contains the three 2 -> 1 switches that
# the expected result lists for that respondent.
myData <- tribble(
  ~id, ~r1, ~r2, ~r3, ~r4, ~r5, ~r6, ~r7, ~r8, ~r9, ~r10, ~r11, ~r12, ~r13, ~r14, ~r15, ~r16,
  "A", 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  "B", 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  "C", 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2,
  "D", 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1,
  "E", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
)
Basically, I have multiple rows of respondent data, and each respondent gave 16 responses of either "1" or "2".
For each respondent (i.e., each row) I would like to create an additional three columns:
The first new column - called "switchCount" - identifies the number of times the respondent switched from a "2" response to a "1" response.
The second new column - called "switch1" - identifies the index of the first time the respondent switched from a "2" response to a "1" response.
The third new column - called "switch2" - identifies the index of the final time the respondent switched from a "2" response to a "1" response.
If there is no switch and all values are "2", then return the index of 0.
If there is no switch and all values are "1", then return the index of 16.
The final datatable should therefore look like this:
# Expected result: the 16 responses plus the three derived columns
# switchCount, switch1 and switch2.
# Built with tribble(); frame_data() is its deprecated older name and takes
# the same row-wise layout.
myData <- tribble(
  ~id, ~r1, ~r2, ~r3, ~r4, ~r5, ~r6, ~r7, ~r8, ~r9, ~r10, ~r11, ~r12, ~r13, ~r14, ~r15, ~r16, ~switchCount, ~switch1, ~switch2,
  "A", 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1,
  "B", 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4,
  "C", 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3, 9,
  "D", 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 3, 6, 15,
  "E", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 16, 16
)
One approach could be to concatenate all response columns row wise and then find the occurrences of 2,1 using gregexpr
library(dplyr)
# Start positions of every "2;1" match in a ";"-joined response string, as
# returned by gregexpr() (a single -1 when there is no match).
switch_starts <- function(responses) {
  gregexpr("2;1", responses)[[1]]
}

# Convert character positions in the joined string to response numbers.
# Assumes every response is a single character, so each one takes two
# characters ("<value>;") in the joined string.
response_index <- function(positions) {
  floor(positions / 2) + 1
}

myData %>%
  rowwise() %>%
  mutate(
    # All 16 responses of the row as one ";"-separated string.
    concat_cols = paste(
      r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16,
      sep = ";"
    ),
    # Number of 2 -> 1 switches (0 when the pattern never occurs).
    switchCount = ifelse(
      switch_starts(concat_cols)[1] == -1,
      0,
      length(switch_starts(concat_cols))
    ),
    # First switch; without any switch: 1 if the row contains a 2, else 16.
    switch1 = ifelse(
      switchCount == 0,
      ifelse(grepl("2", concat_cols), 1, 16),
      min(response_index(switch_starts(concat_cols)))
    ),
    # Last switch, with the same fallback as switch1.
    switch2 = ifelse(
      switchCount == 0,
      ifelse(grepl("2", concat_cols), 1, 16),
      max(response_index(switch_starts(concat_cols)))
    )
  ) %>%
  select(-concat_cols)
Output is:
id r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 switchCount switch1 switch2
1 A 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 1
2 B 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4
3 C 2 2 2 1 1 1 2 2 2 1 1 1 1 2 2 2 2 3 9
4 D 1 1 2 2 2 2 1 1 2 2 1 1 1 2 2 1 3 6 15
5 E 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 16 16
Sample data:
# Sample tibble literal: ids A-E, responses r1-r16, and the columns
# switchCount, switch1 and switch2.
myData <- structure(list(id = c("A", "B", "C", "D", "E"), r1 = c(2, 2,
2, 1, 1), r2 = c(2, 2, 2, 1, 1), r3 = c(2, 2, 2, 2, 1), r4 = c(2,
2, 1, 2, 1), r5 = c(2, 1, 1, 2, 1), r6 = c(2, 1, 1, 2, 1), r7 = c(2,
1, 2, 1, 1), r8 = c(2, 1, 2, 1, 1), r9 = c(2, 1, 2, 2, 1), r10 = c(2,
1, 1, 2, 1), r11 = c(2, 1, 1, 1, 1), r12 = c(2, 1, 1, 1, 1),
r13 = c(2, 1, 1, 1, 1), r14 = c(2, 1, 2, 2, 1), r15 = c(2,
1, 2, 2, 1), r16 = c(2, 1, 2, 1, 1), switchCount = c(0, 1,
2, 3, 0), switch1 = c(1, 4, 3, 6, 16), switch2 = c(1, 4,
9, 15, 16)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
I have two matrices
A <- matrix(c(2, 2, 2, 3, 3, 3), nrow = 3, ncol = 2)
> A
[,1] [,2]
[1,] 2 3
[2,] 2 3
[3,] 2 3
B <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2)
> B
[,1] [,2]
[1,] 2 1
[2,] 4 5
[3,] 3 7
Take the mean of all values in B that correspond to 3 in A:
Create a matrix with only the means:
Wanted matrix:
C
[,1] [,2]
[1,] 3 4.3
[2,] 3 4.3
[3,] 3 4.3
When the groups are not column specific this might help:
# A holds the group label of every cell, B the values to average.
A <- matrix(c(2, 2, 2, 3, 3, 3), nrow = 3, ncol = 2)
B <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2)
# C starts as an all-NA array with the same dimensions as A.
C <- array(NA, dim = dim(A))
groups <- unique(as.vector(A))
for (group in groups) {
  # Cells belonging to this group, wherever they sit in the matrix.
  members <- which(A == group)
  # Every such cell receives the mean of the matching cells of B.
  C[members] <- mean(B[members])
}
If A contains NA values, then use
# Distinct group labels of A with any NA removed.
groups <- na.omit(unique(c(A)))
What about:
# A holds the group label of every cell, B the values to average.
A <- matrix(c(2, 2, 2, 3, 3, 2, 3, 2), nrow = 4, ncol = 2)
B <- matrix(c(2, 4, 3, 1, 5, 7, 4, 2), nrow = 4, ncol = 2)
# Mean of B per label in A; the result is named by the label.
group_means <- tapply(B, A, mean)
# Look up each cell's group mean by label and restore the shape of A.
matrix(group_means[as.character(A)], nrow = nrow(A))
?
I have a vector v and I want a vector w that holds the weight (number of occurrences) of each distinct element of v. How can I get the result (vector w) in R? For example,
# v: the input values; u: its distinct values; w: the wanted result, one
# weight per element of u.
v <- c(0, 0, 1, 1, 1, 3, 4, 4, 4, 4, 5, 5, 6)
u <- unique(v)
w <- c(2, 3, 1, 4, 2, 1)
Use table:
# Count how often each distinct value of v occurs.
table(v)
v
0 1 3 4 5 6
2 3 1 4 2 1