Data manipulation with R: Restructuring Data

Data manipulation with R: Restructuring Data - r

I have a dataset that looks like this:
# Two five-row groups (first column 1 or 2), each carrying the values 1..5 twice.
a <- data.frame(rep(1, 5), 1:5, 1:5)
b <- data.frame(rep(2, 5), 1:5, 1:5)
# The column names end up as the strings "1", "2", "3".
colnames(a) <- colnames(b) <- c(1, 2, 3)
# Stack the two groups into one 10-row data frame.
c <- rbind(a, b)
1 2 3
1 1 1 1
2 1 2 2
3 1 3 3
4 1 4 4
5 1 5 5
6 2 1 1
7 2 2 2
8 2 3 3
9 2 4 4
10 2 5 5
but I want it to be restructured to this:
2_1 2_2 3_1 3_2
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 4 4 4 4
5 5 5 5 5

library(reshape)

# Same data as in the question, but with syntactic column names a1, a2, a3.
a <- data.frame(rep(1, 5), 1:5, 1:5)
b <- data.frame(rep(2, 5), 1:5, 1:5)
colnames(a) <- paste0("a", c(1, 2, 3))
colnames(b) <- colnames(a)
d <- rbind(a, b)

# Melt with a3 as the measured variable, then cast a2 (rows) against a1 (columns).
recast(d, a2 ~ a1, measure.var = "a3")
I changed your example slightly, since it had numbers as variable names. This is not recommended because it permits the following nonsense:
"1" <- 3
print(1)
[1] 1
print("1")
[1] "1"
print(`1`)
[1] 3
Need I say more?

Related

Producing all combinations of two column values in R

I have a data.frame with two columns
> data.frame(a=c(5,4,3), b =c(1,2,4))
a b
1 5 1
2 4 2
3 3 4
I want to produce a list of data.frames with different combinations of those column values; there should be a total of six possible scenarios for the above example (correct me if I am wrong):
a b
1 5 1
2 4 2
3 3 4
a b
1 5 1
2 4 4
3 3 2
a b
1 5 2
2 4 1
3 3 4
a b
1 5 2
2 4 4
3 3 1
a b
1 5 4
2 4 2
3 3 1
a b
1 5 4
2 4 1
3 3 2
Is there a simple function to do it? I don't think expand.grid worked out for me.

Actually expand.grid can work here, but it is not recommended because it becomes very inefficient when df has many rows: with n rows it generates n^n index combinations, of which only the n! permutations are kept.
Below is an example using expand.grid
# Build every tuple of row indices of df, one tuple per row of u.
u <- do.call(expand.grid, rep(list(seq(nrow(df))), nrow(df)))
# Keep only the tuples without repeated indices (the permutations), then return
# one copy of df per permutation with column b reordered accordingly.
lapply(
  asplit(
    subset(u, apply(u, 1, FUN = function(idx) length(unique(idx))) == nrow(df)),
    1
  ),
  function(perm) within(df, b <- b[perm])
)
One more efficient option is to use perms from package pracma
library(pracma)
> lapply(asplit(perms(df$b),1),function(v) within(df,b<-v))
[[1]]
a b
1 5 4
2 4 2
3 3 1
[[2]]
a b
1 5 4
2 4 1
3 3 2
[[3]]
a b
1 5 2
2 4 4
3 3 1
[[4]]
a b
1 5 2
2 4 1
3 3 4
[[5]]
a b
1 5 1
2 4 2
3 3 4
[[6]]
a b
1 5 1
2 4 4
3 3 2

Using combinat::permn, create all possible permutations of the b values and bind each one with column a.
df <- data.frame(a = c(5, 4, 3), b = c(1, 2, 4))
# One data frame per ordering of b, each paired with the unchanged a column.
result <- lapply(
  combinat::permn(df$b),
  function(b_perm) data.frame(a = df$a, b = b_perm)
)
result
#[[1]]
# a b
#1 5 1
#2 4 2
#3 3 4
#[[2]]
# a b
#1 5 1
#2 4 4
#3 3 2
#[[3]]
# a b
#1 5 4
#2 4 1
#3 3 2
#[[4]]
# a b
#1 5 4
#2 4 2
#3 3 1
#[[5]]
# a b
#1 5 2
#2 4 4
#3 3 1
#[[6]]
# a b
#1 5 2
#2 4 1
#3 3 4

how to remove part of a phrase from all variables in a column in R

Say I have a data frame:
# Group labels Smember_1..Smember_3, three consecutive rows per group.
g <- rep(paste0("Smember_", 1:3), each = 3)
m <- c(1, 2, 1, 3, 4, 1, 3, 5, 6)
df <- data.frame(g, m)
g m
1 Smember_1 1
2 Smember_1 2
3 Smember_1 1
4 Smember_2 3
5 Smember_2 4
6 Smember_2 1
7 Smember_3 3
8 Smember_3 5
9 Smember_3 6
I would like to remove Smember_ from all the values in the g column, so that the data frame df looks like
> df
g m
1 1 1
2 1 2
3 1 1
4 2 3
5 2 4
6 2 1
7 3 3
8 3 5
9 3 6

I think you want
# Keep only the trailing run of digits of each value. The prefix is matched
# lazily (`.*?`) so that multi-digit suffixes survive intact: a greedy `.*`
# would swallow all but the last digit (e.g. "Smember_10" -> "0").
df$g <- sub("^.*?(\\d+)$", "\\1", df$g, perl = TRUE)

# Strip the literal prefix "Smember_" from every value of the column.
df2$variable <- gsub("Smember_", "", df2$variable, fixed = TRUE)
worked!

how to reverse 'list' function to its original input

I put several data frames (all with the same structure) into a list so that I can apply lapply to each of them. But after the lapply, I want the list elements back as separate data frames, as they were originally.
Is there any way other than a for loop to do this?
dt1 <- data.frame(a = c(1, 1, 1), b = c(2, 2, 2))
dt2 <- data.frame(a = c(3, 3, 3), b = c(4, 4, 4))
dt3 <- data.frame(a = c(5, 5, 5), b = c(6, 6, 6))
lst <- list(dt1, dt2, dt3)

# After the data manipulation: copy each list element back into its own
# variable named newdt1, newdt2, newdt3.
for (i in seq_along(lst)) {
  assign(paste0("newdt", i), lst[[i]])
}
> dt1
a b
1 1 2
2 1 2
3 1 2
> dt2
a b
1 3 4
2 3 4
3 3 4
> dt3
a b
1 5 6
2 5 6
3 5 6
desired output (I did not include the data manipulation part so the output and input looks the same)
> newdt1
a b
1 1 2
2 1 2
3 1 2
> newdt2
a b
1 3 4
2 3 4
3 3 4
> newdt3
a b
1 5 6
2 5 6
3 5 6

Use sapply: manipulate each data frame inside the function and return it wrapped in a one-element list named newdt1, newdt2, ..., and you should be good to go.
# Wrap each data frame in a one-element list named newdt<i>; sapply() then
# combines these into a single list named newdt1, newdt2, ...
# seq_along() is used instead of 1:length(lst) so that an empty `lst` is
# handled gracefully rather than indexing lst[[1]] and lst[[0]].
A <- sapply(seq_along(lst), function(i) {
  a_tmp <- list(lst[[i]])  # any per-data-frame manipulation would go here
  names(a_tmp) <- sprintf("newdt%s", i)
  a_tmp
})
> names(A)
[1] "newdt1" "newdt2" "newdt3"
> class(A)
[1] "list"
> A
$newdt1
a b
1 1 2
2 1 2
3 1 2
$newdt2
a b
1 3 4
2 3 4
3 3 4
$newdt3
a b
1 5 6
2 5 6
3 5 6
For brevity's sake, here is the same approach with a trivial manipulation (using dplyr):
> A <- sapply(1:length(lst), function(i){
+ new_col <- lst[[i]] %>% mutate(sum_ab = a + b)
+ a_tmp <- list(new_col)
+ names(a_tmp) <- sprintf('newdt%s', i)
+ a_tmp
+ })
> A
$newdt1
a b sum_ab
1 1 2 3
2 1 2 3
3 1 2 3
$newdt2
a b sum_ab
1 3 4 7
2 3 4 7
3 3 4 7
$newdt3
a b sum_ab
1 5 6 11
2 5 6 11
3 5 6 11

remove i+1th term if reoccuring

Say we have the following data
# A contains runs of repeated consecutive values; B cycles through 1..5.
A <- c(1, 2, 2, 2, 3, 4, 8, 6, 6, 1, 2, 3, 4)
B <- rep_len(as.numeric(1:5), length(A))
data <- data.frame(A, B)
How would one write a function so that, for A, if the same value appears again in the (i+1)th position, the recurring row is removed?
Therefore the output should look like
data.frame(c(1,2,3,4,8,6,1,2,3,4), c(1,2,5,1,2,3,5,1,2,3))
My best guess would be to use a for loop; however, I have no experience with these.

You can try
# Keep row 1, then every row whose first-column value differs from the value
# in the row directly above it.
data[c(TRUE, tail(data[[1]], -1) != head(data[[1]], -1)), ]

Another option, dplyr-esque:
library(dplyr)

dat1 <- data.frame(
  A = c(1, 2, 2, 2, 3, 4, 8, 6, 6, 1, 2, 3, 4),
  B = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3)
)

# Keep the first row unconditionally, then every row whose A differs from the
# previous row's A. The first row is handled explicitly instead of through
# lag(A, default = FALSE): FALSE is coerced to 0 there, so a data set whose
# first A value is 0 would lose its first row.
dat1 %>% filter(row_number() == 1L | A != lag(A))
## A B
## 1 1 1
## 2 2 2
## 3 3 5
## 4 4 1
## 5 8 2
## 6 6 3
## 7 1 5
## 8 2 1
## 9 3 2
## 10 4 3

using diff, which calculates the pairwise differences with a lag of 1:
# diff() is non-zero exactly where a value differs from its predecessor; the
# leading TRUE always keeps the first row.
data[c(TRUE, diff(data[[1]]) != 0), ]
output:
A B
1 1 1
2 2 2
5 3 5
6 4 1
7 8 2
8 6 3
10 1 5
11 2 1
12 3 2
13 4 3

Using rle
A <- c(1, 2, 2, 2, 3, 4, 8, 6, 6, 1, 2, 3, 4)
B <- c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3)
data <- data.frame(A, B)

# Run-length encode A: each run is a stretch of identical consecutive values.
X <- rle(data$A)
# Row index at which each run starts: 1, then 1 plus the cumulative length of
# all earlier runs (the last run's length is not needed).
Y <- cumsum(c(1, head(X$lengths, -1)))
View(data[Y, ])
row.names A B
1 1 1 1
2 2 2 2
3 5 3 5
4 6 4 1
5 7 8 2
6 8 6 3
7 10 1 5
8 11 2 1
9 12 3 2
10 13 4 3

Relevel a factor

I have an unordered factor ID, a reference vector giving the rank of each level, and a label for each level. I want to order the IDs by the given rank and then override the labels of the factor.
Could you advise whether there is a better way to do this:
ID <- factor(c(1, 2, 2, 3, 1, 3, 3, 2, 1, 1) + 10)
Rank <- c("11" = 3, "12" = 1, "13" = 2)
Label <- c("11" = "B", "12" = "A", "13" = "C")

# Replace each ID level by its rank, then turn the result into an ordered
# factor whose levels are the ranks in ascending order. `ordered = TRUE` is
# spelled out in full rather than relying on partial matching of `order` and
# on the reassignable shorthand `T`.
ID.Rank <- factor(ID, levels = names(Rank), labels = Rank)
ID.Rank <- factor(ID.Rank, levels = sort(Rank), ordered = TRUE)

# Replace each ID level by its label.
ID.Label <- factor(ID, levels = names(Label), labels = Label)
data.frame(ID, ID.Rank, ID.Label)

### Here it is important that ID.Rank has a certain order.
factor(ID.Rank, labels = Label[match(levels(ID.Rank), Rank)])

If I understood your question correctly, here is how you can solve the problem.
set.seed(2)
ID<-as.numeric(ID)
df1<-as.data.frame(ID)
> df1
ID
1 1
2 1
3 3
4 2
5 3
6 2
7 3
8 3
9 2
10 3
df2<-as.data.frame(Rank)
df2$ID<-rownames(df2)
> df2
Rank ID
1 3 1
2 1 2
3 2 3
df3<-merge(df1,df2,by="ID")
ID Rank
1 1 3
2 1 3
3 2 1
4 2 1
5 2 1
6 3 2
7 3 2
8 3 2
9 3 2
10 3 2
df3$Rank is what you are looking for as the final result. You can convert it to a factor.
Updated as per comments: If you want the original order of ID:
df1$IDo<-rownames(df1)
df3
ID IDo Rank
1 1 1 3
2 1 7 3
3 1 4 3
4 2 3 1
5 2 9 1
6 2 10 1
7 3 2 2
8 3 5 2
9 3 6 2
10 3 8 2

myFac <- factor(ID, levels=Rank, labels=names(Rank) )
myFac
[1] 3 3 2 2 3 1 1 2 2 3
Levels: 1 < 2 < 3
match(levels(myFac), names(Label) )
[1] 1 2 3
Label[match(levels(myFac), names(Label) )]
1 2 3
"B" "A" "C"
levels(myFac) <- Label[match(levels(myFac), names(Label) )]
myFac
#-----
[1] C C A A C B B A A C
Levels: B < A < C

Assuming Rank and Label are always in the same order, you just need to order the labels appropriately and then use them to create the ordered factor.
ID <- factor(c(1, 2, 2, 3, 1, 3, 3, 2, 1, 1) + 10)
Rank <- c("11" = 3, "12" = 1, "13" = 2)
Label <- c("11" = "B", "12" = "A", "13" = "C")

# Reorder the labels by ascending rank so that the level order of the factor
# below follows the ranking.
Label <- Label[order(Rank)]

# `ordered = TRUE` is written out in full instead of relying on partial
# matching of `order`.
factor(ID, levels = names(Label), labels = Label, ordered = TRUE)
## [1] B A A C B C C A B B
## Levels: A < C < B

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Data manipulation with R: Restructuring Data - r

Related

Producing all combinations of two column values in R

how to remove part of a phrase from all variables in a column in R

how to reverse 'list' function to its original input

remove i+1th term if reoccuring

Relevel a factor

Categories

Resources