How to reorder rows in a matrix - r

I have a matrix and would like to reorder the rows so that, for example, row 5 is moved to row 2 and row 2, say, to row 7. I have a list of all the row names delimited by \n, and I thought I could somehow read it into R (it's a txt file) and then just use the name of the matrix (in my case 'k') and do something like k[txt file,]-> k_new, but this does not work since the identifiers are not in the first column but are defined as rownames.

k[ c(1,5,3,4,7,6,2), ] #But probably not what you meant....
Or perhaps (if your 'k' object rownames are something other than the default character-numeric sequence):
k[ char_vec , ] # where char_vec will get matched to the row names.
# Example data frame whose row names run from "h" down to "a".
dat <- data.frame(
  person = c(1, 1, 1, 1, 2, 2, 2, 2),
  time = c(1, 2, 3, 4, 1, 2, 3, 4),
  income = c(100, 120, 150, 200, 90, 100, 120, 150),
  disruption = c(0, 0, 0, 1, 0, 1, 1, 0),
  row.names = c("h", "g", "f", "e", "d", "c", "b", "a")
)
dat
# A character index is matched against the row names, so the rows come back
# in exactly the order requested.
dat[c("h", "f", "d", "b"), ]
#-------------
person time income disruption
h 1 1 100 0
f 1 3 150 0
d 2 1 90 0
b 2 3 120 1

Related

Remove Columns from a table that are 90% one value

Example Data:
# Example data: columns C and D hold a single value in 9 of their 10 rows.
A <- c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2)
# The letters must be quoted; bare A, B, C, ... would be looked up as objects
# and fail with "object 'B' not found".
B <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
C <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
D <- c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)
df1 <- data.frame(A, B, C, D)
df1 %>%
select_if(
###column is <90% one value
)
So I have a table that has a few columns that are predominantly one value--like C and D in the above example. I need to get rid of any columns that are 90% or more one unique value. How can I get rid of the columns that fit this criteria?
We may use select with where, get the frequency count with table, convert to proportions, get the max value and check if it is less than .90 to select the particular column
# dplyr supplies %>%, select() and where().
library(dplyr)
# Keep only the columns whose most frequent value accounts for less than 90%
# of the tabulated entries; df1 is overwritten with the reduced table.
df1 <- df1 %>%
# table(.) counts each distinct value, proportions() turns the counts into
# shares, and max() picks the share of the most common value.
select(where(~ max(proportions(table(.))) < .90))
data
# Reproducible example data: C and D each hold one value in 9 of their 10
# rows, while A and B are more varied.
df1 <- structure(list(A = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2), B = c("A",
"B", "C", "D", "E", "F", "G", "H", "I", "J"), C = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 0), D = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, FALSE)), class = "data.frame", row.names = c(NA,
-10L))

How to apply a function to a data.table subset by multiple columns in R?

I have a data table with counts for changes for multiple groups. For example:
# Example counts: each (from, to) pair appears once in group 1 and once in
# group 2. data.table() comes from the data.table package, which must be
# loaded before this line runs.
input <- data.table(from = c("A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B"),
to = c(letters[1:6], letters[1:6]),
from_N = c(100, 100, 100, 50, 50, 50, 60, 60 ,60, 80, 80, 80),
to_N = c(10, 20, 40, 5, 5, 15, 10, 5, 10, 20, 5, 10),
group = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2))
How can I calculate the total for each change across groups? I can do this using a for loop, for example:
# Sum from_N and to_N for every (from, to) pair, pooling rows across groups.
# The distinct values are computed once per level instead of on every
# iteration, and seq_along() keeps the loops from running when there is
# nothing to iterate over (1:length(x) would yield c(1, 0) for empty x).
from_values <- unique(input$from)
out <- vector("list", length(from_values))
for (i in seq_along(from_values)) {
  # Rows that share the current `from` value.
  from_rows <- input[from == from_values[i]]
  to_values <- unique(from_rows$to)
  out2 <- vector("list", length(to_values))
  for (j in seq_along(to_values)) {
    # Rows for one (from, to) pair, collapsed into a single summary row.
    pair_rows <- from_rows[to == to_values[j]]
    out2[[j]] <- data.table(from = pair_rows$from[1],
                            to = pair_rows$to[1],
                            from_N = sum(pair_rows$from_N),
                            to_N = sum(pair_rows$to_N))
    print(to_values[j])  # progress output
  }
  out[[i]] <- do.call("rbind", out2)
  print(from_values[i])  # progress output
}
output <- do.call("rbind", out)
However, the data table I need to apply this to is very large, and I therefore need to maximise performance. Is there a data.table method? Any help will be greatly appreciated!
Perhaps I've overlooked something, but it seems you're just after:
library(data.table)
# setDT() converts `input` to a data.table by reference; the j-expression
# then sums both count columns for every distinct (from, to) pair.
setDT(input)[, .(from_N = sum(from_N), to_N = sum(to_N)), by = .(from, to)]
Output:
from to from_N to_N
1: A a 160 20
2: A b 160 25
3: A c 160 50
4: B d 130 25
5: B e 130 10
6: B f 130 25
An option with dplyr
library(dplyr)
# Sum every column whose name ends in "_N" within each (from, to) pair.
# across() replaces the superseded summarise_at(); .groups = "drop_last"
# keeps the result grouped by `from`, matching summarise()'s default.
input %>%
  group_by(from, to) %>%
  summarise(across(ends_with("_N"), sum), .groups = "drop_last")
Or in data.table
library(data.table)
# .SDcols = patterns('_N$') restricts .SD to the columns whose names end in
# "_N"; lapply(.SD, sum) then sums each of them per (from, to) group.
setDT(input)[, lapply(.SD, sum), by = .(from, to), .SDcols = patterns('_N$')]

Building sequence data for a recommender system- replacing cross-tabular matrix with a variable value

I am trying to build a sequence data for a recommender system. I have built a cross-tabular data (Table 1) and Table 2 as shown below:
enter image description here
I have been trying to replace all the 1's in Table 1 by the "Grade" from the Table 2 in R.
Any insight/suggestion is greatly appreciated.
Instead of replacing the values in the first table with those from the second, the second table can be reshaped directly to 'wide' format with dcast
library(reshape2)
# Cast df2 to wide format: one row per St.No., one column per course, cells
# filled with Grade. [names(df1)] puts the columns in df1's order.
res <- dcast(df2, St.No. ~ Courses, value.var = 'Grade')[names(df1)]
res
# Printed result (blank cells are empty strings):
#  St.No. Math Phys Chem CS
#1      1         A       B
#2      2         B    B
#3      3    A         A  C
#4      4    B    B       D
If we need to replace the blanks with 0
# Blank cells are empty strings; overwrite them with "0". The comparison
# string needs a matching pair of quotes, otherwise the line does not parse.
res[res == ""] <- "0"
data
# df1: 0/1 table with one row per St.No. and one column per course.
df1 <- data.frame(St.No. = 1:4, Math = c(0, 0, 1, 1), Phys = c(1, 1, 0, 1),
Chem = c(0, 1, 1, 0), CS = c(1, 0, 1, 1))
# df2: long format, one row per (St.No., course) combination. Grade is ""
# exactly where df1 holds 0 for that student and course.
df2 <- data.frame(St.No. = rep(1:4, each = 4), Courses = rep(c("Math",
"Phys", "Chem", "CS"), 4),
Grade = c("", "A", "", "B", "", "B", "B", "",
"A", "", "A", "C", "B", "B", "", "D"),
stringsAsFactors = FALSE)

Coding help in R - Subset and colSum is the topic [duplicate]

If I have a table like this:
user,v1,v2,v3
a,1,0,0
a,1,0,1
b,1,0,0
b,2,0,3
c,1,1,1
How do I turn it into this?
user,v1,v2,v3
a,2,0,1
b,3,0,3
c,1,1,1
In base R,
# Value columns as a 5 x 3 matrix, filled row by row.
D <- matrix(
  c(1, 0, 0,
    1, 0, 1,
    1, 0, 0,
    2, 0, 3,
    1, 1, 1),
  nrow = 5,
  byrow = TRUE,
  dimnames = list(1:5, c("v1", "v2", "v3"))
)
# Prepend the grouping column to turn the matrix into a data frame.
D <- data.frame(user = rep(c("a", "b", "c"), times = c(2, 2, 1)), D)
# Sum every remaining column within each user.
aggregate(. ~ user, data = D, FUN = sum)
Returns
> aggregate(. ~ user, D, sum)
user v1 v2 v3
1 a 2 0 1
2 b 3 0 3
3 c 1 1 1
You can use dplyr for this:
library(dplyr)

# Toy data: several rows per user, three numeric value columns.
df <- data.frame(
  user = c("a", "a", "b", "b", "c"),
  v1 = c(1, 1, 1, 2, 1),
  v2 = c(0, 0, 0, 0, 1),
  v3 = c(0, 1, 0, 3, 1)
)

# Group the rows by user, then collapse each group to one row of column sums.
group_by(df, user) %>%
  summarize(
    v1_sum = sum(v1),
    v2_sum = sum(v2),
    v3_sum = sum(v3)
  )
If you're not familiar with the %>% notation, it is basically like piping from bash. It takes the output from group_by() and puts it into summarize(). The same thing would be accomplished this way:
# Same computation without the pipe: keep the grouped table in a variable,
# then hand it to summarize() explicitly.
by_user <- group_by(df, user)
df_summarized <- summarize(
  by_user,
  v1_sum = sum(v1),
  v2_sum = sum(v2),
  v3_sum = sum(v3)
)

R: how to sum columns grouped by a factor?

If I have a table like this:
user,v1,v2,v3
a,1,0,0
a,1,0,1
b,1,0,0
b,2,0,3
c,1,1,1
How do I turn it into this?
user,v1,v2,v3
a,2,0,1
b,3,0,3
c,1,1,1
In base R,
# Value columns as a 5 x 3 matrix, filled row by row.
D <- matrix(
  c(1, 0, 0,
    1, 0, 1,
    1, 0, 0,
    2, 0, 3,
    1, 1, 1),
  nrow = 5,
  byrow = TRUE,
  dimnames = list(1:5, c("v1", "v2", "v3"))
)
# Prepend the grouping column to turn the matrix into a data frame.
D <- data.frame(user = rep(c("a", "b", "c"), times = c(2, 2, 1)), D)
# Sum every remaining column within each user.
aggregate(. ~ user, data = D, FUN = sum)
Returns
> aggregate(. ~ user, D, sum)
user v1 v2 v3
1 a 2 0 1
2 b 3 0 3
3 c 1 1 1
You can use dplyr for this:
library(dplyr)

# Toy data: several rows per user, three numeric value columns.
df <- data.frame(
  user = c("a", "a", "b", "b", "c"),
  v1 = c(1, 1, 1, 2, 1),
  v2 = c(0, 0, 0, 0, 1),
  v3 = c(0, 1, 0, 3, 1)
)

# Group the rows by user, then collapse each group to one row of column sums.
group_by(df, user) %>%
  summarize(
    v1_sum = sum(v1),
    v2_sum = sum(v2),
    v3_sum = sum(v3)
  )
If you're not familiar with the %>% notation, it is basically like piping from bash. It takes the output from group_by() and puts it into summarize(). The same thing would be accomplished this way:
# Same computation without the pipe: keep the grouped table in a variable,
# then hand it to summarize() explicitly.
by_user <- group_by(df, user)
df_summarized <- summarize(
  by_user,
  v1_sum = sum(v1),
  v2_sum = sum(v2),
  v3_sum = sum(v3)
)

Resources