I have two large matrices in this format:
row.names 1 2 3 ... row.names 1 2 3 ....
A 0.1 0.2 0.3 A 1 1 1
B 0.4 0.9 0.3 B 2 3 1
C 0.9 0.9 0.4 C 1 3 1
.
And I want to obtain something like this:
X S CONF P
1 A 0.1 1
1 B 0.4 2
1 C 0.9 1
2 A 0.2 1
2 B ......
That is, I want the column names in one column, with the row names and the corresponding values from both matrices repeated for each column name.
Thank you so much
You can do this pretty easily with some rep and c work:
# Build the long table label by label: column names change slowest and row
# names fastest, which is the same column-by-column order in which c()
# flattens a matrix.
col_ids <- rep(colnames(conf), each = nrow(conf))
row_ids <- rep(rownames(conf), times = ncol(conf))
out <- data.frame(X = col_ids, S = row_ids, CONF = c(conf), P = c(P))
out
# X S CONF P
# 1 1 A 0.1 1
# 2 1 B 0.2 1
# 3 1 C 0.3 1
# 4 2 A 0.4 2
# 5 2 B 0.9 3
# 6 2 C 0.3 1
# 7 3 A 0.9 1
# 8 3 B 0.9 3
# 9 3 C 0.4 1
@Thomas had a similar approach (but one which matches the answer you show in your question). His answer looked like this:
# Long-format table with one row per matrix cell.
# X repeats each column name nrow(conf) times, so S must cycle through the row
# names once per column, i.e. ncol(conf) times (nrow(conf) only worked for
# square matrices).
# NOTE(review): t() makes the values run row by row while the X/S labels run
# column by column; this appears intended to compensate for sample matrices
# that were entered transposed relative to the question -- confirm before
# reusing on other data.
cbind.data.frame(X = rep(colnames(conf), each = nrow(conf)),
                 S = rep(rownames(conf), times = ncol(conf)),
                 CONF = matrix(t(conf), ncol = 1),
                 P = matrix(t(P), ncol = 1))
Assuming we're talking about matrices, I would convert to a data frame, add the rownames as a column and then "melt" each data.frame...
# Example inputs: two 3 x 3 numeric matrices with rows A, B, C and columns
# "1", "2", "3". byrow = TRUE means each line of the literal below is one
# matrix row. TRUE is spelled out because T is an ordinary, reassignable
# variable.
conf <- matrix(
  c(0.1, 0.4, 0.9,
    0.2, 0.9, 0.9,
    0.3, 0.3, 0.4),
  ncol = 3, byrow = TRUE
)
rownames(conf) <- c("A", "B", "C")
colnames(conf) <- 1:3
P <- matrix(
  c(1, 2, 1,
    1, 3, 3,
    1, 1, 1),
  ncol = 3, byrow = TRUE
)
rownames(P) <- c("A", "B", "C")
colnames(P) <- 1:3
library(reshape)
# Carry the row names along as an ordinary column "S" so they survive melting.
conf <- cbind(as.data.frame(conf), "S" = rownames(conf))
P <- cbind(as.data.frame(P), "S" = rownames(P))
# Melt each table to (S, variable, value) and join the two on the
# (variable, S) pair. id.vars is spelled out rather than relying on partial
# matching of `id`.
out <- merge(
  melt(conf, id.vars = "S"),
  melt(P, id.vars = "S"),
  by = c("variable", "S")
)
# After the merge the columns are: variable, S, value from conf, value from P.
colnames(out) <- c("X", "S", "CONF", "P")
Related
I would like to create a function in R that rounds numeric dataframes (or columns in a dataframe) depending on the number. If the number is less than 1, round to 1 decimal, but if it is greater than 1, round to 0 decimals.
This is what I have
data <- data.frame(
  x = c(1.111, 0.809, 5.55555, 0.567),
  y = c(0.235, 0.777, 4.55555555, 393.55)
)
# Round everything to whole numbers, then render the result as trimmed text.
round0 <- function(x) {
  whole <- round(x, digits = 0)
  format(whole, nsmall = 0, trim = TRUE)
}
round0(data)
x y
1 1 0
2 1 1
3 6 5
4 1 394
# What I want
x y
1 1 0.2
2 0.8 1
3 6 5
4 1 394
# Console transcript: round0() rounds values below 1 to one decimal place and
# all other values to whole numbers; sapply() applies it to each column of `data`.
> round0 <- function(x) ifelse(x<1,round(x,1),round(x))
> sapply(data,round0)
x y
[1,] 1.0 0.2
[2,] 0.8 0.8
[3,] 6.0 5.0
[4,] 0.6 394.0
You can use :
# Format numbers for display: values below 1 keep one decimal place, all other
# values are rounded to whole numbers.
#
# x: numeric vector.
# Returns a character vector the same length as x.
round0 <- function(x) {
  ifelse(
    x < 1,
    # trim = TRUE stops format() from left-padding every element to the width
    # of the widest one (e.g. "  0.2" next to "393.6").
    format(round(x, 1), nsmall = 1, trim = TRUE),
    # Explicit conversion keeps the result character even when no element is
    # below 1.
    as.character(round(x))
  )
}
# Assigning into data[] replaces every column with its formatted version while
# keeping `data` a data frame (lapply alone would return a plain list).
data[] <- lapply(data, round0)
data
# x y
#1 1 0.2
#2 0.8 0.8
#3 6 5
#4 0.6 394
Note that this is only for display purpose and classes of columns are of type character. If you want to perform any mathematical calculation on it you need to convert it back to numeric.
I have a data.frame of this form:
sequence support
1 a-b 0.6
2 b-c 0.6
3 a-c 0.6
4 a-b-c 1.0
5 a-d 0.6
and I can transform this to the following:
1 2 3 support
1 a b <NA> 0.6
2 b c <NA> 0.6
3 a c <NA> 1.0
4 a b c 0.6
5 a d <NA> 1.0
I need to transform above table to like this:
1 2 support
1 a b 0.6
2 b c 0.6
3 a d 1.0
More specifically, I want to draw a Sankey Diagram.
So I have to transform the first data.table to the form of 'start node' and 'end node'.
For example, to draw the sequences 'a-b-c' and 'a-d', I need following data.frame:
start end
a b
b c
a d
How can I do this?
Using strsplit and apply:
# data
df1 <- read.table(text = "sequence support
1 a-b 0.6
2 b-c 0.6
3 a-c 0.6
4 a-b-c 1.0
5 a-d 0.6", header = TRUE, as.is = TRUE)
# result - input for sankey
# Turn the nodes of one sequence into its consecutive (From, To) links, each
# carrying the sequence's weight. A sequence with fewer than two nodes yields
# zero rows instead of an error.
sequence_links <- function(nodes, weight) {
  from <- head(nodes, -1)
  data.frame(
    From = from,
    To = tail(nodes, -1),
    Weight = rep(weight, length(from)),
    stringsAsFactors = FALSE
  )
}
# Iterate over the columns directly rather than apply()-ing over rows, which
# would coerce every row (including the numeric support) to character.
datSankey <- do.call(
  rbind,
  Map(sequence_links, strsplit(df1$sequence, "-", fixed = TRUE), df1$support)
)
rownames(datSankey) <- NULL
# From To Weight
# 1 a b 0.6
# 2 b c 0.6
# 3 a c 0.6
# 4 a b 1.0
# 5 b c 1.0
# 6 a d 0.6
# plot: build the Sankey chart from the From/To/Weight columns, then display it
library(googleVis)
sankey_chart <- gvisSankey(
  datSankey,
  from = "From",
  to = "To",
  weight = "Weight"
)
plot(sankey_chart)
We can try
library(splitstackshape)
# Flag sequences that contain an interior node (at least two "-").
i1 <- grepl("-[^-]+-", df$sequence)
# Remove the first "-node" segment from those sequences (a-b-c becomes a-c).
df$sequence[i1] <- sub("-[^-]+", "", df$sequence[i1])
# Keep only sequences that now occur exactly once, then split them on "-".
seen_earlier <- duplicated(df$sequence)
seen_later <- duplicated(df$sequence, fromLast = TRUE)
res <- cSplit(df[!seen_earlier & !seen_later, ], "sequence", "-")
res[, 2:3, with = FALSE]
# sequence_1 sequence_2
#1: a b
#2: b c
#3: a d
I have a matrix:
# 9 x 7 numeric matrix: a facet id plus three pairs of measurements
# (A1/A2, B1/B2, C1/C2), bound together column by column.
mat <- cbind(
  facet = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
  A1 = c(2, 4, 5, 6, 7, 7, 7, 8, 9),
  A2 = c(2, 48, 63, 72, 81, 100, 100, 100, 100),
  B1 = c(1, 2, 3, 3, 4, 4, 5, 5, 6),
  B2 = c(1, 2, 6, 7, 8, 8, 9, 10, 10),
  C1 = c(1, 1, 1, 2, 3, 3, 4, 4, 4),
  C2 = c(1, 1, 1, 3, 4, 4, 4, 5, 5)
)
facet A1 A2 B1 B2 C1 C2
[1,] 0.1 2 2 1 1 1 1
[2,] 0.2 4 48 2 2 1 1
[3,] 0.3 5 63 3 6 1 1
[4,] 0.4 6 72 3 7 2 3
[5,] 0.5 7 81 4 8 3 4
[6,] 0.6 7 100 4 8 3 4
[7,] 0.7 7 100 5 9 4 4
[8,] 0.8 8 100 5 10 4 5
[9,] 0.9 9 100 6 10 4 5
I would like to create the following plot:
Create 9 separate plots faceted by "facet".
Each plot should contain the following:
on the same position on the x axis plot A1 and A2 using points, i.e. (X=1, y=A1) and (X=1,y=A2)
on the same position on the x axis plot B1 and B2 using points, i.e. (X=2, y=B1) and (X=2,y=B2)
on the same position on the x axis plot C1 and C2 using points, i.e. (X=3, y=C1) and (X=3,y=C2)
How can this be done? I understand how to do faceting but I'm struggling with plotting the two values in the same position on the x axis and repeating for each A,B and C. can someone help?
First, reshape your matrix to a data frame in the long format:
library(reshape2)
# Long format: one row per (facet, measurement column) pair.
mat_df <- as.data.frame(mat)
dat <- melt(mat_df, id.vars = "facet")
> head(dat)
# facet variable value
# 1 0.1 A1 2
# 2 0.2 A1 4
# 3 0.3 A1 5
# 4 0.4 A1 6
# 5 0.5 A1 7
# 6 0.6 A1 7
Then, create two variables based on the information in the column variable:
# Split labels such as "A1" into their two characters: `fac` takes the second
# character and `variable` is overwritten with the first. transform() evaluates
# both expressions against the original `dat`, so `fac` still sees the full label.
dat2 <- transform(dat, fac = substr(variable, 2, 2),
variable = substr(variable, 1, 1))
> head(dat2)
# facet variable value fac
# 1 0.1 A 2 1
# 2 0.2 A 4 1
# 3 0.3 A 5 1
# 4 0.4 A 6 1
# 5 0.5 A 7 1
# 6 0.6 A 7 1
Plot:
library(ggplot2)
# One panel per facet value; within a panel both points of a pair share the
# same x position (variable) and are told apart by colour (fac).
facet_plot <- ggplot(dat2, aes(x = variable, y = value)) +
  geom_point(aes(colour = fac)) +
  facet_wrap(~ facet)
facet_plot
# Stack the six measurement columns of `mat` into long format by hand. Each
# piece is (facet, value, type, time): type 1/2/3 is the pair (A/B/C) and time
# 1/2 is the member of the pair. The pieces are bound directly instead of being
# stored in single-letter variables, one of which (`c`) shadowed base::c.
data <- as.data.frame(rbind(
  cbind(mat[, 1], mat[, 2], 1, 1),
  cbind(mat[, 1], mat[, 3], 1, 2),
  cbind(mat[, 1], mat[, 4], 2, 1),
  cbind(mat[, 1], mat[, 5], 2, 2),
  cbind(mat[, 1], mat[, 6], 3, 1),
  cbind(mat[, 1], mat[, 7], 3, 2)
))
colnames(data) <- c("facet", "value", "type", "time")
data$type <- factor(data$type, labels = c("A", "B", "C"))
# Horizontal-only jitter separates the two points of a pair without altering
# their values; width/height are spelled out rather than partially matched.
ggplot(data, aes(y = value, x = type, fill = factor(time))) +
  geom_point(aes(color = factor(time)),
             position = position_jitter(width = 0.1, height = 0.0)) +
  facet_wrap(~facet)
I have these two data.frames
# Observations: a group label (V1) and a value (V2) per row.
df1 <- data.frame(
  V1 = rep(c("A", "B"), times = c(2, 3)),
  V2 = c(0.8, 0.2, 0.3, 0.4, 0.9)
)
V1 V2
1 A 0.8
2 A 0.2
3 B 0.3
4 B 0.4
5 B 0.9
# One reference value (V2) per group label (V1).
df2 <- data.frame(
  V1 = c("A", "B"),
  V2 = c(0.3, 0.8)
)
V1 V2
1 A 0.3
2 B 0.8
I would like to add a new column to df1, df1$V3, based on the V1 names and on the values of df2, i.e., for rows with matching V1: if (df1$V2 < df2$V2) {df1$V3 = -1} else {df1$V3 = 0}. For illustration, the desired output for the example is the following:
V1 V2 V3
1 A 0.8 0
2 A 0.2 -1
3 B 0.3 -1
4 B 0.4 -1
5 B 0.9 0
Thanks in advance
As I outlined in my comment, I think this is sort of what you're after:
df1 <- data.frame(
  V1 = c("A", "A", "B", "B", "B"),
  V2 = c(0.8, 0.2, 0.3, 0.4, 0.9)
)
# The reference column is named V2a so that V1 is the only column shared with
# df1.
df2 <- data.frame(
  V1 = c("A", "B"),
  V2a = c(0.3, 0.8)
)
# Attach each group's reference value to its rows, then flag rows that fall
# below it.
df <- merge(df1, df2, by = "V1")
below_reference <- df$V2 < df$V2a
df$V3 <- ifelse(below_reference, -1, 0)
> df
V1 V2 V2a V3
1 A 0.8 0.3 0
2 A 0.2 0.3 -1
3 B 0.3 0.8 -1
4 B 0.4 0.8 -1
5 B 0.9 0.8 0
As I mentioned, I changed one of the columns names in df2 to make the merging work properly.
R Version 2.11.1 32-bit on Windows 7
I have two data sets as shown below:
data_set_A:
USER_B ACTION
10 0.1
11 0.3
12 0.1
data_set_B:
USER_A USER_B ACTION
1 10 0.2
1 11 0.1
1 15 0.1
2 12 0.2
How to add the ACTION of USER_B from data_set_A to data_set_B? The USER_B in data_set_A is a subset of USER_B in data_set_B.
for the example above, it may be:
USER_A USER_B ACTION
1 10 0.2+0.1
1 11 0.1+0.3
1 15 0.1
2 12 0.2+0.1
In data_set_B I don't need to consider the USER_A, just consider the USER_B appear in data_set_A.
I wonder if it could be achieved without doing one by one?
dfa <- data.frame(user_b = 10:12, action = c(0.1, 0.3, 0.1))
dfb <- data.frame(
  user_a = c(1, 1, 1, 2),
  user_b = c(10, 11, 15, 12),
  action = c(0.2, 0.1, 0.1, 0.2)
)
# For every row of dfb, find the row of dfa with the same user_b (NA if none).
lookup <- match(dfb$user_b, dfa$user_b)
# Pull the matching action; anything missing contributes 0 to the sum.
action <- dfa$action[lookup]
action <- replace(action, is.na(action), 0)
dfb$action <- dfb$action + action
dfb
user_a user_b action
1 1 10 0.3
2 1 11 0.4
3 1 15 0.1
4 2 12 0.3
One way is to do the equivalent of a database merge on the two data sets to form the action pairs you want and then sum those. Using @Andrie's example code:
# Example data: dfa holds one action per user_b; dfb holds (user_a, user_b)
# pairs, each with its own action.
dfa <- data.frame(user_b = 10:12, action = c(0.1, 0.3, 0.1))
dfb <- data.frame(user_a = c(1, 1, 1, 2),
                  user_b = c(10, 11, 15, 12),
                  action = c(0.2, 0.1, 0.1, 0.2))
Solution Code
I'll first present the full solution and then explain the steps:
# Left join: all.x = TRUE keeps every row of dfb; rows whose user_b is absent
# from dfa get NA in action.y.
mdat <- merge(dfb, dfa, by = "user_b", all.x = TRUE)
# Put user_a before user_b again (columns 2 and 1 of the merge result) and add
# the two action columns row-wise; na.rm = TRUE treats a missing action.y as 0.
res <- data.frame(mdat[,c(2,1)],
action = rowSums(mdat[, c("action.x", "action.y")],
na.rm = TRUE))
# Sort the result by user_a, then user_b.
res <- res[order(res$user_a, res$user_b),]
res now contains the results.
Explanation
We first merge the two data frames, matching on user_b:
## merge the data: a left join on user_b that keeps every row of dfb
## (all.x = TRUE); the two action columns are suffixed .x (dfb) and .y (dfa)
mdat <- merge(dfb, dfa, by = "user_b", all.x = TRUE)
mdat
giving:
> mdat
user_b user_a action.x action.y
1 10 1 0.2 0.1
2 11 1 0.1 0.3
3 12 2 0.2 0.1
4 15 1 0.1 NA
Then we just use this object to create the result data frame, and sum the two action.* columns (action.x and action.y) row-wise:
## format the merged data with summed `action`:
## columns 2 and 1 of mdat put user_a before user_b again, and
## na.rm = TRUE makes a missing action.y count as 0 in the row sum
res <- data.frame(mdat[,c(2,1)],
action = rowSums(mdat[, c("action.x", "action.y")],
na.rm = TRUE))
## reorder the rows by user_a, then user_b
res <- res[order(res$user_a, res$user_b),]
res
resulting in
> res
user_a user_b action
1 1 10 0.3
2 1 11 0.4
4 1 15 0.1
3 2 12 0.3