Related
I want to move the value from column II to column I only in the rows where column I shows NA.
This is the data.frame :
# Sample data. Note that the "missing" entries of I are the literal string
# "NA", not R's missing value NA; all three vectors hold character strings.
id <- c("A","B","C","D", "E","F","G")
I <- c("NA","NA","NA","10","20","NA","30")
II <- c("3","4","5","6","7", "8", "8")
df <- data.frame(id, I, II)
The expected result would be like this :
# Desired result: the extra column III holds the value of II where I is the
# string "NA", and the value of I everywhere else.
id <- c("A","B","C","D", "E","F","G")
I <- c("NA","NA","NA","10","20","NA","30")
II <- c("3","4","5","6","7", "8", "8")
III <- c("3","4","5","10","20", "8", "30")
df <- data.frame(id, I, II,III)
Thanks in advance!
You can use ifelse :
# Add III column-wise: keep I unless it holds the string "NA", else take II.
transform(df, III = ifelse(I != "NA", I, II))
# id I II III
#1 A NA 3 3
#2 B NA 4 4
#3 C NA 5 5
#4 D 10 6 10
#5 E 20 7 20
#6 F NA 8 8
#7 G 30 8 30
We can use a simple coalesce after converting the quoted "NA" to actual unquoted NA in a single line
library(dplyr)
# First turn the placeholder string "NA" in I into a real missing value (so the
# printed I column shows <NA>), then take, row by row, the first non-missing
# value among I and II. mutate() evaluates its arguments in order, so III sees
# the already-converted I.
df1 <- df %>%
  mutate(
    I = na_if(I, "NA"),
    III = coalesce(I, II)
  )
df1
# id I II III
#1 A <NA> 3 3
#2 B <NA> 4 4
#3 C <NA> 5 5
#4 D 10 6 10
#5 E 20 7 20
#6 F <NA> 8 8
#7 G 30 8 30
Or using base R: change the "NA" strings to real NA, then create a logical vector based on the presence of NA elements in 'I' and use it to overwrite the values of 'III' (after first assigning 'III' the values of 'II').
# Swap the placeholder string "NA" in I for a genuine missing value.
df$I <- replace(df$I, df$I == "NA", NA)
# Start III as a copy of II, then overwrite it with I wherever I is present.
df$III <- df$II
df$III <- replace(df$III, !is.na(df$I), df$I[!is.na(df$I)])
Or with ifelse
# Element-wise choice: II where I is the string "NA", otherwise I.
df$III <- ifelse(df$I == "NA", df$II, df$I)
A simple for loop in Base R will get this done
# Preallocate the result (the sample vectors hold character strings) instead of
# growing it one element at a time, and iterate with seq_along() so that empty
# input simply skips the loop rather than running it for i = 1 and i = 0.
III <- character(length(id))
for (i in seq_along(id)) {
  if (I[i] == "NA") {
    # I is the placeholder string "NA": fall back to II.
    III[i] <- II[i]
  } else {
    III[i] <- I[i]
  }
}
df <- data.frame(id, I, II, III)
I want to remove a specific repetitive value in each row in the dataframe.
let's say:
DF (3*5)
# c1 c2 c3 c4 c5
1 A A B A NA
2 C C A A B
3 B A A NA NA
I want to remove subsequent A value.
so New_df:
# c1 c2 c3 c4 c5
1 A B A NA NA
2 C C A B NA
3 B A NA NA NA
P.S. Column 5(c5) can be removed because all NA value.
The point is to remove only subsequent 'A' value, and other subsequent values should not be changed (e.g. the second row has two 'C' which should not be removed).
How can I do it in R?
Let's assume that you entered these as character values rather than as factors. That way we don't need the additional step of running DF[] <- lapply ( . , as.character).
# Read the example rows from inline text. stringsAsFactors = FALSE keeps the
# columns as character vectors; with read.table's default na.strings the bare
# NA tokens become missing values.
DF <- read.table(text = "
A A B A NA
C C A A B
B A A NA NA", stringsAsFactors=FALSE)
It appears you only want the duplicated A's removed and shifted with the positions at the end replaced with NA's:
t(apply(DF, 1, function(row_vals) {
  runs <- rle(row_vals)
  # Shrink every run of "A" to a single element; other runs keep their length.
  is_a_run <- runs$values == "A"
  runs$lengths[is_a_run] <- 1
  kept <- rep(runs$values, runs$lengths)
  # Pad on the right with NA so each row keeps its original width.
  c(kept, rep(NA, length(row_vals) - length(kept)))
}))
[,1] [,2] [,3] [,4] [,5]
[1,] "A" "B" "A" NA NA
[2,] "C" "C" "A" "B" NA
[3,] "B" "A" NA NA NA
Loop through rows, use rle and get values:
# example data
DF <- read.table(text = "
A A B A NA
C C A A B
B A A NA NA")
# Collapse only the runs of "A" -- other repeated values such as "C C" must be
# kept -- then pad each row with NA so it keeps its original length.
New_df <- data.frame(t(
  apply(DF, 1, function(i) {
    runs <- rle(i)
    # %in% never yields NA, so the NA runs are left untouched.
    runs$lengths[runs$values %in% "A"] <- 1L
    res <- inverse.rle(runs)
    length(res) <- length(i)
    res
  })))
New_df
# output
#   X1 X2   X3   X4   X5
# 1  A  B    A <NA> <NA>
# 2  C  C    A    B <NA>
# 3  B  A <NA> <NA> <NA>
Is this what you're after?
trimmed <- t(apply(DF, 1, function(x) {
  idx <- which(x == "A")
  # Positions of every "A" that directly follows another "A". The mask must be
  # logical: a numeric 0/1 mask would pick idx[1] repeatedly, and would empty
  # any row that has no repeated "A" at all (x[-integer(0)] selects nothing).
  repeats <- idx[c(FALSE, diff(idx) == 1)]
  kept <- if (length(repeats) > 0) x[-repeats] else x
  # Pad with NA so every row has the same length and apply() returns a matrix.
  length(kept) <- length(x)
  unname(kept)
}))
# Drop the trailing columns that ended up entirely NA.
as.data.frame(trimmed[, colSums(!is.na(trimmed)) > 0, drop = FALSE])
# V1 V2 V3 V4
#1 A B A <NA>
#2 C C A B
#3 B A <NA> <NA>
Sample data
# Inline sample data; the first field of each line is used as the row name.
# FALSE is spelled out because the shorthand F is an ordinary variable that can
# be reassigned.
DF <- read.table(text =
"1 A A B A NA
2 C C A A B
3 B A A NA NA", header = FALSE, row.names = 1)
I have a data frame with two columns "A" and "B". I created a function that works as mentioned below:
If X (a user-entered value) is found in column A, then return the X value found in column A and its corresponding value in column B.
Here's my code:
# Return the A and B columns of the rows of the global `my_dataframe` whose
# A value is among `x`. Values of `x` that do not occur in A yield no row.
myfunction <- function(x) {
  hits <- my_dataframe$A %in% x
  data.frame(my_dataframe[hits, c("A", "B")])
}
I want to tweak this in such a way that if user input (value for X) doesn't appear in column A, return that value and NA for column B.
Example:
A B
1 A12
2 F1222
If the values for X are 1, 5. I want the output to look like this --
1 A12
5 NA
One approach could be to first find the matched rows using the condition matched = my_dataframe$A %in% x.
Then, if any matched rows are found, use matched to return the corresponding rows. Otherwise, create a row with an NA value for B.
# Look up `x` in column A of the global `my_dataframe`.
#
# x: vector of values to look up.
# Returns a data frame with columns A and B: the matching rows of
# `my_dataframe` (row names preserved), followed by one row with B = NA for
# every value of `x` that does not occur in A. Handling the unmatched values
# separately (instead of all-or-nothing) is what makes a mixed input such as
# c(1, 5) return both "1 -> its B" and "5 -> NA".
myfunction <- function(x) {
  matched <- my_dataframe$A %in% x
  found <- my_dataframe[matched, c("A", "B")]
  absent <- x[!(x %in% my_dataframe$A)]
  if (length(absent) == 0) {
    return(found)
  }
  not_found <- data.frame(A = absent, B = NA)
  if (nrow(found) == 0) {
    return(not_found)
  }
  rbind(found, not_found)
}
#Test
myfunction(2)
# A B
# 2 2 A34
myfunction(11)
# A B
# 1 11 NA
Edited: Based on latest feedback from OP, I think dplyr::left_join will do the trick for him as:
a <- 1
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 1 A21
a <- c(2,3,12,34,45)
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 2 A34
# 2 3 D345
# 3 12 <NA>
# 4 34 <NA>
# 5 45 <NA>
Data
# Lookup table used by the examples: integer keys in A, labels in B (kept as
# character strings rather than factors).
my_dataframe <- data.frame(
  A = 1:4,
  B = c("A21", "A34", "D345", "E45"),
  stringsAsFactors = FALSE
)
# Look up `x` in column A of the global `my_dataframe`.
#
# x: vector of values to look up.
# Returns a data frame with columns A and B holding the matching rows, plus one
# row with B = NA for each value of `x` that is absent from A. Unmatched values
# are appended even when other values did match, so a mixed input is no longer
# silently truncated to the matches.
myfunction <- function(x) {
  r <- my_dataframe[my_dataframe$A %in% x, c("A", "B")]
  absent <- x[!(x %in% my_dataframe$A)]
  if (length(absent) > 0) {
    filler <- data.frame(A = absent, B = NA)
    r <- if (nrow(r) > 0) rbind(r, filler) else filler
  }
  data.frame(r)
}
> myfunction(3)
A B
1 3 NA
> myfunction(2)
A B
2 2 F1222
edit to allow vectors:
# Look up a vector of values in column A of the global data frame `data`.
#
# x: vector of values to look up.
# Returns the rows of `data` whose A is among `x`; values of `x` that are not
# found are appended as rows with B = NA.
my <- function(x) {
  # Set membership, not `A == x`: `==` compares element by element with
  # recycling, so it only worked when x happened to line up with the rows.
  s <- data[data$A %in% x, , drop = FALSE]
  m <- x %in% s$A
  if (all(m)) {
    s
  } else {
    # A data frame (rather than a cbind() matrix) keeps the column types.
    rbind(s, data.frame(A = x[!m], B = NA))
  }
}
my(1)
A B
1 1 A12
> my(1:10)
A B
1 1 A12
2 2 F1222
3 3 <NA>
4 4 <NA>
5 5 <NA>
6 6 <NA>
7 7 <NA>
8 8 <NA>
9 9 <NA>
10 10 <NA>
> my(4)
A B
1 4 NA
my(c(1,3.11))
A B
1 1.00 A12
2 3.11 <NA>
I have a list of lists and I want to convert it into a dataframe. The challenge is that there are missing variables names in lists (not NA's but the variable is missing completely).
To illustrate on example: from
# Two records; the second one has no element named "b" at all.
my_list <- list(
  list(a = 1, b = 2, c = 3),
  list(a = 4, c = 6)
)
I would like to get
a b c
[1,] 1 2 3
[2,] 4 NA 6
Another option is
library(reshape2)
as.data.frame(acast(melt(my_list), L1~L2, value.var='value'))
# a b c
#1 1 2 3
#2 4 NA 6
Or, as @David Arenburg suggested, recast can be used as a wrapper for melt/dcast:
recast(my_list, L1 ~ L2, value.var = 'value')[, -1]
# a b c
#1 1 2 3
#2 4 NA 6
You can use the bind_rows function from the dplyr package :
# Turn each record into a one-row data frame, then stack the rows; a column
# that a record lacks is filled with NA.
my_list <- list(
  list(a = 1, b = 2, c = 3),
  list(a = 4, c = 6)
)
dplyr::bind_rows(lapply(my_list, function(rec) as.data.frame(rec)))
This outputs:
Source: local data frame [2 x 3]
a b c
1 1 2 3
2 4 NA 6
Another answer, this requires to change the class of the arguments to data.frames:
library(plyr)
# rbind.fill() works on data frames, so each list is converted first; the
# frames are then stacked, with NA for columns a frame does not have.
lista <- as.data.frame(list(a = 1, b = 2, c = 3))
listb <- as.data.frame(list(a = 4, c = 6))
my_list <- do.call(rbind.fill, list(lista, listb))
my_list
a b c
1 1 2 3
2 4 NA 6
Hi guys I have a difficult situation to manage:
I have a data.frame that looks like this:
General_name
a
b
c
d
m
n
and another data.frame that looks like this:
First_names_list a=34;b=4
Second_names_list d=2;m=98;n=32
Third_names_list c=1;d=12;m=0.1
I have to match each element of the first data.frame with each element before = in the second data.frame[,2] so that finally I have to obtain the following table:
Names a b c d m n
First_names_list 34 4 NA NA NA NA
Second_names_list NA NA NA 2 98 32
Third_names_list NA NA 1 12 0.1 NA
Any suggestion? It seems to be too difficult to me.
Best
E.
Option 1
Here is one approach using dcast from "reshape2" and concat.split from my "splitstackshape" package:
library(splitstackshape)
## The following can also be done in 2 steps. The basic idea is to split
## the values into a semi-long form for `dcast` to be able to use. So,
## I've split first on the semicolon, and made the data into a long form
## at the same time, then I've split on =, but kept it wide that time.
out <- concat.split(concat.split.multiple(df, "V2", ";", "long"),
"V2", "=", drop = TRUE)
out
# V1 time V2_1 V2_2
# 1 First_names_list 1 a 34.0
# 2 Second_names_list 1 d 2.0
# 3 Third_names_list 1 c 1.0
# 4 First_names_list 2 b 4.0
# 5 Second_names_list 2 m 98.0
# 6 Third_names_list 2 d 12.0
# 7 First_names_list 3 <NA> NA
# 8 Second_names_list 3 n 32.0
# 9 Third_names_list 3 m 0.1
library(reshape2)
dcast(out[complete.cases(out), ], V1 ~ V2_1, value.var="V2_2")
# V1 a b c d m n
# 1 First_names_list 34 4 NA NA NA NA
# 2 Second_names_list NA NA NA 2 98.0 32
# 3 Third_names_list NA NA 1 12 0.1 NA
Option 2
Here's another option using a more recent version of data.table. The concept is very similar to the approach taken above.
library(data.table)
library(reshape2)
packageVersion("data.table")
# [1] ‘1.8.11’
# Work on a data.table built from df.
dt <- data.table(df)
# One row per "key=value" token: split V2 on ";" within each V1 group.
S1 <- dt[, list(X = unlist(strsplit(as.character(V2), ";"))), by = V1]
# Split each token on "=" and store the two parts as the new columns A (key)
# and B (value).
S1[, c("A", "B") := do.call(rbind.data.frame, strsplit(X, "="))]
S1
# V1 X A B
# 1: First_names_list a=34 a 34
# 2: First_names_list b=4 b 4
# 3: Second_names_list d=2 d 2
# 4: Second_names_list m=98 m 98
# 5: Second_names_list n=32 n 32
# 6: Third_names_list c=1 c 1
# 7: Third_names_list d=12 d 12
# 8: Third_names_list m=0.1 m 0.1
dcast.data.table(S1, V1 ~ A, value.var="B")
# V1 a b c d m n
# 1: First_names_list 34 4 NA NA NA NA
# 2: Second_names_list NA NA NA 2 98 32
# 3: Third_names_list NA NA 1 12 0.1 NA
Both of the above options assume we're starting with:
# Input data: V1 holds the list name, V2 the ";"-separated "key=value" pairs
# belonging to that list. Both columns are character vectors.
df <- structure(list(V1 = c("First_names_list", "Second_names_list",
"Third_names_list"), V2 = c("a=34;b=4", "d=2;m=98;n=32",
"c=1;d=12;m=0.1")), .Names = c("V1", "V2"), class = "data.frame",
row.names = c(NA, -3L))
Here is a solution, using apply within apply:
#Data frame 1
df1 <- read.table(text=
"General_name
a
b
c
d
m
n", header=TRUE, as.is=TRUE)
#Data frame 2
df2 <- read.table(text=
"col1 col2
First_names_list a=34;b=4
Second_names_list d=2;m=98;n=32
Third_names_list c=1;d=12;m=0.1", header=TRUE, as.is=TRUE)
#make lists for each row, sep by ";"
df2split <- strsplit(df2$col2, split = ";", fixed = TRUE)
#result
# For every row, split each "key=value" entry and look the general names up by
# exact key. Exact matching avoids the false hits of a substring search such as
# grepl("a", ...), which also matches any entry that merely contains an "a".
# vapply() (rather than sapply()) guarantees a numeric matrix for any input.
result <- t(
  vapply(df2split, function(entries) {
    parts <- strsplit(entries, "=", fixed = TRUE)
    keys <- vapply(parts, `[`, character(1), 1L)
    values <- as.numeric(vapply(parts, `[`, character(1), 2L))
    # Names without a matching key index with NA and therefore yield NA.
    values[match(df1$General_name, keys)]
  }, numeric(nrow(df1)))
)
# Label rows with the list names and columns with the general names.
dimnames(result) <- list(df2$col1, df1$General_name)
result
I feel this is a slightly round-about way to do it so I look forward to a better solution as well. But this works.
library(data.table)
library(reshape2)
#creating datasets
dt <- data.table(read.csv(textConnection('
"First_names_list","a=34;b=4"
"Second_names_list","d=2;m=98;n=32"
"Third_names_list","c=1;d=12;m=0.1"
'),header = FALSE))
General_name <- c('a','b','c','d','m','n')
TotalBreakup <- data.table(
  V1 = General_name
)
# Fixing datatypes
TotalBreakup <- TotalBreakup[, lapply(.SD, as.character)]
dt <- dt[, lapply(.SD, as.character)]
# looping through each row and calculating breakdown
# (seq_len() so that a table with zero rows skips the loop)
for (i in seq_len(nrow(dt))) {
  # the next two statements are the workhorse of this code. Run each part of
  # these statements step by step to see.
  # 1) split row i's "k=v;k=v" string into a list of c(key, value) pairs
  dtlist <- strsplit(unlist(strsplit(dt[i, V2], ";")), "=")
  # 2) lay the pairs out as a two-column table: V1 = key, V2 = value
  #    (reuses dtlist instead of repeating the same split a second time)
  breakup <- data.table(t(matrix(unlist(dtlist), nrow = 2)))
  # fixing datatypes again
  breakup <- breakup[, lapply(.SD, as.character)]
  #appending to master dataset
  TotalBreakup <- merge(TotalBreakup, breakup, by = "V1", all.x = TRUE)
}
#formatting results
setnames(TotalBreakup, c("Names", dt[, V1]))
TotalBreakup <- acast(melt(TotalBreakup, id.vars = "Names"), variable ~ Names)
Output -
> TotalBreakup
a b c d m n
First_names_list "34" "4" NA NA NA NA
Second_names_list NA NA NA "2" "98" "32"
Third_names_list NA NA "1" "12" "0.1" NA
A way is this:
#the second dataframe you provided
DF2 <- read.table(text = '
First_names_list a=34;b=4
Second_names_list d=2;m=98;n=32
Third_names_list c=1;d=12;m=0.1
', header = FALSE, stringsAsFactors = FALSE)
#empty dataframe
DF <- structure(list(a = c(NA, NA, NA), b = c(NA, NA, NA), c = c(NA,
NA, NA), d = c(NA, NA, NA), m = c(NA, NA, NA), n = c(NA, NA,
NA)), .Names = c("a", "b", "c", "d", "m", "n"), row.names = c("First_names_list",
"Second_names_list", "Third_names_list"), class = "data.frame")
DF
# a b c d m n
#First_names_list NA NA NA NA NA NA
#Second_names_list NA NA NA NA NA NA
#Third_names_list NA NA NA NA NA NA
#fill the dataframe
myls <- strsplit(DF2$V2, split = ";")
# Plain nested loops: the work is a side effect on DF, so there is no need for
# sapply() with a global assignment (<<-) from inside a closure.
for (i in seq_along(myls)) {
  for (entry in myls[[i]]) {
    # entry looks like "key=value"; the key names the column to fill and the
    # value is stored as a character string.
    res <- unlist(strsplit(entry, "="))
    DF[i, res[1]] <- res[2]
  }
}
DF
# a b c d m n
#First_names_list 34 4 <NA> <NA> <NA> <NA>
#Second_names_list <NA> <NA> <NA> 2 98 32
#Third_names_list <NA> <NA> 1 12 0.1 <NA>