I want to merge the 50 MAF files with the sample information so that I can read it as a data.table and subset it.
# Attempt to read every MAF file under mafs/ together with the sample sheet.
library(maftools)
# Load MAF files
# NOTE(review): system.file() resolves paths inside the installed maftools
# package, not the working directory; the reported error shows `maf` ending up
# as "" here. list.files() can also return many names, while the fread() call
# in the traceback is handed a single `file = maf`.
maf = system.file("extdata", list.files(path="mafs/"), package="maftools")
# Load sample information
# NOTE(review): same package-relative lookup -- presumably the user's own
# sample-information.tsv is not shipped with maftools; confirm the real path.
si <- system.file("extdata", "sample-information.tsv", package="maftools")
# Read the MAF input and attach the sample (clinical) information.
d = read.maf(maf=maf, clinicalData=si)
Traceback:
Error in data.table::fread(file = maf, sep = "\t", stringsAsFactors = FALSE, :
File '' does not exist or is non-readable. getwd()=='C:/Users/User/Documents/VanAllen'
> traceback()
3: stop("File '", file, "' does not exist or is non-readable. getwd()=='",
getwd(), "'")
2: data.table::fread(file = maf, sep = "\t", stringsAsFactors = FALSE,
verbose = FALSE, data.table = TRUE, showProgress = TRUE,
header = TRUE, fill = TRUE, skip = "Hugo_Symbol", quote = "")
1: read.maf(maf = maf, clinicalData = si)
1: data.table::fread(input = maf)
Maftools documentation:
https://www.bioconductor.org/packages/release/bioc/manuals/maftools/man/maftools.pdf
When I run your code, maf is indeed an empty string (""), which of course cannot be read by fread. However, when I try
fread("R/x86_64-pc-linux-gnu-library/3.6/maftools/extdata/brca.maf.gz")
it works as expected.
Related
I am using
myCounts<-read.csv("myCounts.csv", header = TRUE, row.names = 1, sep = ",")
and
Book4 <- read_delim("Book4.csv", delim = ";",
escape_double = FALSE, trim_ws = TRUE)
to read two CSV files, but read.csv and read_delim parse them differently.
Could you please explain how to read the Book4 data into the same structure as the myCounts data?
I tried following, it works.
df<-read.delim("~/Documents/sample.csv" ,sep = ";",row.names = 1)
I wish to write a data.frame from one R file to another R file using base R. So far I have tried cat, capture.output, write.table and sink. I found suggested solutions for capture.output and write.table here:
writing a data.frame using cat
However, I have not been able to obtain an ideal solution. write.table comes the closest but returns an unwanted warning message.
Here is the data.frame in the source R file:
my.df <- data.frame(scenario = 3333,
AA = 200,
BB = 999,
CC = 444,
DD = 7)
Here is the desired appearance in the recipient R file except I want the name to be my.df, not desired.format:
desired.format <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
Here is the full code for the source R file except for the setwd() statement:
# Source script: writes a small header, then several renderings of my.df, into
# the recipient file my_R_file.R so the approaches can be compared.
R.file <- 'my_R_file.R'
# No `append = TRUE` on this first call, unlike every later cat() call.
cat(' ' , file = R.file, sep=c("\n") )
cat('This is my stuff' , file = R.file, sep=c("\n"), append = TRUE)
cat('#' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# The one-row data.frame that should be reproduced in the recipient file.
my.df <- data.frame(scenario = 3333,
AA = 200,
BB = 999,
CC = 444,
DD = 7)
str(my.df)
# Desired format in my_R_file.R
desired.format <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
str(desired.format)
# capture.output includes an unwanted row number
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
cat('capture.output' , file = R.file, sep=c("\n"), append = TRUE)
# Writes the printed form of my.df (with its row name) to the file.
capture.output(my.df, file = R.file, append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# write.table returns an unwanted warning message
cat('write.table' , file = R.file, sep=c("\n"), append = TRUE)
# Emit the opening of a read.table(text = '...') call around the table body.
cat('my.df <- read.table(text = \'' , file = R.file, sep=c("\n"), append = TRUE)
# This call combines col.names = TRUE with append = TRUE; it is the one that
# raises the "appending column names to file" warning quoted in the post.
write.table(my.df, file = R.file, col.names = TRUE, row.names = FALSE, quote = FALSE, append=TRUE)
# Close the read.table(text = '...') call started above.
cat('\', header = TRUE, stringsAsFactors = FALSE)' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# sink does not return any useful output
#cat('sink' , file = R.file, sep=c("\n"), append = TRUE)
#sink(R.file)
#sink()
#my.df
#sink()
#cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# Footer lines marking the end of the generated file.
cat('This is the end' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
Here are the full contents of the recipient R file my_R_file.R:
This is my stuff
#
capture.output
scenario AA BB CC DD
1 3333 200 999 444 7
write.table
my.df <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
This is the end
Here is the warning message returned by write.table:
Warning message:
In write.table(my.df, file = R.file, col.names = TRUE, row.names = FALSE, :
appending column names to file
Thank you for any suggestions on eliminating this warning message or arriving at a better solution. I would rather not suppress all warning messages.
Notice that there is base:::print.data.frame method involved when evaluating my.df which is of class "data.frame". It has arguments such as row.names=. Accordingly you may specify:
capture.output(print(my.df, row.names=FALSE), file=R.file, append=TRUE)
I have been trying to combine several files, but I get a different error each time I run the code. Error in
list.files(path = "~/Documents", full.names = FALSE) %>% lapply(read_csv) %>% : could not find function "%>%"
> write.csv(data, file = "Fecundity.csv", row_names = FALSE)
Error in utils::write.table(data, file = "Fecundity.csv", row_names = FALSE, :
unused argument (row_names = FALSE)
> write.csv(data, file = "Fecundity.csv", row.names = FALSE)
Error in as.data.frame.default(x[[i]], optional = TRUE) :
cannot coerce class ‘"function"’ to a data.frame
> write.csv(data, file = "Fecundity.csv", row.names = TRUE)
Error in as.data.frame.default(x[[i]], optional = TRUE) :
cannot coerce class ‘"function"’ to a data.frame
this is the code
# Combine every CSV file in the data directory into a single Fecundity.csv.
library(data.table)
# NOTE(review): the path below is not quoted, so this line cannot parse as
# written; presumably "/cloud/project/Data sheets" was intended.
setwd(/cloud/project/Data sheets)
# NOTE(review): the pattern says ".cvs" -- presumably ".csv" was meant; confirm
# the actual file extension.
files <- list.files(pattern = ".cvs")
# NOTE(review): sep = "." asks fread to split fields on periods; verify that
# this is really the delimiter used in the files.
temp <- lapply(files, fread, sep= ".")
# Stack the per-file tables into one data.table.
data <- rbindlist(temp)
# NOTE(review): the trailing backtick looks like leftover markup from the post.
write.csv(data, file = "Fecundity.csv", row.names = FALSE)`
Do it this way.
# Combine every CSV file under csvFiles/ into csvFiles/Fecundity.csv.
library(data.table)
library(fs)

out_file <- "csvFiles/Fecundity.csv"

files <- dir_ls("csvFiles", regexp = "\\.csv$")
# The output lives in the input directory and matches the pattern, so drop it;
# otherwise a second run would read the previous result back in.
files <- files[basename(files) != basename(out_file)]

if (length(files) > 0) {
  # `file =` opens the path; `text =` would parse the file name itself as data.
  tables <- lapply(files, function(path) fread(file = path))
  # Bind once instead of appending per file: headers are written a single time
  # and columns are matched by name (missing ones are filled with NA).
  combined <- rbindlist(tables, use.names = TRUE, fill = TRUE)
  # Overwrite rather than append so reruns do not duplicate rows.
  fwrite(combined, out_file)
}
This is the fastest way I know. This code will read thousands of files in seconds!
I'm trying to make a function in R, that performs some specific operations on a lot of different data sets, with the following code:
# Run data_func for each municipality on a two-worker cluster.
library(parallel)
# NOTE(review): the cluster is created before setwd(); the error quoted in the
# post reports the workers' getwd() as a different directory than D:\Speciale.
cluster = makeCluster(2)
setwd("D:\\Speciale")
# Intended to load one municipality's data, clean it and write a CSV.
# kommune: municipality name supplied by parLapply().
data_func <- function(kommune) {
# NOTE(review): inside a function this clears the function's own variables,
# which includes the `kommune` argument.
rm(list=ls())
library(dplyr)
library(data.table)
library (tidyr)
#Load address and turbine datasets
# NOTE(review): "kommune.csv" is a fixed string here; the `kommune` argument is
# never inserted into the file name (see the file name in the quoted error).
distances <- fread(file="Adresser og distancer\\kommune.csv", header=TRUE, sep=",", colClasses = c("longitude" = "character", "latitude" = "character", "min_distance" = "character", "distance_turbine" = "character", "id_turbine" = "character"), encoding="Latin-1")
turbines <- fread(file="turbines_DK.csv", header=TRUE, sep=",", colClasses = c("lon" = "character", "lat" = "character", "id_turbine" = "character", "total_height" = "character", "location" = "character"), encoding="Latin-1")
# NOTE(review): the next line is a placeholder from the post, not R code.
Some cleaning of the data and construction of new variables
#write out the dataset
setwd("D:\\Speciale\\Analysedata")
# NOTE(review): mock_final is not defined in the code shown; presumably it is
# produced by the omitted cleaning step. The output name is also a fixed string.
fwrite(mock_final, file = "final_kommune.csv", row.names = FALSE)
}
# Apply data_func to each name on the cluster and row-bind whatever it returns.
do.call(rbind, parLapply(cl = cluster, c("Albertslund", "Alleroed"), data_func))
When I do this, I get the following error message:
Error in checkForRemoteErrors(val) :
2 nodes produced errors; first error: File 'Adresser og distancer\kommune.csv' does not exist or is non-readable. getwd()=='C:/Users/KSAlb/OneDrive/Dokumenter'
I need it to change the name of the files. Here it should insert Albertslund instead of kommune in the file names, perform the operations, write out a CSV file (changing "final_kommune.csv" to "final_Albertslund.csv"), clear the environment and then move on to the next data set, Alleroed.
Albertslund and Alleroed are just examples, there is a total of 98 data sets I need to process.
Maybe something like the code below can be of help. Untested, since there are no data.
library(parallel)
library(dplyr)
library(data.table)
library(tidyr)
#' Process one municipality: read "<kommune>.csv" and write "final_<kommune>.csv".
#'
#' Written to run on `parallel` worker sessions, so it does not rely on the
#' caller's attached packages or working directory.
#'
#' @param kommune Municipality name; used to build the input and output file names.
#' @param inpath Directory containing "<kommune>.csv". The default is absolute,
#'   like `outpath`, because a relative path is resolved against the worker's
#'   own working directory.
#' @param turbines Turbine table, read once by the caller and passed in.
#' @param outpath Directory that receives "final_<kommune>.csv".
#' @return The value of `data.table::fwrite()`; called for its side effect.
data_func <- function(kommune, inpath = "D:/Speciale/Adresser og distancer",
                      turbines, outpath = "D:/Speciale/Analysedata") {
  filename <- file.path(inpath, paste0(kommune, ".csv"))

  #Load address and turbine datasets
  # Namespace-qualified: data.table is not attached on a fresh worker session.
  distances <- data.table::fread(
    file = filename,
    header = TRUE,
    sep = ",",
    colClasses = c(
      "longitude" = "character",
      "latitude" = "character",
      "min_distance" = "character",
      "distance_turbine" = "character",
      "id_turbine" = "character"
    ),
    encoding = "Latin-1"
  )

  #Some cleaning of the data and construction of new variables
  # NOTE(review): the cleaning step is omitted here; it must create `mock_final`
  # from `distances` and `turbines` before the write below can succeed.

  #write out the dataset
  outfile <- file.path(outpath, paste0("final_", kommune, ".csv"))
  data.table::fwrite(mock_final, file = outfile, row.names = FALSE)
}
# Driver: start the workers, read the shared turbine table once, then process
# each municipality in parallel.
base_dir <- "D:/Speciale"
# Set the working directory before the workers are launched; in the original
# order the workers kept the previous directory and relative paths failed.
setwd(base_dir)

cluster <- makeCluster(2)
# Workers are separate R sessions: attach the packages the processing needs.
clusterEvalQ(cluster, {
  library(dplyr)
  library(data.table)
  library(tidyr)
})

# Read turbines file just once
turbines <- fread(
  file = file.path(base_dir, "turbines_DK.csv"),
  header = TRUE,
  sep = ",",
  colClasses = c(
    "lon" = "character",
    "lat" = "character",
    "id_turbine" = "character",
    "total_height" = "character",
    "location" = "character"
  ),
  encoding = "Latin-1"
)

kommune_vec <- c("Albertslund", "Alleroed")
# Pass an absolute input directory so the result does not depend on the
# workers' working directory; always release the workers, even on error.
results <- tryCatch(
  parLapply(
    cl = cluster,
    kommune_vec,
    data_func,
    turbines = turbines,
    inpath = file.path(base_dir, "Adresser og distancer")
  ),
  finally = stopCluster(cluster)
)
do.call(rbind, results)
I am using the RAM package.
The function I use to compute the diversity indices is very simple; it appends the results as new columns to my metadata:
outname <-OTU.diversity(data=OTUtables, meta=metatables)
(Arguments: data a list of OTU tables.
meta the metadata to append the outputs)
I am looping it but I get this error:
please provide otu tables as list; see ?RAM.input.formatting
So I go to that help menu and read this:
one data set:
data=list(data=otu)
multiple data sets:
data=list(data1=otu1, data2=otu2, data3=otu3)
here is my code:
# For each row of metadataMasterTax: read a metadata table and an OTU table,
# call OTU.diversity, and write the metadata table to a .meta.div.tsv file.
i <- 1
for(i in 1:nrow(metadataMasterTax)){
# Metadata table for this row (path and file name are pasted together).
temp <- read.table(paste(metadataMasterTax$DataAnFilePath[i], metadataMasterTax$meta[i], sep = ""),
sep = "\t", header = TRUE, dec = ".", comment.char = "", quote = "", stringsAsFactors = TRUE,
as.is = TRUE)
temp2 <- temp
temp2$row.names <- NULL # drop a column named row.names, if present
# OTU table for this row; check.names = FALSE keeps the column names as read.
trans <- read.table(paste(metadataMasterTax$taxPath[i], metadataMasterTax$taxName[i], sep = ""),
sep = "\t", header = TRUE, dec = ".", comment.char = "", quote = "", stringsAsFactors = TRUE,
as.is = TRUE, check.names = FALSE)
trans2 <- trans
trans2$row.names <- NULL # drop a column named row.names, if present
# NOTE(review): this `data` list is built but not passed to OTU.diversity below.
# trans2[i] is column i of the OTU table (indexed by the loop counter), not the
# whole table; the help text quoted in the post shows data=list(data=otu).
data=list(data=trans2[i])
# NOTE(review): temp2[i] likewise selects only column i of the metadata.
temp2[i] <- OTU.diversity(data=trans2[i], meta=temp2[i])
# Error in OTU.diversity(trans2, temp2) :
# please provide otu tables as list; see ?RAM.input.formatting
# is.list(trans2)
# [1] TRUE
# is.list(data)
# [1] TRUE
# NOTE(review): only the taxonomy column is copied back into `temp`, which is
# the table written below; confirm where the diversity columns should end up.
temp$taxonomy <- temp2$taxonomy
write.table(temp, file=paste(pathDataAn, "diversityDir/", metadataMasterTax$ShortName[i], ".meta.div.tsv", sep = ""),
append = FALSE,
sep = "\t",
row.names = FALSE)
}
Can anyone help me please....
thanks a lot
Because the main problem appears to be getting the OTU.diversity function to work, I focus on this issue. The code snippet below runs OTU.diversity without any problems, using the Google sheets data provided by OP.
# Demonstration loop: load a metadata sheet and an OTU sheet, run
# OTU.diversity on them, and write the result to one TSV file per iteration.
library(gsheet)
library(RAM)
# Two iterations as a stand-in; both read the same two sheets.
for (i in 1:2) {
# Meta data
temp <- as.data.frame(gsheet2tbl("https://drive.google.com/open?id=1hF47MbYZ1MG6RzGW-fF6tbMT3z4AxbGN5sAOxL4E8xM"))
temp$row.names <- NULL
# OTU
trans <- as.data.frame(gsheet2tbl("https://drive.google.com/open?id=1gOaEjDcs58T8v1GA-OKhnUsyRDU8Jxt2lQZuPWo6XWU"))
trans$row.names <- NULL
# Name the metadata rows after the OTU table's columns, leaving out its last
# column (presumably the taxonomy column -- confirm against the data).
rownames(temp) <- colnames(trans)[-ncol(trans)]
# The whole OTU table is wrapped in a named list for the `data` argument.
temp2 <- OTU.diversity(data = list(data = trans), meta = temp)
# Write the returned table; the file name below is a placeholder.
write.table(temp2,
file = paste0("file", i, ".meta.div.tsv"), # replace
append = FALSE,
sep = "\t",
row.names = FALSE)
}
Replace for (i in 1:2) with for(i in 1:nrow(metadataMasterTax)), as.data.frame(gsheet2tbl(...)) with read.table(...), and the file argument in write.table with the appropriate string.