Column names shift to left on read.table or read.csv - r

Originally I have this TSV file (sample):
name type qty
cxfm 1C 0
d2 H50 2
g3g 1G 2
hb E37 1
nlx E45 4
so I am using read.csv to read data from a .tsv file but I always get this output:
name type qty
1 cxfm 1C 0
2 d2 H50 2
3 g3g 1G 2
4 hb E37 1
5 nlx E45 4
instead of getting this one:
name type qty
1 cxfm 1C 0
2 d2 H50 2
3 g3g 1G 2
4 hb E37 1
5 nlx E45 4
Any ideas why this happens? This is what I am using to read the files:
# Read every file in the current working directory as a tab-separated table,
# stack the results into `dataset`, drop duplicate rows, and write the result
# to "dataset.tsv".
file_list<-list.files()
for (file in file_list){
# No `dataset` object yet: create it from the current file.
if (!exists("dataset")){
dataset <- read.table(file, header = TRUE, sep = "\t", row.names = NULL, blank.lines.skip = TRUE, fill = TRUE)
# `1:ncol(dataset)-1` parses as `(1:ncol(dataset)) - 1`, i.e. 0:(ncol - 1); the
# zero index is ignored, so this keeps all names but the last and prepends "rowID".
names(dataset) <- c("rowID", names(dataset)[1:ncol(dataset)-1])
}
# NOTE(review): this is a separate `if`, not an `else`, so it is also TRUE right
# after the branch above has created `dataset`. The first file is therefore read
# and appended a second time; the unique() call after the loop removes the
# repeated rows again.
if (exists("dataset")){
temp_dataset <- read.table(file, header = TRUE, sep = "\t", row.names = NULL, blank.lines.skip = TRUE, fill = TRUE)
names(temp_dataset) <- c("rowID", names(temp_dataset)[1:ncol(temp_dataset)-1])
dataset <- rbind(dataset, temp_dataset)
rm(temp_dataset)
}
}
# Keep only distinct rows.
dataset <- unique(dataset)
# `row.names` is not set here, so write.table's default (TRUE) applies: the row
# names are written as an extra leading column that has no entry in the header.
write.table(dataset, file = "dataset.tsv", sep = "\t")

There appears to be a missing column header in your source CSV file. One option here would be to leave your read.csv() call as it is and simply adjust the names of the resulting data frame:
# Read the tab-separated file without interpreting quote or comment characters
# and without converting strings to factors. `row.names = NULL` keeps every
# field as an ordinary column, and `fill = TRUE` pads short rows.
df <- read.csv(
  file,
  header = TRUE,
  sep = "\t",
  row.names = NULL,
  blank.lines.skip = TRUE,
  fill = TRUE,
  comment.char = "",
  quote = "",
  stringsAsFactors = FALSE
)
# Shift the column names one position to the right: prepend "rowID" and drop
# the last name. A negative index says "all but the last" directly and avoids
# the `1:n-1` precedence trap (`:` binds tighter than `-`, giving 0:(n - 1)).
names(df) <- c("rowID", names(df)[-ncol(df)])

This is what I had to do to fix it: set row.names to FALSE when writing the table:
# Write the combined table as tab-separated text, leaving out the row names.
write.table(
  dataset,
  file = "data.tsv",
  sep = "\t",
  row.names = FALSE
)

Related

write a data.frame to an R file

I wish to write a data.frame from one R file to another R file using base R. So far I have tried cat, capture.output, write.table and sink. I found suggested solutions for capture.output and write.table here:
writing a data.frame using cat
However, I have not been able to obtain an ideal solution. write.table comes the closest but returns an unwanted warning message.
Here is the data.frame in the source R file:
# One-row example data frame with five numeric columns.
my.df <- data.frame(scenario = 3333, AA = 200, BB = 999, CC = 444, DD = 7)
Here is the desired appearance in the recipient R file except I want the name to be my.df, not desired.format:
# Target text for the recipient file: a read.table(text = ...) call whose text
# holds a header line followed by the single data row, with no row numbers.
desired.format <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
Here is the full code for the source R file except for the setwd() statement:
# Source script: builds the recipient file my_R_file.R piece by piece with
# cat(), trying several ways of writing `my.df` into it.
R.file <- 'my_R_file.R'
# This first cat() call has no `append = TRUE`, so it starts the file afresh;
# every later call appends to it.
cat(' ' , file = R.file, sep=c("\n") )
cat('This is my stuff' , file = R.file, sep=c("\n"), append = TRUE)
cat('#' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# The data frame to be written into the recipient file.
my.df <- data.frame(scenario = 3333,
AA = 200,
BB = 999,
CC = 444,
DD = 7)
str(my.df)
# Desired format in my_R_file.R
desired.format <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
str(desired.format)
# capture.output includes an unwanted row number
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
cat('capture.output' , file = R.file, sep=c("\n"), append = TRUE)
# Captures what printing `my.df` produces, which includes the row name "1".
capture.output(my.df, file = R.file, append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# write.table returns an unwanted warning message
cat('write.table' , file = R.file, sep=c("\n"), append = TRUE)
# The two cat() calls around write.table() emit the opening and closing parts
# of a `my.df <- read.table(text = '...')` statement; write.table() supplies
# the header and data row in between. Using col.names = TRUE together with
# append = TRUE is what raises the "appending column names to file" warning.
cat('my.df <- read.table(text = \'' , file = R.file, sep=c("\n"), append = TRUE)
write.table(my.df, file = R.file, col.names = TRUE, row.names = FALSE, quote = FALSE, append=TRUE)
cat('\', header = TRUE, stringsAsFactors = FALSE)' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
# sink does not return any useful output
# (attempt left disabled)
#cat('sink' , file = R.file, sep=c("\n"), append = TRUE)
#sink(R.file)
#sink()
#my.df
#sink()
#cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
cat('This is the end' , file = R.file, sep=c("\n"), append = TRUE)
cat(' ' , file = R.file, sep=c("\n"), append = TRUE)
Here are the full contents of the recipient R file my_R_file.R:
This is my stuff
#
capture.output
scenario AA BB CC DD
1 3333 200 999 444 7
write.table
my.df <- read.table(text = '
scenario AA BB CC DD
3333 200 999 444 7
', header = TRUE, stringsAsFactors = FALSE)
This is the end
Here is the warning message returned by write.table:
Warning message:
In write.table(my.df, file = R.file, col.names = TRUE, row.names = FALSE, :
appending column names to file
Thank you for any suggestions on eliminating this warning message or arriving at a better solution. I would rather not suppress all warning messages.
Note that evaluating my.df, which is of class "data.frame", dispatches to the base:::print.data.frame method. That method has arguments such as row.names=, so you can specify:
# Print the data frame without row names and append the printed text to the
# recipient file.
capture.output(
  print(my.df, row.names = FALSE),
  file = R.file,
  append = TRUE
)

data.table::fread error when converting MAF files to data table

I want to merge the 50 MAF files with the sample information so that I can read it as a data.table and subset it.
library(maftools)
# Load MAF files
# NOTE(review): system.file() resolves the given names inside the installed
# maftools package (its "extdata" directory), not inside the local "mafs/"
# folder that list.files() scanned. It returns "" when nothing matches, which
# is consistent with the empty file name in the fread error.
maf = system.file("extdata", list.files(path="mafs/"), package="maftools")
# Load sample information
# Same lookup rule: the file is searched for in the package's "extdata" directory.
si <- system.file("extdata", "sample-information.tsv", package="maftools")
# Read the MAF input together with the clinical annotation.
d = read.maf(maf=maf, clinicalData=si)
Traceback:
Error in data.table::fread(file = maf, sep = "\t", stringsAsFactors = FALSE, :
File '' does not exist or is non-readable. getwd()=='C:/Users/User/Documents/VanAllen'
> traceback()
3: stop("File '", file, "' does not exist or is non-readable. getwd()=='",
getwd(), "'")
2: data.table::fread(file = maf, sep = "\t", stringsAsFactors = FALSE,
verbose = FALSE, data.table = TRUE, showProgress = TRUE,
header = TRUE, fill = TRUE, skip = "Hugo_Symbol", quote = "")
1: read.maf(maf = maf, clinicalData = si)
1: data.table::fread(input = maf)
Maftools documentation:
https://www.bioconductor.org/packages/release/bioc/manuals/maftools/man/maftools.pdf
When I run your code, maf is indeed an empty string (""), which of course cannot be read by fread. However, when I try
# Read an example file from maftools' extdata directory by giving its path
# directly.
fread("R/x86_64-pc-linux-gnu-library/3.6/maftools/extdata/brca.maf.gz")
it works as expected.

Opening csv-file in R

I want to read a csv-data record into R. I downloaded the script and the data set from SoSci Survey and got the following error message:
Error in scan(file = file, what = what, sep = sep, quote = quote, dec
= dec, : scan() expected 'a logical', got '3'
in the script:
# Import the tab-separated survey export `zh_file` into `zh`. Column names are
# given explicitly in `col.names`, the CASE column becomes the row names, and
# each column is parsed with the class listed in `colClasses`.
# NOTE(review): the reported scan() error ("expected 'a logical', got '3'")
# means at least one column declared "logical" below contains the value 3 in
# the file; confirm which of those columns are really coded numerically.
zh = read.table(
file=zh_file, encoding="UTF-8",
header = FALSE, sep = "\t", quote = "\"",
dec = ".", row.names = "CASE",
col.names = c(
"CASE","SERIAL","REF","QUESTNNR","MODE","LANGUAGE","STARTED","ZH02","ZH03",
"ZH19","ZH19_03","ZH04","ZH05","ZH08_01","ZH08_02","ZH08_03","ZH08_04",
"ZH08_05","ZH08_06","ZH09_01","ZH09_02","ZH11_01","ZH11_02","ZH11_03","ZH11_04",
"ZH13_01","ZH13_02","ZH13_03","ZH13_04","ZH13_05","ZH14","ZH14_01","ZH14_02",
"ZH14_03","ZH14_04","ZH14_05","ZH14_06","ZH14_07","ZH14_09","ZH14_08",
"ZH14_08a","ZH15","ZH15_01","ZH15_02","ZH15_03","ZH15_04","ZH15_05","ZH15_06",
"ZH15_07","ZH15_08","ZH15_09","ZH15_09a","ZH16","ZH16_01","ZH16_02","ZH16_03",
"ZH16_04","ZH16_05","ZH16_06","ZH16_07","ZH16_08","ZH16_09","TIME001","TIME002",
"TIME003","TIME004","TIME005","TIME006","TIME007","TIME008","TIME009","TIME010",
"TIME011","TIME012","TIME013","TIME014","TIME015","TIME016","TIME017",
"TIME_SUM","MAILSENT","LASTDATA","FINISHED","Q_VIEWER","LASTPAGE","MAXPAGE",
"MISSING","MISSREL","TIME_RSI","DEG_TIME"
),
as.is = TRUE,
# Per-column parse classes, keyed by the names given in `col.names` above.
colClasses = c(
CASE="numeric", SERIAL="character", REF="character", QUESTNNR="character",
MODE="character", LANGUAGE="character", STARTED="POSIXct", ZH02="numeric",
ZH03="numeric", ZH19="numeric", ZH19_03="character", ZH04="numeric",
ZH05="numeric", ZH08_01="numeric", ZH08_02="numeric", ZH08_03="numeric",
ZH08_04="numeric", ZH08_05="numeric", ZH08_06="numeric", ZH09_01="numeric",
ZH09_02="numeric", ZH11_01="numeric", ZH11_02="numeric", ZH11_03="numeric",
ZH11_04="numeric", ZH13_01="numeric", ZH13_02="numeric", ZH13_03="numeric",
ZH13_04="numeric", ZH13_05="numeric", ZH14="numeric", ZH14_01="logical",
ZH14_02="logical", ZH14_03="logical", ZH14_04="logical", ZH14_05="logical",
ZH14_06="logical", ZH14_07="logical", ZH14_09="logical", ZH14_08="logical",
ZH14_08a="character", ZH15="numeric", ZH15_01="logical", ZH15_02="logical",
ZH15_03="logical", ZH15_04="logical", ZH15_05="logical", ZH15_06="logical",
ZH15_07="logical", ZH15_08="logical", ZH15_09="logical",
ZH15_09a="character", ZH16="numeric", ZH16_01="logical", ZH16_02="logical",
ZH16_03="logical", ZH16_04="logical", ZH16_05="logical", ZH16_06="logical",
ZH16_07="logical", ZH16_08="logical", ZH16_09="logical", TIME001="integer",
TIME002="integer", TIME003="integer", TIME004="integer", TIME005="integer",
TIME006="integer", TIME007="integer", TIME008="integer", TIME009="integer",
TIME010="integer", TIME011="integer", TIME012="integer", TIME013="integer",
TIME014="integer", TIME015="integer", TIME016="integer", TIME017="integer",
TIME_SUM="integer", MAILSENT="POSIXct", LASTDATA="POSIXct",
FINISHED="logical", Q_VIEWER="logical", LASTPAGE="numeric",
MAXPAGE="numeric", MISSING="numeric", MISSREL="numeric", TIME_RSI="numeric",
DEG_TIME="numeric"
),
# Skip the first line of the file (presumably the header row, since the
# column names are supplied explicitly above).
skip = 1,
check.names = TRUE, fill = TRUE,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "",
# Empty fields are read as NA.
na.strings = ""
)
What should I do?
Looking for help!
Have you tried using read.csv("filename.csv",header=T,sep=",") instead of read.table?

Hello, I want to create a t-SNE plot from my dataset in R, but when I run these commands I get this error:

library(Rtsne)
setwd("n/g")
# NOTE(review): both inputs have an .xlsx extension but are parsed here as
# comma-separated text by read.table(); confirm the files really are CSV.
# The first column of each file is used as the row names.
expression_data <- read.table(file = "zdata.matrix.xlsx", row.names = 1, sep=',', header = T)
meta_data <- read.table(file = "atac_v1_pbmc_10k_singlecell.xlsx", row.names = 1, sep=',', header = T)
# `expression_data` is the data.frame returned by read.table() and is passed
# to Rtsne() as-is; this is the call that fails with the error below.
tsne_realData <- Rtsne(expression_data, perplexity=10, check_duplicates = FALSE)
# Error in terms.formula(object, data = data) :
# '.' in formula and no 'data' argument
I briefly looked at the Rtsne package documentation and the Rtsne() function requires the data format to be a matrix. Try converting expression_data to a matrix before passing it to the Rtsne function. You can do so like this:
library(Rtsne)
setwd("n/g")
# Read both tables as comma-separated text, using the first column of each
# file as the row names.
# NOTE(review): the files have an .xlsx extension but are parsed as CSV by
# read.table(); confirm they really are plain comma-separated text.
expression_data <- read.table(
  file = "zdata.matrix.xlsx",
  row.names = 1,
  sep = ",",
  header = TRUE
)
meta_data <- read.table(
  file = "atac_v1_pbmc_10k_singlecell.xlsx",
  row.names = 1,
  sep = ",",
  header = TRUE
)
# read.table() returns a data.frame; convert it to a matrix before handing it
# to Rtsne().
expression_matrix <- as.matrix(expression_data)
tsne_realData <- Rtsne(
  expression_matrix,
  perplexity = 10,
  check_duplicates = FALSE
)

R asks for a list which seems to be a list according to is.list (=TRUE)

I am using the RAM package.
The function I use is very simple: it computes diversity indices and appends them as columns to my metadata:
outname <-OTU.diversity(data=OTUtables, meta=metatables)
(Arguments: data a list of OTU tables.
meta the metadata to append the outputs)
I am looping it but I get this error:
please provide otu tables as list; see ?RAM.input.formatting
So I go to that help menu and read this:
one data set:
data=list(data=otu)
multiple data sets:
data=list(data1=otu1, data2=otu2, data3=otu3)
here is my code:
# For every row of `metadataMasterTax`: read the metadata table and the OTU
# (taxonomy) table named in that row, call OTU.diversity() on them, and write
# the metadata to a per-dataset ".meta.div.tsv" file.
i <- 1
for(i in 1:nrow(metadataMasterTax)){
# Metadata table; its path is the directory and file name pasted together.
temp <- read.table(paste(metadataMasterTax$DataAnFilePath[i], metadataMasterTax$meta[i], sep = ""),
sep = "\t", header = TRUE, dec = ".", comment.char = "", quote = "", stringsAsFactors = TRUE,
as.is = TRUE)
temp2 <- temp
temp2$row.names <- NULL # drop a column named "row.names", if any (the numbers shown in the margin)
# OTU table; check.names = FALSE keeps the column names exactly as in the file.
trans <- read.table(paste(metadataMasterTax$taxPath[i], metadataMasterTax$taxName[i], sep = ""),
sep = "\t", header = TRUE, dec = ".", comment.char = "", quote = "", stringsAsFactors = TRUE,
as.is = TRUE, check.names = FALSE)
trans2 <- trans
trans2$row.names <- NULL # drop a column named "row.names", if any (the numbers shown in the margin)
# NOTE(review): the wrapped list built here is stored in `data` but never
# passed on; the call below hands OTU.diversity() `trans2[i]` directly.
data=list(data=trans2[i])
# `trans2[i]` and `temp2[i]` use single-bracket indexing with the loop counter,
# which selects the i-th column of each data frame (as a one-column data frame).
temp2[i] <- OTU.diversity(data=trans2[i], meta=temp2[i])
# Error in OTU.diversity(trans2, temp2) :
# please provide otu tables as list; see ?RAM.input.formatting
# is.list(trans2)
# [1] TRUE
# is.list(data)
# [1] TRUE
# (is.list() is TRUE for any data.frame, so the checks above do not show that
# the argument has the list(data = otu) form the help page asks for.)
temp$taxonomy <- temp2$taxonomy
# Write the metadata without row names, overwriting any existing file.
write.table(temp, file=paste(pathDataAn, "diversityDir/", metadataMasterTax$ShortName[i], ".meta.div.tsv", sep = ""),
append = FALSE,
sep = "\t",
row.names = FALSE)
}
Can anyone help me please....
thanks a lot
Because the main problem appears to be getting the OTU.diversity function to work, I focus on this issue. The code snippet below runs OTU.diversity without any problems, using the Google sheets data provided by OP.
library(gsheet)
library(RAM)

for (i in 1:2) {
  # Locations of the example sheets: metadata first, OTU table second.
  meta_url <- "https://drive.google.com/open?id=1hF47MbYZ1MG6RzGW-fF6tbMT3z4AxbGN5sAOxL4E8xM"
  otu_url <- "https://drive.google.com/open?id=1gOaEjDcs58T8v1GA-OKhnUsyRDU8Jxt2lQZuPWo6XWU"

  # Metadata as a plain data frame, without any "row.names" column.
  temp <- as.data.frame(gsheet2tbl(meta_url))
  temp$row.names <- NULL

  # OTU table as a plain data frame, without any "row.names" column.
  trans <- as.data.frame(gsheet2tbl(otu_url))
  trans$row.names <- NULL

  # Label the metadata rows with the OTU table's column names, leaving out
  # its last column.
  rownames(temp) <- colnames(trans)[-ncol(trans)]

  # The OTU table is passed wrapped in a named list.
  temp2 <- OTU.diversity(data = list(data = trans), meta = temp)

  out_file <- paste0("file", i, ".meta.div.tsv") # replace
  write.table(
    temp2,
    file = out_file,
    append = FALSE,
    sep = "\t",
    row.names = FALSE
  )
}
Replace for (i in 1:2) with for(i in 1:nrow(metadataMasterTax)), as.data.frame(gsheet2tbl(...)) with read.table(...), and the file argument in write.table with the appropriate string.

Resources