I am trying to make ExpressionSet files for the analysis of RNA-seq data. I simply have a matrix of counts called "exprs", a data.frame of features (genes) called "features" and a data.frame of sample attributes called "phenotypes".
Here is the code I run to import all the data into R and create a single ExpressionSet object, but it returns an error.
## DE object creation
### importing 3 data files to R first
### Count MATRIX
dataDirectory <- system.file("extdata", package="Biobase")
exprs <- as.matrix(read.table("counts.txt", sep = "\t", header = TRUE, row.names = 1, as.is = TRUE))
class(exprs)
head.matrix(exprs)
str(exprs)
Output:
num [1:40220, 1:20] 12.39 6.37 11.18 10.72 10.65 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:40220] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419" "ENSG00000000457" ...
..$ : chr [1:20] "sample_1379_PNC" "sample_1360_PA_A" "sample_1412_PNB" "sample_1405_PA_A" ...
### Features data which contains gene names and symbols for each ensembl id (gene) -> DATAFRAME
features <- read.csv("features.txt", sep = "\t")
rownames(features) <- features$ID
features$ID <- NULL
str(features)
Output:
'data.frame': 40223 obs. of 2 variables:
$ Symbol : chr "TSPAN6" "TNMD" "DPM1" "SCYL3" ...
$ Symbol2: chr "TSPAN6" "TNMD" "DPM1" "SCYL3" ...
### Phenotype data which contains attributes for each sample -> DATAFRAME
phenotypes <- read.csv("phenotypes.txt", sep = "\t")
rownames(phenotypes) <- phenotypes$X1
phenotypes$X1 <- NULL
str(phenotypes)
Output:
'data.frame': 20 obs. of 2 variables:
$ condition: chr "normal" "tumor" "normal" "tumor" ...
$ type : chr "mono" "mono" "poly" "mono" ...
# Load package
library(Biobase)
# Create ExpressionSet object
eset <- ExpressionSet(assayData = exprs,
phenoData = annotatedDataFrameFrom(phenotypes),
featureData = annotatedDataFrameFrom(features))
Output:
Error in (function (classes, fdef, mtable) :
unable to find an inherited method for function ‘annotatedDataFrameFrom’ for signature ‘"character"’
Related
I'm trying to get an API response using a URL that exists in an API data frame I just got, but I'm receiving the error:
"Error: arguments imply differing number of rows"
Does someone know how to fix it?
SCRIPT SO FAR
# Install both packages in one call: package names must be passed as a single
# character vector, because the second positional argument of
# install.packages() is `lib` (the install directory), not another package.
install.packages(c("jsonlite", "httr"))
library(jsonlite)
library(httr)
### Generating URL and first request
url_deputados <- "https://dadosabertos.camara.leg.br/api/v2/deputados?idLegislatura=57&ordem=ASC&ordenarPor=nome"
get_deputados <- GET(url_deputados)
### Transforming it to text
deputados_text <- content(get_deputados, "text")
deputados_text
### Converting
deputados_json <- fromJSON(deputados_text, flatten = TRUE)
deputados_json
### Transforming it to table
deputados_df <- as.data.frame(deputados_json)
deputados_df
### And removing the two last columns which I don't need
deputados_df <- deputados_df[1:9]
### Now for the secondary requisitions, I'm creating a URL with the Id that is present in the first column of the data frame I just got
url_base <- "``https://dadosabertos.camara.leg.br/api/v2/``"
url_deputados <- "deputados/"
url_id <- deputados_df$dados.id
id_list <- c(url_id)
i <- 1
url <- paste0(url_base, url_deputados, id_list[i])
url
### Up to this point everything works, but I need to make sequential requests so I can GET the info for the next line of the existing data frame
while (i <= 531) {
print("Próxima página encontrada, baixando...")
get_deputados_id <- GET(paste0(url_base, url_deputados, id_list[i]))
deputados_id_text <- content(get_deputados_id, "text")
deputados_id_json <- fromJSON(deputados_id_text, flatten = TRUE)
deputados_id_df <- as.data.frame(deputados_id_json)
i <- i + 1
}
And this is where I receive the error message.
When you run into problems at one line in your code, stop and look at the previous results. For instance, for me (since you didn't specify), I'm getting an error here:
deputados_df <- as.data.frame(deputados_json)
# Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
# arguments imply differing number of rows: 532, 3
So ... let's look at deputados_json:
str(deputados_json)
# List of 2
# $ dados:'data.frame': 532 obs. of 9 variables:
# ..$ id : int [1:532] 220593 204379 220714 221328 204560 204528 121948 74646 160508 136811 ...
# ..$ uri : chr [1:532] "https://dadosabertos.camara.leg.br/api/v2/deputados/220593" "https://dadosabertos.camara.leg.br/api/v2/deputados/204379" "https://dadosabertos.camara.leg.br/api/v2/deputados/220714" "https://dadosabertos.camara.leg.br/api/v2/deputados/221328" ...
# ..$ nome : chr [1:532] "Abilio Brunini" "Acácio Favacho" "Adail Filho" "Adilson Barroso" ...
# ..$ siglaPartido : chr [1:532] "PL" "MDB" "REPUBLICANOS" "PL" ...
# ..$ uriPartido : chr [1:532] "https://dadosabertos.camara.leg.br/api/v2/partidos/37906" "https://dadosabertos.camara.leg.br/api/v2/partidos/36899" "https://dadosabertos.camara.leg.br/api/v2/partidos/37908" "https://dadosabertos.camara.leg.br/api/v2/partidos/37906" ...
# ..$ siglaUf : chr [1:532] "MT" "AP" "AM" "SP" ...
# ..$ idLegislatura: int [1:532] 57 57 57 57 57 57 57 57 57 57 ...
# ..$ urlFoto : chr [1:532] "https://www.camara.leg.br/internet/deputado/bandep/220593.jpg" "https://www.camara.leg.br/internet/deputado/bandep/204379.jpg" "https://www.camara.leg.br/internet/deputado/bandep/220714.jpg" "https://www.camara.leg.br/internet/deputado/bandep/221328.jpg" ...
# ..$ email : chr [1:532] "dep.abiliobrunini@camara.leg.br" "dep.acaciofavacho@camara.leg.br" "dep.adailfilho@camara.leg.br" "dep.adilsonbarroso@camara.leg.br" ...
# $ links:'data.frame': 3 obs. of 2 variables:
# ..$ rel : chr [1:3] "self" "first" "last"
# ..$ href: chr [1:3] "https://dadosabertos.camara.leg.br/api/v2/deputados?idLegislatura=57&ordem=ASC&ordenarPor=nome" "https://dadosabertos.camara.leg.br/api/v2/deputados?idLegislatura=57&ordem=ASC&ordenarPor=nome&pagina=1&itens=1000" "https://dadosabertos.camara.leg.br/api/v2/deputados?idLegislatura=57&ordem=ASC&ordenarPor=nome&pagina=1&itens=1000"
(Hint: that's not unambiguously converted into a frame.)
My guess is that you just need to access $dados:
head(deputados_json$dados)
# id uri nome siglaPartido uriPartido siglaUf idLegislatura urlFoto email
# 1 220593 https://dadosabertos.camara.leg.br/api/v2/deputados/220593 Abilio Brunini PL https://dadosabertos.camara.leg.br/api/v2/partidos/37906 MT 57 https://www.camara.leg.br/internet/deputado/bandep/220593.jpg dep.abiliobrunini@camara.leg.br
# 2 204379 https://dadosabertos.camara.leg.br/api/v2/deputados/204379 Acácio Favacho MDB https://dadosabertos.camara.leg.br/api/v2/partidos/36899 AP 57 https://www.camara.leg.br/internet/deputado/bandep/204379.jpg dep.acaciofavacho@camara.leg.br
# 3 220714 https://dadosabertos.camara.leg.br/api/v2/deputados/220714 Adail Filho REPUBLICANOS https://dadosabertos.camara.leg.br/api/v2/partidos/37908 AM 57 https://www.camara.leg.br/internet/deputado/bandep/220714.jpg dep.adailfilho@camara.leg.br
# 4 221328 https://dadosabertos.camara.leg.br/api/v2/deputados/221328 Adilson Barroso PL https://dadosabertos.camara.leg.br/api/v2/partidos/37906 SP 57 https://www.camara.leg.br/internet/deputado/bandep/221328.jpg dep.adilsonbarroso@camara.leg.br
# 5 204560 https://dadosabertos.camara.leg.br/api/v2/deputados/204560 Adolfo Viana PSDB https://dadosabertos.camara.leg.br/api/v2/partidos/36835 BA 57 https://www.camara.leg.br/internet/deputado/bandep/204560.jpg dep.adolfoviana@camara.leg.br
# 6 204528 https://dadosabertos.camara.leg.br/api/v2/deputados/204528 Adriana Ventura NOVO https://dadosabertos.camara.leg.br/api/v2/partidos/37901 SP 57 https://www.camara.leg.br/internet/deputado/bandep/204528.jpg dep.adrianaventura@camara.leg.br
After that, make sure you fix your url_base: it should almost certainly not contain those backticks.
Finally, you should do the same thing in your while loop:
# Request the detail record for every id and keep each result.
# seq_along() covers the whole id vector instead of a hard-coded count, and
# storing into a preallocated list stops each iteration from overwriting the
# previous one.
deputados_id_list <- vector("list", length(id_list))
for (i in seq_along(id_list)) {
  get_deputados_id <- GET(paste0(url_base, url_deputados, id_list[i]))
  deputados_id_text <- content(get_deputados_id, "text")
  deputados_id_json <- fromJSON(deputados_id_text, flatten = TRUE)
  # deputados_id_df <- as.data.frame(deputados_id_json)
  deputados_id_df <- deputados_id_json$dados
  # Assign via `[<-` with a wrapped value so a NULL result does not delete
  # the list slot and shift later entries.
  deputados_id_list[i] <- list(deputados_id_df)
}
I'd like to convert an sf object to a data frame and then restore it to its original state. But after converting the geometry with st_as_text(st_sfc(stands_sel$geometry)), it proves very difficult to recover it. In my example:
library(sf)
# Download the zipped shapefile of the area of interest (AOI) to a temp file.
zip_path <- tempfile(fileext = ".zip")
download.file(
  "https://github.com/Leprechault/trash/raw/main/sel_stands_CMPC.zip",
  zip_path,
  mode = "wb" # binary transfer so the archive is not corrupted
)
unzip(zip_path, exdir = tempdir())
# Read the shapefile by full path instead of changing the working directory.
stands_sel <- st_read(file.path(tempdir(), "sel_stands_CMPC.shp"))
st_crs(stands_sel) <- 4326
# Extract geometry as text
geom <- st_as_text(st_sfc(stands_sel$geometry))
# Add the features
features <- st_drop_geometry(stands_sel)
str(features)
# Joining feature + geom
geo_df <- cbind(features, geom)
str(geo_df)
# 'data.frame': 2 obs. of 17 variables:
# $ CD_USO_SOL: num 2433 9053
# $ ID_REGIAO : num 11 11
# $ ID_PROJETO: chr "002" "344"
# $ PROJETO : chr "BARBA NEGRA" "CAMPO SECO"
# $ CD_TALHAO : chr "159A" "016A"
# $ CARACTERIS: chr "Plantio Comercial" "Plantio Comercial"
# $ CARACTER_1: chr "Produtivo" "Produtivo"
# $ CICLO : int 2 1
# $ ROTACAO : int 1 1
# $ DATA_PLANT: chr "2008/04/15" "2010/04/15"
# $ LOCALIDADE: chr "BARRA DO RIBEIRO" "DOM FELICIANO"
# $ ESPACAMENT: chr "3.00 x 2.50" "3.5 x 2.14"
# $ ESPECIE : chr "SALIGNA" "DUNNI"
# $ SISTEMA_PR: chr "MACRO ESTACA - EUCALIPTO" "SEMENTE - EUCALIPTO"
# $ VLR_AREA : num 8.53 28.07
# $ ID_UNIQUE : chr "BARBANEGRA159A" "CAMPOSECO016A"
# $ geom : chr "MULTIPOLYGON (((-51.21423 -30.35172, -51.21426 -30.35178, -51.2143 -30.35181, -51.21432 -30.35186, -51.21433 -3"| __truncated__
# Return to original format again
stands_sf <- geo_df %>%
st_geometry(geom) %>%
sf::st_as_sf(crs = 4326)
#Error in UseMethod("st_geometry") :
Please, any help to restore my stands_sf object to the original state?
I think geom isn't in a format st_geometry is expecting. st_as_text converted your geometry into WKT as discussed in the help:
The returned WKT representation of simple feature geometry conforms to the simple features access specification and extensions, known as EWKT, supported by PostGIS and other simple features implementations for addition of SRID to a WKT string.
https://r-spatial.github.io/sf/reference/st_as_text.html
Instead, use st_as_sf(wkt=) to set the new (old) geometry.
st_as_sf(geo_df, wkt = "geom", crs = 4326)
I'd like to create a BigQuery table from geoJSON files. Although geoJSON is an accepted format in BQ (NEWLINE_DELIMITED_JSON) and the fields argument accepts a bq_fields specification or something coercible to it (like a data frame), the function bq_table_create() of the bigrquery package doesn't work. In my example below the output error is Erro: Unsupported type: list:
library(sf)
library(bigrquery)
library(DBI)
library(googleAuthR)
library(geojsonsf)
library(geojsonR)
# Convert shapefile to geoJSON
stands_sel <- st_read(
"D:/Dropbox/Stinkbug_Ml_detection_CMPC/dashboard/v_08_CMPC/sel_stands_CMPC.shp")
# Open as geoJSON
geo <- sf_geojson(stands_sel)
# Convert geoJSON to data frame
geo_js_df <- as.data.frame(geojson_wkt(geo))
str(geo_js_df)
# 'data.frame': 2 obs. of 17 variables:
# $ SISTEMA_PR: chr "MACRO ESTACA - EUCALIPTO" "SEMENTE - EUCALIPTO"
# $ ESPECIE : chr "SALIGNA" "DUNNI"
# $ ID_UNIQUE : chr "BARBANEGRA159A" "CAMPOSECO016A"
# $ CICLO : num 2 1
# $ LOCALIDADE: chr "BARRA DO RIBEIRO" "DOM FELICIANO"
# $ ROTACAO : num 1 1
# $ CARACTER_1: chr "Produtivo" "Produtivo"
# $ VLR_AREA : num 8.53 28.07
# $ ID_REGIAO : num 11 11
# $ CD_USO_SOL: num 2433 9053
# $ DATA_PLANT: chr "2008/04/15" "2010/04/15"
# $ ID_PROJETO: chr "002" "344"
# $ CARACTERIS: chr "Plantio Comercial" "Plantio Comercial"
# $ PROJETO : chr "BARBA NEGRA" "CAMPO SECO"
# $ ESPACAMENT: chr "3.00 x 2.50" "3.5 x 2.14"
# $ CD_TALHAO : chr "159A" "016A"
# $ geometry :List of 2
# ..$ : 'wkt' chr "MULTIPOLYGON (((-51.2142 -30.3517,-51.2143 -30.3518,-51.2143 -30.3518,-51.2143 -30.3519,-51.2143 -30.3519,-51.2"| __truncated__
# ..$ : 'wkt' chr "MULTIPOLYGON (((-52.3214 -30.4271,-52.3214 -30.4272,-52.3214 -30.4272,-52.3215 -30.4272,-52.3215 -30.4272,-52.3"| __truncated__
# - attr(*, "wkt_column")= chr "geometry"
# Insert information inside BQ
bq_conn <- dbConnect(bigquery(),
project = "my-project",
use_legacy_sql = FALSE
)
# First create the table
players_table = bq_table(project = "my-project", dataset = "stands_ROI_2021", table = "CF_2021")
bq_table_create(x = players_table, fields = as_bq_fields(geo_js_df))
Erro: Unsupported type: list
You can upload data frame with a list-type column on BigQuery by using bq_table_upload() syntax. Try this on your script instead of bq_table_create(),
bq_table_upload(players_table, geo_js_df)
For your reference, I tried this on my end using this sample data with a list-type column:
# Sample data frame with a list-type column: `children` holds one character
# vector per row, wrapped in I() so data.frame() keeps it as a single column.
d <- data.frame(
  id = seq_len(2L),
  name = c("Jon", "Mark"),
  children = I(list(
    c("Mary", "James"),
    c("Greta", "Sally")
  ))
)
R console:
Created BQ table:
EDIT:
As per this documentation, FeatureCollection is not yet supported in BigQuery, however there is an ongoing feature request you can find here. Workaround is to convert the GeoJson file to BigQuery new-line-delimited JSON before converting it to dataframe.
To convert GeoJson file to BigQuery new-line-delimited JSON, follow these steps:
Install node.js.
Add packages:
npm install fs JSONStream line-input-stream yargs
Clone the github repository:
git clone https://github.com/mentin/geoscripts.git
Change directory:
cd geoscripts/geojson2bq/
Convert GeoJson file to BigQuery new-line-delimited JSON:
node geojson2bqjson.js sel_stands.geojson > out.json
Using the new-line-delimited JSON file, convert this to dataframe in the R console, then use bq_table_upload() to upload the data to BigQuery.
library(bigrquery)
library(dplyr)
library(tidyverse)
library(jsonlite)
# Read the newline-delimited JSON produced by geojson2bqjson.js into a
# data frame.
out <- stream_in(file("out.json"))

projectid <- "my-project"
datasetid <- "my-dataset"

# dbConnect() is a DBI generic; calling it as DBI::dbConnect() means this
# snippet does not depend on DBI having been attached elsewhere.
bq_conn <- DBI::dbConnect(
  bigquery(),
  project = projectid,
  dataset = datasetid,
  use_legacy_sql = FALSE
)

# Reuse the project/dataset ids defined above rather than repeating the
# literals, so the connection and the table reference cannot drift apart.
players_table <- bq_table(
  project = projectid,
  dataset = datasetid,
  table = "CF_2021_test5"
)
bq_table_upload(players_table, out)
bq_table_download(players_table)
R console:
BigQuery table:
I am working with "rehh" package of R.
I create an object chr21 of class haplohh from data2haplohh function of the package.
Now when I try to write it to a file:
write.table(chr21, file = "CHR21", append = FALSE, quote = TRUE,sep = "\t", eol="\n", na= "NA", dec=".", row.names=TRUE, col.names=TRUE)
The error I get is:
Error in as.data.frame.default(x[[i]], optional = TRUE) :
cannot coerce class "structure("haplohh", package = "rehh")" to a data.frame
Also when I try to print first 10 rows of chr21,
head(chr21, n=10)
I get this error:
Error in x[seq_len(n)] : object of type 'S4' is not subsettable
OK so am adding the output of str(chr21):
str(chr21)
Formal class 'haplohh' [package "rehh"] with 6 slots
..@ haplo : num [1:10, 1:1010554] 0 2 2 2 0 2 0 2 0 2 ...
..@ position: num [1:1010554] 9411410 9411645 9411785 9412503 9413228 ...
..@ snp.name: chr [1:1010554] "rs78200054" "rs71235074" "rs71235075" "rs71220884" ...
..@ chr.name: chr "21"
..@ nhap : int 10
..@ nsnp : int 1010554
I am a newbie in R, It would be really great If I could get to know where I am going wrong and how to fix this error.
Thanks in advance!
library(rehh)
# Copy the rehh example files into the current working directory.
make.example.files()
# Create some sample data: build a haplohh object from the example files.
chr12<-data2haplohh(hap_file="bta12_hapguess_switch.out",map_file="map.inp",
min_maf=0.05,popsel=7,chr.name=12,recode.allele=TRUE)
# Look at the structure of the object (in your case it is called chr21)
str(chr12)
Formal class 'haplohh' [package "rehh"] with 6 slots
..@ haplo : num [1:280, 1:1202] 2 2 1 2 2 2 1 2 2 2 ...
..@ position: num [1:1202] 79823 125974 175087 219152 256896 ...
..@ snp.name: chr [1:1202] "F1200140" "F1200150" "F1200170" "F1200180" ...
..@ chr.name: chr "12"
..@ nhap : int 280
..@ nsnp : int 1202
You can extract various components from this object:
# Extract the haplotype data matrix; slots of an S4 object are accessed
# with the `@` operator.
haplo.matrix <- chr12@haplo
# Extract the vector of positions
pos <- chr12@position
head(pos)
#[1] 79823 125974 175087 219152 256896 316254
If you need to get data back into a dataframe format you can do the following:
# Build one row per SNP from the S4 slots (accessed with `@`): chromosome
# name, SNP name and position.
df <- data.frame(
  chr = chr12@chr.name,
  snp.name = chr12@snp.name,
  position = chr12@position,
  stringsAsFactors = FALSE
)
# The haplo slot is haplotypes x SNPs, so transpose it before binding it
# column-wise to the per-SNP rows.
df <- cbind(df, t(chr12@haplo))
Once this is done, you can use head() and other regular R functions.
However, if you need to apply functions from the rehh package, you should use the original chr21 object.
I am trying to find a way to retrieve data from Harvard Dataverse website through R. I am using "dataverse" and "dvn" packages, among others. Many of the data files end with ".tab", although they are not formatted as normal tab-delimited text.
I have done this:
library(dataverse)
## 01. Using the dataverse server and making a search
Sys.setenv("DATAVERSE_SERVER" ="dataverse.harvard.edu")
## 02. Loading the dataset that I chose, by url
doi_url <- "https://doi.org/10.7910/DVN/ZTCWYQ"
my_dataset <- get_dataset(doi_url)
## 03. Grabbing the first file of the dataset
## which is named "001_AppendixC.tab"
my_files <- my_dataset$files$label
my_file <- get_file(my_files[1], doi_url)
AppendixC <- tempfile()
writeBin(my_file, AppendixC)
read.table(AppendixC)
> Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
> line 1 did not have 2 elements
> In addition: Warning message:
> In read.table(AppendixC) :
> line 1 appears to contain embedded nulls
Any hint?
The problem is that dataverse::get_file() returns the file in a raw binary format. The easiest way to load it into memory is to write it to a tempfile with writeBin() and then read that file with the appropriate import/read function.
Here is a function that should automagically read it into memory:
# Uses rio, which automatically chooses the appropriate import/read
# function based on file type.
library(rio)
install_formats() # only needs to run once after
# pkg installation
#' Load a raw (binary) file payload into memory
#'
#' Writes the raw bytes to a temporary file whose extension is `type`, then
#' reads that file back with `import()`.
#'
#' @param raw Raw vector holding the file contents (e.g. the value returned
#'   by `dataverse::get_file()`).
#' @param type File format, used as the temporary file's extension. Must be
#'   one of the supported choices; an unambiguous abbreviation is expanded
#'   to the full name.
#' @return Whatever `import()` returns for the temporary file.
load_raw_file <- function(raw, type) {
  # Keep the value match.arg() returns: it is the full, validated choice, so
  # an abbreviation such as "feath" cannot leak into the file extension.
  type <- match.arg(
    arg = type,
    choices = c(
      "csv", "tab", "psc", "tsv", "sas7bdat",
      "sav", "dta", "xpt", "por", "xls", "xlsx",
      "R", "RData", "rda", "rds", "rec", "mtb",
      "feather", "csv.gz", "fwf"
    )
  )
  tmp <- tempfile(fileext = paste0(".", type))
  # Remove the temporary file even if writing or importing fails.
  on.exit(unlink(tmp), add = TRUE)
  writeBin(as.vector(raw), tmp)
  import(tmp)
}
Let's try it out with your file, which is actually an Excel file.
library(dataverse)
raw <- get_file(
"001_AppendixC.tab",
"https://doi.org/10.7910/DVN/ZTCWYQ"
)
data <- load_raw_file(raw, "xlsx")
And look at the data:
str(data)
> 'data.frame': 132 obs. of 17 variables:
> $ Country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
> $ UN_9193 : chr "37.4" "7.7" "9.1" "65.400000000000006" ...
> $ UN_9901 : chr "46.1" "7.2" "10.7" "50" ...
> $ UN_0709 : chr "24.6" "9.6999999999999993" "7.5" "23.7" ...
> $ UN_1416 : chr "23" "4.9000000000000004" "4.5999999999999996" "14" ...
> $ stu90_94 : chr "51.3" "37.200000000000003" "22.9" "52.9" ...
> $ stu98_02 : chr "54.7" "39.200000000000003" "23.6" "47.1" ...
> $ stu06_10 : chr "51.3" "23.1" "13.2" "29.2" ...
> $ stu12_16 : chr "40.9" "17.899999999999999" "11.7" "37.6" ...
> $ wast90_94: chr "11.5" "9.4" "7.1" "7.9" ...
> $ wast98_02: chr "13.4" "12.2" "3.1" "8.6999999999999993" ...
> $ wast06_10: chr "8.9" "9.4" "4.0999999999999996" "8.1999999999999993" ...
> $ wast12_16: chr "9.5" "6.2" "4.0999999999999996" "4.9000000000000004" ...
> $ UM1992 : chr "16.8" "3.7" "4.5" "22.6" ...
> $ UM2000 : chr "13.7" "2.6" "4" "21.7" ...
> $ UM2008 : chr "11" "1.8" "2.9" "19.2" ...
> $ UM2015 : chr "9.1" "1.4" "2.6" "15.7" ...