I have a dataframe dt like below:
# Example data: 5 patients with a follow-up time (`years`), an event indicator
# (`patient_vital_status`, 0/1) and four numeric columns (columns 4-7).
# Three of those column names (`5S_rRNA`, `5_8S_rRNA`, `7SK`) start with a
# digit, so they are not syntactic R names and have to be back-quoted whenever
# they appear in code or in a formula string.
# Note that `5_8S_rRNA` is all zeros in this sample.
dt <- structure(list(Patient = structure(1:5, .Label = c("Sample-3L-AA1B",
"Sample-4N-A93T", "Sample-4T-AA8H", "Sample-5M-AAT4", "Sample-5M-AAT6"), class = "factor"), years = c(1.3013698630137, 0.4, 1.05479452054795,
0.134246575342466, 0.794520547945205), patient_vital_status = c(0L,
0L, 0L, 1L, 1L), `5S_rRNA` = c(0.772491219057, 1.12342309804,
0.283762608812, 0.882492010705, 0.805980084005), `5_8S_rRNA` = c(0,
0, 0, 0, 0), `7SK` = c(0.075067668297, 0, 0, 0.138592705037,
0.210961230042), A1BG = c(0.0282019073358, 0.169523031145, 0.00835845927105,
0.00515484599363, 0.0470792160901)), row.names = c(NA, 5L), class = "data.frame")
I'm trying to apply the following code to the required columns and append each output to a list.
library(survminer)
library(survival)
# Vector with the variables (columns 4-7 of dt) to run through
genes <- colnames(dt)[4:7]

# For every gene: find the optimal expression cutpoint, categorize the samples
# by it, fit a survival curve per category and keep the resulting p-value
# table. The per-gene tables are collected in a preallocated list and bound
# once at the end instead of growing a data frame inside the loop.
pvalue_tables <- vector("list", length(genes))
for (i in seq_along(genes)) {
  gene <- genes[i]
  surv_rnaseq.cut <- surv_cutpoint(
    dt,
    time = "years",
    event = "patient_vital_status",
    variables = gene
  )
  surv_rnaseq.cat <- surv_categorize(surv_rnaseq.cut)
  # Names such as 5S_rRNA or 7SK start with a digit, so pasting them bare into
  # the formula text makes parse() fail with "unexpected symbol". Back-quoting
  # the name lets as.formula() read it as a single variable.
  # NOTE(review): assumes surv_categorize() keeps the original column names
  # unchanged -- confirm on your survminer version.
  surv_formula <- as.formula(
    paste0("Surv(years, patient_vital_status) ~ `", gene, "`")
  )
  fit <- survfit(surv_formula, data = surv_rnaseq.cat)
  pvalue_tables[[i]] <- data.frame(surv_pvalue(fit, surv_rnaseq.cat))
}
# Starting from an empty data frame keeps the result a data frame even when
# `genes` is empty.
datalist <- do.call(rbind, c(list(data.frame()), pvalue_tables))
I got the following error:
Error in parse(text = x, keep.source = FALSE) :
<text>:1:37: unexpected symbol
1: Surv(years, patient_vital_status) ~5S_rRNA
^
I thought maybe the error is due to the _ symbol in the name. I even removed it and checked, but it didn't work.
Related
Is there an easy way to get ASV richness for each Phylum for each Station using the estimate_richness function in phyloseq? Or is there another simple way of extracting the abundance data for each taxonomic rank and calculating richness that way?
So far I have just been subsetting individual Phyla of interest using for example:
# Subset the phyloseq object to one phylum, then compute per-sample Shannon
# diversity (H), observed richness (S1) and evenness as H / log(S1).
ps.Prymnesiophyceae <- subset_taxa(ps, Phylum == "Prymnesiophyceae")
alpha_diversity <- estimate_richness(ps.Prymnesiophyceae,
                                     measures = c("Shannon", "Observed"))
H <- alpha_diversity$Shannon
S1 <- alpha_diversity$Observed
S <- log(S1)
evenness <- H / S
# sample_data() has to be given the subset object created above; the bare name
# `Prymnesiophyceae` is never defined in this snippet.
alpha <- cbind(Shannon = H, Richness = S1, Evenness = evenness,
               sample_data(ps.Prymnesiophyceae))
But this is rather a pain when having to do it for e.g. the top 20 phyla.
EDIT:
The suggestion by @GTM works well until the last step. See comment + dput:
> dput(head(sample_names(ps.transect), n=2)) c("2-1-DCM_S21_L001_R1_001.fastq", "2-1-SA_S9_L001_R1_001.fastq" )
> dput(head(alpha, n=2)) structure(list(Observed = c(31, 25), Shannon = c(2.84184012598765,
2.53358345702604), taxon = c("Prymnesiophyceae", "Prymnesiophyceae" ), sample_id = c("X2.1.DCM_S21_L001_R1_001.fastq", "X2.1.SA_S9_L001_R1_001.fastq" ), S = c(3.43398720448515,
3.2188758248682), evenness = c(0.827562817437384,
0.787101955736294)), row.names = c("X2.1.DCM_S21_L001_R1_001.fastq", "X2.1.SA_S9_L001_R1_001.fastq"), class = "data.frame")
> dput(head(smpl_data, n=1)) new("sample_data", .Data = list("001_DCM", 125L, structure(1L, .Label = "DCM", class = "factor"), structure(1L, .Label = "Transect", class = "factor"), structure(1L, .Label = "STZ", class = "factor"),
structure(1L, .Label = "STFW", class = "factor"), "Oligotrophic",
16L, -149.9978333, -29.997, 130.634, 17.1252, 35.4443, 1025.835008,
1.1968, 1e-12, 5.387, 2.8469, 52.26978546, 98.0505, 0, 0,
0.02, 0.9, 0, 0, 2069.47, 8.057, 377.3), names = c("Station_neat", "Depth_our", "Depth_bin", "Loc", "Front", "Water", "Zone", "Bottle", "Lon", "Lat", "pressure..db.", "Temperature", "Salinity", "Density_kgm.3", "Fluorescence_ugL", "PAR", "BottleO2_mLL", "CTDO2._mLL", "OxygenSat_.", "Beam_Transmission", "N_umolL", "NO3_umolL", "PO4_umolL", "SIL_umolL", "NO2_umolL", "NH4_umolL", "DIC_uMkg", "pH", "pCO2_matm"), row.names = "2-1-DCM_S21_L001_R1_001.fastq",
.S3Class = "data.frame")
You can wrap your code in a for loop to do so. I've slightly modified your code to make it a bit more flexible, see below.
require("phyloseq")
require("dplyr")
# Calculate alpha diversity measures for a specific taxon at a specified rank.
# You can pass any parameters that you normally pass to `estimate_richness`
# Calculate alpha diversity measures for one taxon at a given taxonomic rank.
#
# Args:
#   ps:         phyloseq object.
#   taxon_name: value of `tax_rank` to keep (e.g. a phylum name).
#   tax_rank:   column of the taxonomy table to filter on (default "Phylum").
#   ...:        forwarded unchanged to `estimate_richness()` (e.g. `measures`).
#
# Returns:
#   The data frame produced by `estimate_richness()` with two extra columns:
#   `taxon` (the value of `taxon_name`) and `sample_id`.
estimate_diversity_for_taxon <- function(ps, taxon_name, tax_rank = "Phylum", ...) {
  # Subset to taxon of interest; taxa whose rank is missing are dropped
  tax_tbl <- as.data.frame(tax_table(ps))
  keep <- tax_tbl[, tax_rank] == taxon_name
  keep[is.na(keep)] <- FALSE
  ps_phylum <- prune_taxa(keep, ps)

  # Calculate alpha diversity and generate a table
  alpha_diversity <- estimate_richness(ps_phylum, ...)
  alpha_diversity$taxon <- taxon_name

  # The row names of the result can be mangled versions of the sample names
  # (e.g. "2-1-DCM_S21" becomes "X2.1.DCM_S21"), which breaks a later join
  # against the sample data. Use the untouched sample names whenever the table
  # has one row per sample; otherwise (e.g. split = FALSE) keep the row names.
  # NOTE(review): assumes the rows come back in sample_names() order -- confirm.
  sample_ids <- sample_names(ps_phylum)
  if (length(sample_ids) != nrow(alpha_diversity)) {
    sample_ids <- row.names(alpha_diversity)
  }
  alpha_diversity$sample_id <- sample_ids

  alpha_diversity
}
# Load data
data(GlobalPatterns)
ps <- GlobalPatterns

# Estimate alpha diversity for each phylum
phyla <- get_taxa_unique(ps, taxonomic.rank = "Phylum")
phyla <- phyla[!is.na(phyla)]

# Build one table per phylum and bind them once, rather than growing a data
# frame with rbind() on every iteration.
alpha_by_phylum <- lapply(phyla, function(phylum) {
  estimate_diversity_for_taxon(ps = ps,
                               taxon_name = phylum,
                               measures = c("Shannon", "Observed"))
})
alpha <- do.call(rbind, c(list(data.frame()), alpha_by_phylum))

# Calculate the additional alpha diversity measures
alpha$S <- log(alpha$Observed)
alpha$evenness <- alpha$Shannon / alpha$S

# Add sample data.
# as.data.frame() can leave the object as class "sample_data", which the join
# cannot handle; as(..., "data.frame") yields a plain data frame.
smpl_data <- as(sample_data(ps), "data.frame")
alpha <- left_join(alpha,
                   smpl_data,
                   by = c("sample_id" = "X.SampleID"))
This is a reproducible example with GlobalPatterns. Make sure to alter the code to match your data by replacing X.SampleID in the left join with the name of the column that contains the sample IDs in your sample_data. If there is no such column, you can create it from the row names:
# Turn the sample data into a plain data frame and expose its row names as an
# explicit `sample_id` column to join on.
smpl_data <- as(sample_data(ps), "data.frame")
# `<-` (assignment), not `<` (comparison): with `<` the column was never created
smpl_data$sample_id <- row.names(smpl_data)
alpha <- left_join(alpha,
                   smpl_data,
                   by = "sample_id")
Based on the sample data and code below, I am getting an error while running GWmodel::gwss function to compute the spatial Pearson autocorrelation values and plot it on a map.
Purpose:
To see if there is correlation between AvgTMin and AvgPpt and plot correlation values spatially as a map.
Error from the gwss function:
Error in gw_weight_mat(vdist, bw, kernel, adaptive) :
Expecting a single value: [extent=0].
In addition: Warning message:
In proj4string(data) :
CRS object has comment, which is lost in output; in tests, see
https://cran.r-project.org/web/packages/sp/vignettes/CRS_warnings.html
What could be causing the issue and how can I fix this?
Please note that the dataset is gridded, what that means is that both AvgTMin and AvgPpt have the same coordinates. So, if I am using the wrong function to compute the spatial autocorrelation values between these two variables, kindly suggest an appropriate approach.
Desired Output (screenshot provided from another similar analysis with different variables and non-grid point data):
Sample Data (dummy_sp):
# dput() output of the sample object that the text calls `dummy_sp`. The
# expression is not assigned here; bind it to `dummy_sp` before running the
# code that follows.
# It is a SpatialMultiPointsDataFrame: 5 data rows (years 1980-1984), each
# paired with the same 2 x 2 coordinate matrix, in lon/lat WGS84.
# NOTE(review): confirm that gwss() accepts multi-point objects; if it does
# not, convert to a SpatialPointsDataFrame first.
new("SpatialMultiPointsDataFrame", data = structure(list(AvgTMin = c(66.8129691710839,
65.7261495113373, 69.0089753627777, 67.0250748157501, 66.838925075531
), AvgPpt = c(186.947467290438, 111.889091470025, 114.544090270996,
154.866916656494, 122.16354282697), year = 1980:1984, CITYNAME = c("a",
"a", "a", "a", "a")), class = "data.frame", row.names = c(NA,
5L)), coords = list(structure(c(-80.2499999994118, -80.1666666660779,
26.3333333333333, 26.25), dim = c(2L, 2L)), structure(c(-80.2499999994118,
-80.1666666660779, 26.3333333333333, 26.25), dim = c(2L, 2L)),
structure(c(-80.2499999994118, -80.1666666660779, 26.3333333333333,
26.25), dim = c(2L, 2L)), structure(c(-80.2499999994118,
-80.1666666660779, 26.3333333333333, 26.25), dim = c(2L,
2L)), structure(c(-80.2499999994118, -80.1666666660779, 26.3333333333333,
26.25), dim = c(2L, 2L))), bbox = structure(c(-80.2499999994118,
26.25, -80.1666666660779, 26.3333333333333), dim = c(2L, 2L), dimnames = list(
NULL, c("min", "max"))), proj4string = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs"))
Code:
library(GWmodel)
##=============================================================
#quick.map function
##=============================================================
# Draw a quick point map of one attribute of a spatial data frame, coloured by
# the "pretty" class the value falls in.
#
# Args:
#   spdf:         spatial object with a `data` slot (e.g. `gwss(...)$SDF`).
#   var:          name of the column of `spdf@data` to map.
#   legend.title: title shown above the legend.
#   main.title:   title of the plot.
#
# Relies on `brewer.pal()` and on a background layer named `Columbus` being
# available in the calling environment; neither is defined in this snippet.
# Changes `par(mar = ...)` and does not restore it.
quick.map_function <- function(spdf, var, legend.title, main.title) {
  # `@` (not `#`) accesses the data slot. With `#` the rest of the line was a
  # comment, so `x` was the whole spatial object instead of the chosen column.
  x <- spdf@data[, var]

  # Bin the values at pretty break points and map each bin to a colour
  cut.vals <- pretty(x)
  x.cut <- cut(x, cut.vals)
  cut.levels <- levels(x.cut)
  cut.band <- match(x.cut, cut.levels)
  colors <- brewer.pal(length(cut.levels), "OrRd")

  par(mar = c(1, 1, 1, 1))
  plot(Columbus, col = "gray10", border = "gray50", bg = "gray50")
  title(main.title)
  plot(spdf, add = TRUE, col = colors[cut.band], pch = 16, cex = 0.7)
  legend("topleft", cut.levels, col = colors, pch = 16, bty = "n",
         title = legend.title, cex = 1)
}
# Using the gwss function
# The kernel name is written in lower case, as in the GWmodel documentation.
# NOTE(review): with adaptive = TRUE, `bw` is a number of nearest neighbours;
# bw = 1 looks very small -- confirm the intended bandwidth.
# NOTE(review): if the error persists, check whether gwss() accepts the
# SpatialMultiPointsDataFrame class of `dummy_sp`.
gwss.col <- gwss(dummy_sp,
                 vars = c("AvgTMin", "AvgPpt"),
                 adaptive = TRUE,
                 bw = 1,
                 kernel = "gaussian")
# Plot the AvgTMin and AvgPpt coefficients results.
# Calls the function defined above (`quick.map_function`) and uses the
# correlation column named after both variables ("Corr_AvgTMin.AvgPpt").
quick.map_function(gwss.col$SDF,
                   "Corr_AvgTMin.AvgPpt",
                   "Coefficients",
                   "Geographically Weighted Pearson Correlation")
I have a function which yields 2 dataframes. As functions can only return one object, I combined these dataframes as a list. However, I need to work with both dataframes separately. Is there a way to automatically split the list into the component dataframes, or to write the function in a way that both objects are returned separately?
The function:
install.packages("plyr")
require(plyr)
# Join `y` onto `x` and log the row count before and after the join.
#
# Args:
#   x, y: data frames to join; their argument expressions are used in the
#         log label.
#   z:    name (string) under which the joined data frame is returned.
#   crit: column name(s) to join by.
#   typ:  join type passed to `join()`.
#   doc:  existing log table to append to (defaults to `checkmerge`).
#
# Returns:
#   A list with element `checkmerge` (the extended log) and an element named
#   by `z` (the joined data frame).
fun.docmerge <- function(x, y, z, crit, typ, doc = checkmerge) {
  merge_label <- paste(
    deparse(substitute(x)), "+", deparse(substitute(y)), "=", z
  )

  # Row count of the left table before the join
  rows_before <- data.frame(mergedat = merge_label, countdat = nrow(x))

  joined <- join(x, y, by = crit, type = typ)

  # Row count of the result after the join
  rows_after <- data.frame(mergedat = merge_label, countdat = nrow(joined))

  out <- list(checkmerge = rbind(doc, rows_before, rows_after))
  out[[z]] <- joined
  out
}
This is the call to the function, saving the result list to the new object results.
# Run the logged merge; `results` is a list holding the updated log
# (`checkmerge`) and the joined data frame under the name given as `z`.
results <- fun.docmerge(
  x = df1,
  y = df2,
  z = "df3",
  crit = "id",
  typ = "left"
)
In the following sample data to replicate the problem:
# Left table: 4 rows. The `.Names` attribute sets the column names to `id` and
# `isin` (it overrides the `tr.isincode` label used inside the list).
df1 <- structure(list(id = c("XXX1", "XXX2", "XXX3",
"XXX4"), tr.isincode = c("ISIN1", "ISIN2",
"ISIN3", "ISIN4")), .Names = c("id", "isin"
), row.names = c(NA, 4L), class = "data.frame")
# Right table: 2 rows, of which only id "XXX1" also occurs in df1.
df2 <- structure(list(id= c("XXX1", "XXX5"), wrong= c(1L,
1L)), .Names = c("id", "wrong"), row.names = 1:2, class = "data.frame")
# Empty log table (0 rows) with the columns `mergedat` and `countdat`; it is
# the default value of the `doc` argument of fun.docmerge().
checkmerge <- structure(list(mergedat = structure(integer(0), .Label = character(0), class = "factor"),
countdat = numeric(0)), .Names = c("mergedat", "countdat"
), row.names = integer(0), class = "data.frame")
In the example, a list with the dataframes df3 and checkmerge are returned. I would need both dataframes separately. I know that I could do it via manual assignment (e.g., checkmerge <- results$checkmerge) but I want to eliminate manual changes as much as possible and am therefore looking for an automated way.
I am learning the use of the ifelse function from Zuur et al (2009) A Beginner's Guide to R. In one exercise, there is a data frame called Owls which contains data about 27 nests and two nights of observations.
# Presumably a dput() excerpt of the Owls data frame: 4 rows from a single
# nest, 7 columns. `FoodTreatment` is a factor with the levels "Deprived" and
# "Satiated". The expression is not assigned to a name here.
structure(list(Nest = structure(c(1L, 1L, 1L, 1L), .Label = "AutavauxTV", class = "factor"),
FoodTreatment = structure(c(1L, 2L, 1L, 1L), .Label = c("Deprived",
"Satiated"), class = "factor"), SexParent = structure(c(1L,
1L, 1L, 1L), .Label = "Male", class = "factor"), ArrivalTime = c(22.25,
22.38, 22.53, 22.56), SiblingNegotiation = c(4L, 0L, 2L,
2L), BroodSize = c(5L, 5L, 5L, 5L), NegPerChick = c(0.8,
0, 0.4, 0.4)), .Names = c("Nest", "FoodTreatment", "SexParent",
"ArrivalTime", "SiblingNegotiation", "BroodSize", "NegPerChick"
), row.names = c(NA, 4L), class = "data.frame")
The two nights differed as to the feeding regime (satiated or deprived), which is indicated in the FoodTreatment variable. The task is to use the ifelse and paste functions to make a new categorical variable that defines observations from a single night at a particular nest.
In the solutions the following code is suggested:
# Read the Owls data from a text file that has a header row.
Owls <- read.table(file = "Owls.txt", header = TRUE, dec = ".")
# NOTE: the assignments sit inside the `yes` and `no` arguments of ifelse().
# When the data contain both treatments, both arguments are evaluated in full
# (first `yes`, then `no`), so Owls$NestNight is assigned twice and ends up
# with the "_2" labels for every row. The vector that ifelse() itself returns
# has the intended mix of "_1" and "_2", but it is only printed, never stored.
ifelse(Owls$FoodTreatment == "Satiated", Owls$NestNight <- paste(Owls$Nest, "1",sep = "_"), Owls$NestNight <- paste(Owls$Nest, "2",sep = "_"))
and apparently it creates a new variable whose values end in either "_1" or "_2".
However, when I call the original dataframe, all "_1" endings in the NestNight variable have disappeared and turned into "_2".
Why does this happen? Did the authors miss something from the code or it's me who is not getting it?
Many thanks
EDIT: Sorry, I wanted to give a reproducible example by copying my data using dput but it did not work. If you can let me know how I can correct it so that it appears properly, I'd be grateful too!
Solution
If you do the assignment outside the ifelse structure, it works:
# Assign the result of ifelse() itself: each row gets the nest name followed
# by "1" (satiated night) or "2" (any other treatment).
Owls[["NestNight"]] <- ifelse(
  Owls[["FoodTreatment"]] == "Satiated",
  paste0(Owls[["Nest"]], "1"),
  paste0(Owls[["Nest"]], "2")
)
Explanation
What happens in your case is simply if you would execute the following two lines:
# These two statements have the same net effect as the assignments placed
# inside ifelse(): the second assignment overwrites the first for every row,
# so only the "2" labels remain.
Owls$NestNight <- paste(Owls$Nest, "1",sep = "")
Owls$NestNight <- paste(Owls$Nest, "2",sep = "")
You first assign paste(Owls$Nest, "1",sep = "") to Owls$NestNight and then you reassign paste(Owls$Nest, "2",sep = "") to it. The ifelse is not affected by this, but you don't assign its result to any variable.
Maybe it is more clear if you test this simple code:
# Both arguments of c() are evaluated, left to right, so `a` is assigned twice
# and keeps the value from the last assignment, while c() still returns all
# ten numbers.
c(a <- 1:5, a <- 6:10) #c is your ifelse, a is your Owls$NestNight
a #[1] 6 7 8 9 10
This may be a very simple question, but I don't see how to answer it.
I have the following reproducible code, where I have two small dataframes that I use to calculate a percentage value based on each column total:
# dataframe x: two rows for PROV "AG"; the PAX.* columns hold integer counts
x <- data.frame(
  PROV = factor(c("AG", "AG")),
  APT = factor(c("AAA", "BBB")),
  PAX.2013 = c(5L, 4L),
  PAX.2014 = c(4L, 2L),
  PAX.2015 = c(4L, 0L)
)
# dataframe y: two rows for PROV "AQ"; the APT levels keep the order CCC, AAA
y <- data.frame(
  PROV = factor(c("AQ", "AQ")),
  APT = factor(c("CCC", "AAA"), levels = c("CCC", "AAA")),
  PAX.2013 = c(3L, 7L),
  PAX.2014 = c(2L, 1L),
  PAX.2015 = c(0L, 3L)
)
# list z (with x and y)
z <- list(x, y)
# share of each row in its column total, as a percentage with one decimal
# (columns 3 to 5 are the PAX.* columns)
round(100 * prop.table(as.matrix(z[[1]][3:5]), margin = 2), 1)
round(100 * prop.table(as.matrix(z[[2]][3:5]), margin = 2), 1)
as you can see, it works just fine.
Now I want to automate for all the list, but I can't figure out how to get the results. This is my simple code:
#for-loop that is not working
# NOTE: `length(z)` is the single number 2, so the body runs only once, with
# i = 2. The rounded matrix is also neither printed nor stored: values of
# expressions are not auto-printed inside a for loop.
for (i in length(z))
{round(prop.table(as.matrix(z[[i]][3:5]), margin = 2)*100,1)}
You have two problems.
First, you have not put a range into your for loop so you are just trying to iterate over a single number and second, you are not assigning your result anywhere on each iteration.
Use 1:length(z) to define a range. Then assign the results to a variable.
This would work:
# One percentage table per data frame in z, stored in a preallocated list.
# seq_along(z) is the safe form of 1:length(z): it yields no iterations for an
# empty list, whereas 1:0 would run the body for i = 1 and i = 0.
my_list <- vector("list", length(z))
for (i in seq_along(z)) {
  my_list[[i]] <- round(
    prop.table(as.matrix(z[[i]][3:5]), margin = 2) * 100,
    1
  )
}
my_list
But it would be more efficient and idiomatic to use lapply:
# Apply the percentage calculation to every data frame in z. Iterating over
# the list itself avoids the index vector and also works for an empty list.
lapply(z, function(df) {
  round(prop.table(as.matrix(df[3:5]), margin = 2) * 100, 1)
})
Setting aside the discussion of whether a for-loop is the best approach, you had two issues. One, your for loop only iterates over 2 (which is length(z)) instead of 1:2. Two, you need to do something with the round(....) statement. In this solution, I added a print statement.
# Print the percentage table of every data frame in z. print() is required
# because values are not auto-printed inside a for loop; seq_along(z) also
# covers the empty-list case that 1:length(z) gets wrong.
for (i in seq_along(z)) {
  print(round(prop.table(as.matrix(z[[i]][3:5]), margin = 2) * 100, 1))
}