Wireframe plot not showing the values [duplicate] - r

This question already has answers here:
R: Plotting a 3D surface from x, y, z
(5 answers)
Closed last year.
I am attempting to complete this wireframe plot, but for however reason it does not show the contents, only the outline of the plot. Below are data and MWE.
What am I doing wrong?
Uncooperative (😉) code:
library(lattice)
wireframe(
data = as.data.frame(a), ncpg12 ~ `S>B` * powg2,
shade = T, aspect = c(1, 1.5, 2), scales = list(arrows = F)
)
Data as wire.csv:
ncpg12,powg2,S>B
9.248631177619243,0.5604918536931173,0.00183393456426873
6.982877749338513,0.3098192715335432,0.01213195467163962
4.701116657737657,0.04999999999999999,0.06639433627763273
5.910427426924798,0.24518128912337697,0.1450446478654616
7.100787486648187,0.410018053934651,0.18041044252588423
6.867220052256016,0.5604978615437913,0.01213195467163977
4.557178781889888,0.3098192714867842,0.03131615691038152
2.256382318979377,0.04999999999999999,0.10386777520189128
3.544175197270306,0.24518128913433623,0.1956947037363749
4.786247297051887,0.4100180539207875,0.23509889193262204
4.47444787576751,0.5604978615358123,0.0663943362776329
2.1492331205663504,0.30981927148918764,0.10386777520189153
0,0.04999999999999999,0.2119180786856567
1.6301662903351826,0.24518128914507187,0.3267560369666459
3.046468068832837,0.4100180539377091,0.37248269710500553
5.731804609367828,0.5604978615356425,0.1450446478654616
3.4916627620168583,0.30981927144570615,0.19569470373637557
1.691074359432605,0.04999999999999999,0.32675603696664685
3.791287623813332,0.24518128912628456,0.4532575404720525
5.415642812939041,0.4100180538993808,0.5011596926023434
6.959612008409749,0.5604978614989732,0.1804104425258848
4.773824852643884,0.30981927143543664,0.2350988919326221
3.149864586948752,0.04999999999999999,0.3724826971050044
5.458051219009576,0.24518128905784495,0.5011596926023425
7.169432426981075,0.4100180538979537,0.5490511783213748
13.743971562458682,0.8475275485721443,0.00146994891912016
9.18463317316673,0.5431311708382447,0.01136908005754479
4.701116657905004,0.04999999999999999,0.06570438744198948
7.351968961351304,0.43391556095132944,0.14532454892964627
9.852982274307578,0.6876116333369463,0.1811890044398896
11.388418467186057,0.8475300860646717,0.01098668767881793
6.766360203153454,0.5431311707908197,0.02979799221541907
2.256382319061231,0.04999999999999999,0.10249311714271682
5.022962834042119,0.4339155609593038,0.19531930144010193
7.600496760083843,0.6876116333470675,0.23521845207420533
8.942010973938523,0.8475300860612255,0.06292370525775945
4.296879777810318,0.5431311707936094,0.10016997191686286
0,0.04999999999999999,0.2086924088558675
3.259433943545446,0.4339155609421589,0.32472076124519667
6.08980889108534,0.6876116333444386,0.37097763013907503
10.033463255787863,0.8475300860611537,0.13933895061160378
5.50333769575991,0.5431311708127886,0.18999713514627453
1.691074359241611,0.04999999999999999,0.3220093974091579
5.616303463568329,0.43391556093219097,0.449989511559725
8.743466363001062,0.687611633342467,0.4984905017778514
11.174209234632144,0.8475300860623024,0.1739366597649558
6.717756648919021,0.5431311708061631,0.22874296954077433
3.149864587112461,0.04999999999999999,0.3672893695383088
7.367578939567466,0.4339155609391685,0.4975672654509311
10.617837423260426,0.6876116333328715,0.5460864990319658

Here's how I would do it.
dat <- read.csv(textConnection("'ncpg12','powg2','S>B',
9.248631177619243,0.5604918536931173,0.00183393456426873
6.982877749338513,0.3098192715335432,0.01213195467163962
4.701116657737657,0.04999999999999999,0.06639433627763273
5.910427426924798,0.24518128912337697,0.1450446478654616
7.100787486648187,0.410018053934651,0.18041044252588423
6.867220052256016,0.5604978615437913,0.01213195467163977
4.557178781889888,0.3098192714867842,0.03131615691038152
2.256382318979377,0.04999999999999999,0.10386777520189128
3.544175197270306,0.24518128913433623,0.1956947037363749
4.786247297051887,0.4100180539207875,0.23509889193262204
4.47444787576751,0.5604978615358123,0.0663943362776329
2.1492331205663504,0.30981927148918764,0.10386777520189153
0,0.04999999999999999,0.2119180786856567
1.6301662903351826,0.24518128914507187,0.3267560369666459
3.046468068832837,0.4100180539377091,0.37248269710500553
5.731804609367828,0.5604978615356425,0.1450446478654616
3.4916627620168583,0.30981927144570615,0.19569470373637557
1.691074359432605,0.04999999999999999,0.32675603696664685
3.791287623813332,0.24518128912628456,0.4532575404720525
5.415642812939041,0.4100180538993808,0.5011596926023434
6.959612008409749,0.5604978614989732,0.1804104425258848
4.773824852643884,0.30981927143543664,0.2350988919326221
3.149864586948752,0.04999999999999999,0.3724826971050044
5.458051219009576,0.24518128905784495,0.5011596926023425
7.169432426981075,0.4100180538979537,0.5490511783213748
13.743971562458682,0.8475275485721443,0.00146994891912016
9.18463317316673,0.5431311708382447,0.01136908005754479
4.701116657905004,0.04999999999999999,0.06570438744198948
7.351968961351304,0.43391556095132944,0.14532454892964627
9.852982274307578,0.6876116333369463,0.1811890044398896
11.388418467186057,0.8475300860646717,0.01098668767881793
6.766360203153454,0.5431311707908197,0.02979799221541907
2.256382319061231,0.04999999999999999,0.10249311714271682
5.022962834042119,0.4339155609593038,0.19531930144010193
7.600496760083843,0.6876116333470675,0.23521845207420533
8.942010973938523,0.8475300860612255,0.06292370525775945
4.296879777810318,0.5431311707936094,0.10016997191686286
0,0.04999999999999999,0.2086924088558675
3.259433943545446,0.4339155609421589,0.32472076124519667
6.08980889108534,0.6876116333444386,0.37097763013907503
10.033463255787863,0.8475300860611537,0.13933895061160378
5.50333769575991,0.5431311708127886,0.18999713514627453
1.691074359241611,0.04999999999999999,0.3220093974091579
5.616303463568329,0.43391556093219097,0.449989511559725
8.743466363001062,0.687611633342467,0.4984905017778514
11.174209234632144,0.8475300860623024,0.1739366597649558
6.717756648919021,0.5431311708061631,0.22874296954077433
3.149864587112461,0.04999999999999999,0.3672893695383088
7.367578939567466,0.4339155609391685,0.4975672654509311
10.617837423260426,0.6876116333328715,0.5460864990319658"), header=TRUE)
names(dat) <- c("ncpg12", "powg2", "SB")
library(rgl)
s <- interp(dat$powg2, dat$SB, dat$ncpg12)
eg <- expand.grid(xind = 1:40,
yind = 1:40)
eg$powg2 <- s$x[eg$xind]
eg$SB <- s$y[eg$yind]
eg$ncpg12 <- c(s$z)
library(lattice)
wireframe(
data = eg,
ncpg12 ~ SB * powg2,
shade = T, aspect = c(1, 1.5, 2), scales = list(arrows = F)
)

Related

Interactively identify 3D object in rgl plot

I want to identify 3d cylinders in an rgl plot to obtain one attribute of the nearest / selected cylinder. I tried using labels to simply spell out the attribute, but I work on data with more than 10.000 cylinders. Therefore, it gets so crowded that the labels are unreadable and it takes ages to render.
I tried to understand the documentation of rgl and I guess the solution to my issue is selecting the cylinder in the plot manually. I believe the function selectpoints3d() is probably the way to go. I believe it returns all vertices within the drawn rectangle, but I don't know how to go back to the cylinder data? I could calculate which cylinder is closest to the mean of the selected vertices, but this seems like a "quick & dirty" way to do the job.
Is there a better way to go? I noticed the argument value=FALSE to get the indices only, but I don't know how to go back to the cylinders.
Here is some dummy data and my code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# get attribute
nearby <- selectpoints3d(value=TRUE, button = "right")
nearby <- colMeans(nearby)
cylinder$dist <- sqrt(
(nearby["x"]-cylinder$center_X)**2 +
(nearby["y"]-cylinder$center_Y)**2 +
(nearby["z"]-cylinder$center_Z)**2)
cylinder$attribute[which.min(cylinder$dist)]
If you call selectpoints3d(value = FALSE), you get two columns. The first column is the id of the object that was found. Your cylinders get two ids each. One way to mark the cylinders is to use "tags". For example, this modification of your code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
# Add tag here:
cylinder_list[[i]]$material$tag <- cylinder$attribute[i]
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# Don't get values, get the ids
nearby <- selectpoints3d(value=FALSE, button = "right", closest = FALSE)
ids <- nearby[, "id"]
# Convert them to tags. If you select one of the labels, you'll get
# a blank in the list of tags, because we didn't tag the text.
unique(tagged3d(id = ids))
When I was trying this, I found that using closest = TRUE in selectpoints3d seemed to get too many ids; there may be a bug there.

plot(var()) displays two different plots, how do I merge them into one? Also having two y axis

> dput(head(inputData))
structure(list(Date = c("2018:07:00", "2018:06:00", "2018:05:00",
"2018:04:00", "2018:03:00", "2018:02:00"), IIP = c(125.8, 127.5,
129.7, 122.6, 140.3, 127.4), CPI = c(139.8, 138.5, 137.8, 137.1,
136.5, 136.4), `Term Spread` = c(1.580025, 1.89438, 2.020112,
1.899074, 1.470544, 1.776862), RealMoney = c(142713.9916, 140728.6495,
140032.2762, 139845.5215, 139816.4682, 139625.865), NSE50 = c(10991.15682,
10742.97381, 10664.44773, 10472.93333, 10232.61842, 10533.10526
), CallMoneyRate = c(6.161175, 6.10112, 5.912088, 5.902226, 5.949956,
5.925538), STCreditSpread = c(-0.4977, -0.3619, 0.4923, 0.1592,
0.3819, -0.1363)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I want to make my autoregressive plot like this plot:
#------> importing all libraries
library(readr)
install.packages("lubridtae")
library("lubridate")
install.packages("forecast")
library('ggplot2')
library('fpp')
library('forecast')
library('tseries')
#--------->reading data
inputData <- read_csv("C:/Users/sanat/Downloads/exercise_1.csv")
#--------->calculating the lag=1 for NSE50
diff_NSE50<-(diff(inputData$NSE50, lag = 1, differences = 1)/lag(inputData$NSE50))
diff_RealM2<-(diff(inputData$RealMoney, lag = 1, differences = 1)/lag(inputData$RealMoney))
plot.ts(diff_NSE50)
#--------->
lm_fit = dynlm(IIP ~ CallMoneyRate + STCreditSpread + diff_NSE50 + diff_RealM2, data = inputData)
summary(lm_fit)
#--------->
inputData_ts = ts(inputData, frequency = 12, start = 2012)
#--------->area of my doubt is here
VAR_data <- window(ts.union(ts(inputData$IIP), ts(inputData$CallMoneyRate)))
VAR_est <- VAR(y = VAR_data, p = 12)
plot(VAR_est)
I want to my plots to get plotted together in same plot. How do I serparate the var() plots to two separate ones.
Current plot:
My dataset :
dataset
Okay, so this still needs some work, but it should set the right framework for you. I would look more into working with the ggplot2 for future.
Few extra packages needed, namely library(vars) and library(dynlm).
Starting from,
VAR_est <- VAR(y = VAR_data, p = 12)
Now we extract the values we want from the VAR_est object.
y <- as.numeric(VAR_est$y[,1])
z <- as.numeric(VAR_est$y[,2])
x <- 1:length(y)
## second data set on a very different scale
par(mar = c(5, 4, 4, 4) + 0.3) # Leave space for z axis
plot(x, y, type = "l") # first plot
par(new = TRUE)
plot(x, z, type = "l", axes = FALSE, bty = "n", xlab = "", ylab = "")
axis(side=4, at = pretty(range(z)))
mtext("z", side=4, line=3)
I will leave you to add the dotted lines on etc...
Hint: Decompose the VAR_est object, for example, VAR_est$datamat, then see which bit of data corresponds to the part of the plot you want.
Used some of this

Partial Row Labels Heatmap - R

I was wondering if anyone knows of a package that allows partial row labeling of heatmaps. I am currently using pheatmap() to construct my heatmaps, but I can use any package that has this functionality.
I have plots with many rows of differentially expressed genes and I would like to label a subset of them. There are two main things to consider (that I can think of):
The placement of the text annotation depends on the height of the row. If the rows are too narrow, then the text label will be ambiguous without some sort of pointer.
If multiple adjacent rows are significant (i.e. will be labelled), then these will need to be offset, and again, a pointer will be needed.
Below is an example of a partial solution that really only gets maybe halfway there, but I hope illustrates what I'd like to be able to do.
set.seed(1)
require(pheatmap)
require(RColorBrewer)
require(grid)
### Data to plot
data_mat <- matrix(sample(1:10000, 300), nrow = 50, ncol = 6)
rownames(data_mat) <- paste0("Gene", 1:50)
colnames(data_mat) <- c(paste0("A", 1:3), paste0("B", 1:3))
### Set how many genes to annotate
### TRUE - make enough labels that some overlap
### FALSE - no overlap
tooMany <- T
### Select a few genes to annotate
if (tooMany) {
sigGenes_v <- paste0("Gene", c(5,20,26,42,47,16,28))
newMain_v <- "Too Many Labels"
} else {
sigGenes_v <- paste0("Gene", c(5,20,26,42))
newMain_v <- "OK Labels"
}
### Make color list
colors_v <- brewer.pal(8, "Dark2")
colors_v <- colors_v[c(1:length(sigGenes_v), 8)]
names(colors_v) <- c(sigGenes_v, "No")
annColors_lsv <- list("Sig" = colors_v)
### Column Metadata
colMeta_df <- data.frame(Treatment = c(rep("A", 3), rep("B", 3)),
Replicate = c(rep(1:3, 2)),
stringsAsFactors = F,
row.names = colnames(data_mat))
### Row metadata
rowMeta_df <- data.frame(Sig = rep("No", 50),
stringsAsFactors = F,
row.names = rownames(data_mat))
for (gene_v in sigGenes_v) rowMeta_df[rownames(rowMeta_df) == gene_v, "Sig"] <- gene_v
### Heatmap
heat <- pheatmap(data_mat,
annotation_row = rowMeta_df,
annotation_col = colMeta_df,
annotation_colors = annColors_lsv,
cellwidth = 10,
main = "Original Heat")
### Get order of genes after clustering
genesInHeatOrder_v <- heat$tree_row$labels[heat$tree_row$order]
whichSigInHeatOrder_v <- which(genesInHeatOrder_v %in% sigGenes_v)
whichSigInHeatOrderLabels_v <- genesInHeatOrder_v[whichSigInHeatOrder_v]
sigY <- 1 - (0.02 * whichSigInHeatOrder_v)
### Change title
whichMainGrob_v <- which(heat$gtable$layout$name == "main")
heat$gtable$grobs[[whichMainGrob_v]] <- textGrob(label = newMain_v,
gp = gpar(fontsize = 16))
### Remove rows
whichRowGrob_v <- which(heat$gtable$layout$name == "row_names")
heat$gtable$grobs[[whichRowGrob_v]] <- textGrob(label = whichSigInHeatOrderLabels_v,
y = sigY,
vjust = 1)
grid.newpage()
grid.draw(heat)
Here are a few outputs:
original heatmap:
ok labels:
ok labels, with flags:
too many labels
too many labels, with flags
The "with flags" outputs are the desired final results.
I just saved these as images from the Rstudio plot viewer. I recognize that I could save them as pdfs and provide a larger file size to get rid of the label overlap, but then the individual cells would be larger than I want.
Based on your code, you seem fairly comfortable with gtables & grobs. A (relatively) straightforward way to achieve the look you want is to zoom in on the row label grob, & make some changes there:
replace unwanted labels with "";
evenly spread out labels within the available space;
add line segments joining the old and new label positions.
I wrote a wrapper function for this, which works as follows:
# heat refers to the original heatmap produced from the pheatmap() function
# kept.labels should be a vector of labels you wish to show
# repel.degree is a number in the range [0, 1], controlling how much the
# labels are spread out from one another
add.flag(heat,
kept.labels = sigGenes_v,
repel.degree = 0)
add.flag(heat,
kept.labels = sigGenes_v,
repel.degree = 0.5)
add.flag(heat,
kept.labels = sigGenes_v,
repel.degree = 1)
Function (explanations in annotations):
add.flag <- function(pheatmap,
kept.labels,
repel.degree) {
# repel.degree = number within [0, 1], which controls how much
# space to allocate for repelling labels.
## repel.degree = 0: spread out labels over existing range of kept labels
## repel.degree = 1: spread out labels over the full y-axis
heatmap <- pheatmap$gtable
new.label <- heatmap$grobs[[which(heatmap$layout$name == "row_names")]]
# keep only labels in kept.labels, replace the rest with ""
new.label$label <- ifelse(new.label$label %in% kept.labels,
new.label$label, "")
# calculate evenly spaced out y-axis positions
repelled.y <- function(d, d.select, k = repel.degree){
# d = vector of distances for labels
# d.select = vector of T/F for which labels are significant
# recursive function to get current label positions
# (note the unit is "npc" for all components of each distance)
strip.npc <- function(dd){
if(!"unit.arithmetic" %in% class(dd)) {
return(as.numeric(dd))
}
d1 <- strip.npc(dd$arg1)
d2 <- strip.npc(dd$arg2)
fn <- dd$fname
return(lazyeval::lazy_eval(paste(d1, fn, d2)))
}
full.range <- sapply(seq_along(d), function(i) strip.npc(d[i]))
selected.range <- sapply(seq_along(d[d.select]), function(i) strip.npc(d[d.select][i]))
return(unit(seq(from = max(selected.range) + k*(max(full.range) - max(selected.range)),
to = min(selected.range) - k*(min(selected.range) - min(full.range)),
length.out = sum(d.select)),
"npc"))
}
new.y.positions <- repelled.y(new.label$y,
d.select = new.label$label != "")
new.flag <- segmentsGrob(x0 = new.label$x,
x1 = new.label$x + unit(0.15, "npc"),
y0 = new.label$y[new.label$label != ""],
y1 = new.y.positions)
# shift position for selected labels
new.label$x <- new.label$x + unit(0.2, "npc")
new.label$y[new.label$label != ""] <- new.y.positions
# add flag to heatmap
heatmap <- gtable::gtable_add_grob(x = heatmap,
grobs = new.flag,
t = 4,
l = 4
)
# replace label positions in heatmap
heatmap$grobs[[which(heatmap$layout$name == "row_names")]] <- new.label
# plot result
grid.newpage()
grid.draw(heatmap)
# return a copy of the heatmap invisibly
invisible(heatmap)
}

Saving multiply pdf plots r

I have made a loop for making multiply plots, however i have no way of saving them, my code looks like this:
#----------------------------------------------------------------------------------------#
# RING data: Mikkel
#----------------------------------------------------------------------------------------#
# Set working directory
setwd()
#### Read data & Converting factors ####
dat <- read.table("Complete RING.txt", header =TRUE)
str(dat)
dat$Vial <- as.factor(dat$Vial)
dat$Line <- as.factor(dat$Line)
dat$Fly <- as.factor(dat$Fly)
dat$Temp <- as.factor(dat$Temp)
str(dat)
datSUM <- summaryBy(X0.5_sec+X1_sec+X1.5_sec+X2_sec+X2.5_sec+X3_sec~Vial_nr+Concentration+Sex+Line+Vial+Temp,data=dat, FUN=sum)
fl<-levels(datSUM$Line)
colors = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")
meltet <- melt(datSUM, id=c("Concentration","Sex","Line","Vial", "Temp", "Vial_nr"))
levels(meltet$variable) <- c('0,5 sec', '1 sec', '1,5 sec', '2 sec', '2,5 sec', '3 sec')
meltet20 <- subset(meltet, Line=="20")
meltet20$variable <- as.factor(meltet20$variable)
AllConcentrations <- levels(meltet20$Concentration)
for (i in AllConcentrations) {
meltet.i <- meltet20[meltet20$Concentration ==i,]
quartz()
print(dotplot(value~variable|Temp, group=Sex, data = meltet.i ,xlab="Time", ylab="Total height pr vial [mm above buttom]", main=paste('Line 20 concentration ', meltet.i$Concentration[1]),
key = list(points = list(col = colors[1:2], pch = c(1, 2)),
text = list(c("Female", "Male")),
space = "top"), col = colors, pch =c(1, 2))) }
I have tried with the quartz.save function, but that just overwrites the files. Im using a mac if that makes any difference.
When I want to save multiple plots in a loop I tend to do something like...
for(i in AllConcentrations){
meltet.i <- meltet20[meltet20$Concentration ==i,]
pdf(paste("my_filename", i, ".pdf", sep = ""))
dotplot(value~variable|Temp, group=Sex, data = meltet.i ,xlab="Time", ylab="Total height pr vial [mm above buttom]", main=paste('Line 20 concentration ', meltet.i$Concentration[1]),
key = list(points = list(col = colors[1:2], pch = c(1, 2)),
text = list(c("Female", "Male")),
space = "top"), col = colors, pch =c(1, 2))
dev.off()
}
This will create a pdf file for every level in AllConcentrations and save it in your working directory. It will paste together my_filename, the number of the iteration i, and then .pdf together to make each file unique. Of course, you will want to adjust height and width in the pdf function.

box plot using column of different length

I want to do some box plots, but I have data with a different number of rows for each column.
My data looks like:
OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585
I'd like to have a box plot for each column (OT1, OT2…), but with the first three and the last three grouped together.
I tried:
>mydata <- read.csv('L5.txt', header = T, sep = "\t")
>mydata_t <- t(mydata)
>boxplot(mydata_t, ylab = "OTU abundance (%)",las=2, at=c(1,2,3 5,6,7))
But it didn't work…
How can I do?
Thanks!
Combining both answers and extenting Henrik's answer, you can also group the OT's together in boxplot() as well:
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat <- sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
dat.m <- transform(dat.m,group=ifelse(grepl('1|2|3','4|5|6'),
'group1','group2'))
as.factor(dat.m$X2)
boxplot(dat.m$value~dat.m$X2,data=dat.m,
axes = FALSE,
at = 1:6 + c(0.2, 0, -0.2),
col = rainbow(6))
axis(side = 1, at = c(2, 5), labels = c("Group_1", "Group_2"))
axis(side = 2, at = seq(0, 40, by = 10))
legend("topright", legend = c("OT1", "OT2", "OT3", "OT4", "OT5", "OT6"), fill = rainbow(6))
abline(v = 3.5, col = "grey")
box()
Not easy to group boxplots using R basic plots, better to use ggplot2 here. Whatever the difficulty here is how to reformat your data and reshape them in the long format.
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat = sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
dat.m <- transform(dat.m,group=ifelse(grepl('1|2|3',Var2),
'group1','group2'))
ggplot(dat.m)+
geom_boxplot(aes(x=group,y=value,fill=Var2))
Or with boxplot, using #agstudy's 'dat':
df <- melt(dat)
boxplot(value ~ Var2, data = df, at = 1:6 + c(0.2, 0, -0.2))

Resources