Boxplot graph getting skew - r

I'm really new in R statistics and i just bumped into a problem when it comes to creating a box plot. I have used the following script that i have created:
# Reset R's braingetwd
rm(list=ls())
# Tells R where to look
setwd("/Users/Axel/Desktop/Kandidatarbete/Data")
# Confirms R is looking at the right place
getwd()
# Read data
read.table("migration_test_graph.txt")
# Assign a name to the data
migrationtest5<- read.table("migration_test_graph.txt", ,col.names=c('treatment','migration'), header=TRUE, sep="\t", na.strings="NA", dec=",", strip.white=TRUE)
mzmz=migrationtest5[which(migrationtest5$treatment == "MzMz"),]
mzct=migrationtest5[which(migrationtest5$treatment == "MzCt"),]
ctct=migrationtest5[which(migrationtest5$treatment == "CtCt"),]
ctmz=migrationtest5[which(migrationtest5$treatment == "CtMz"),]
#plot data (migration in function of index)
boxplot(migration~treatment, data=migrationtest5, boxwex=0.5, varwidth=TRUE, staplewex=FALSE, outline=TRUE, outpch=20, cex= 0.3, range=1, title=FALSE, whisklty=19, col=c("darkolivegreen","darkolivegreen","darkolivegreen3", "darkolivegreen3"), xlab="", ylab="Migration index", las=1, ylim=c(0, 1), cex.lab=1, cex.axis=0.75, axes=FALSE, border=NA, medcol="white", whiskcol="black", outcol="black", whisklwd=2, medlwd=4)
axis(1, at=seq(0, 5, by=1), cex.axis=1, labels=c("", "CtCt","MzCt", "CtMz", "MzMz",""), lwd=2)
axis(2, at=seq(0,1, by=0.2), las=2, cex.axis=1, lwd=2)
The problem is that the whole graph looks skew and the data are located on the wrong places. Anyone got a clue what might be the problem? The big box plots are supposed to be located above the CtMz and MzMz and the small ones above the MzCt and CtCt.
Thanks in advance
Axel

Related

How to put statistical information on the output diagram with hist()?

I made the following histogram. I want to display a stats box with a border here, like the image on this sites image, and write various information there. In addition, it is assumed that you will also write legends for multiple histograms.
As a result of research, I found that ROOT has a similar function, but I would like to realize this with R. How can I do this with R?
The xlsx file I used is large, so I uploaded it here.
library(readxl)
file = read_excel("./data.xlsx")
summary(file)
data = file[["foo[%]"]]
par(las=1, family="Century gothic", xaxs="i", yaxs="i", cex.main=3, cex.lab=1.1, cex.axis=1.1, font.lab=2, font.axis=2)
h = hist(data, yaxt="n", tck=0.03, breaks=seq(18,18.35,0.01), main=NA, xlab="xaxis", ylab="freq", border=NA, col="#00000000", ylim=c(0,100))
axis(2, tck=0.03, at=c(0,10,20,30,40,50,60,70,80,90,100))
grid()
lines(rep(h$breaks, each=2)[-c(1,2*length(h$breaks))], rep(h$counts, each=2), lwd=3)
box()
#jay.sf's suggestion is a little tricky to implement. Here's an example that comes close, but it's not perfect: the spacing is wrong. Maybe someone can improve it?
statname <- expression("Entries", "Mean", "Std Dev", chi^2/"ndf", "Prob", "Constant", "Slope")
statvalue <- expression(5000, 19.59, 18.61, 131.1/115, 0.1447, 5.54 %+-% 0.04,
-0.0514 %+-% 0.0011)
plot(1)
legend("topright", title = "hB",
legend = c(statname, statvalue),
ncol = 2,
x.intersp = 0)
Created on 2021-02-21 by the reprex package (v0.3.0)
It uses expression() for the entries because you have math in them; you might be able to do it reasonably well with special characters instead. I couldn't find a way to set the spacing of the two columns differently: your example had the names flush left and the values flush right. Nor could I set the column widths separately. Probably if you really want to fine tune this, you'll have to write your own function based on the legend function.

Turn off axes in r

I've generated a plot of an RDA using R. When I add my biplot on to the RDA, it adds new axes that I would like to turn off. Here is some example data:
Site<-c(1,2,3,4,5,6)
inv1<-c(34,67,78,45,677,23)
inv2<-c(45,5,55,56,7,8)
inv3<-c(6,7,4,12,7,8)
depth<-c(3,1,6,7,10,57)
exampledata_exp<-data.frame(Site,depth)
exampledata_invs<-data.frame(inv1,inv2,inv3)
dpRDA.out<-capscale(exampledata_invs~depth+Site, data=exampledata_exp, distance="bray")
summary(dpRDA.out)
plot(dpRDA.out, display=c("sites"), type="n", scaling=1, cex=1, family="serif")#this is the way I want the plot borders to look
points(dpRDA.out, display = c("sites"))
points(dpRDA.out, display="bp", col="black", scaling=1, family="serif")
The last bit of code adds on a new axes. I have tried axes=FALSE, xaxt="n", axes(side=3, lwd=0). Any suggestions greatly appreciated.
libraries currently loaded:
library(vegan)
library(reshape)
library(ggplot2)
library(plyr)
library(MASS)
library(tables)
library(matrixStats)
You can switch off the new axes by setting the axis.bp parameter to FALSE in the last line.
Try:
points(dpRDA.out, display="bp", col="black", scaling=1, family="serif", axis.bp = FALSE)

Plotting several variables on the same scale in R

I've tried over and over to solve this issue but I can't get it down. I have estimated a Beta-t-EGARCH model and a GARCH-t model in R and now I need to plot the results over the same plot. The final result is horrible, since the variables don't share the same scale on the y axis. I'm new to R, so please don't blame me :).
Here's the code:
library(quantmod)
library(betategarch)
library(fGarch)
library(ggplot2)
getSymbols("GOOG",src="yahoo")
google_ret <- abs(periodReturn(GOOG, period="daily", subset=NULL, type="log"))-mean(abs(periodReturn(GOOG, period="daily", subset=NULL, type="log")))
googcomp <- tegarch(google_ret, asym=FALSE, skew=FALSE)
goog1stdev <- fitted(googcomp)
#now we try to fit a standard GARCH-t model
googgarch <- garchFit(data=google_ret, cond.dist="sstd")
googgarch2 <- garchFit(data=google_ret, cond.dist="sstd", include.mean = FALSE, include.delta = FALSE, include.skew = FALSE, include.shape = FALSE, leverage = FALSE, trace = TRUE)
volatility <- volatility(googgarch2, type = "sigma")
plot(google_ret)
par(new=TRUE)
plot(googgarch2, which=2)
par(new=TRUE)
plot(goog1stdev, col="red")
The final result is a plot completely out of scale on the y axis, with variables that have lower values plotted above higher ones. Thanks a lot to anybody that wants to help me!
The recommended approach is to plot them as different plots stacked on top of each other:
layout(matrix(1:3,3))
plot(google_ret)
plot(googgarch2, which=2)
plot(goog1stdev, col="red")
You can get rid of the whitespace with calls to par("mar") to adjust margin sizes:
opar=par(mar=par("mar") -c(1,0,3,0)) # opar will then let your restore previous values
..... plotting efforts
par(opar)
I don't know your domain very much but if you cna use shifted y-ordinates then this produces a somewhat cleaned up version with overlayed plots:
png()
plot(google_ret, ylim=c(0,1), ylab="ylab="Google Returns(black); GGarch x10 +0.5 (blue); STD + 0.3(red)" )
par(new=TRUE)
plot(googgarch2#data +.5, type="l", col="blue",axes=FALSE, ylab="", main="",ylim=c(0, 1)) ;abline(h=.5, col="blue")
par(new=TRUE);
plot( 10*coredata(goog1stdev) + .3, col="red", type="l", axes=FALSE, main="",ylim=c(0,1), ylab=""); abline(h=.3, col="red")
dev.off()

(R) Axis widths in gbm.plot

Hoping for some pointers or some experiences insight as i'm literally losing my mind over this, been trying for 2 full days to set up the right values to have a function spit out clean simple line plots from the gbm.plot function (packages dismo & gbm).
Here's where I start. bty=n in par to turn off the box & leave me with only left & bottom axes. Gbm.plot typically spits out one plot per explanatory variable, so usually 6 plots etc, but I'm tweaking it to do one per variable & looping it. I've removed the loop & lots of other code so it's easy to see what's going on.
png(filename = "whatever.png",width=4*480, height=4*480, units="px", pointsize=80, bg="white", res = NA, family="", type="cairo-png")
par(mar=c(2.6,2,0.4,0.5), fig=c(0,1,0.1,1), las=1, bty="n", mgp=c(1.6,0.5,0))
gbm.plot(my_gbm_model,
n.plots=1,
plot.layout = c(1,1),
y.label = "",
write.title=F,
variable.no = 1, #this is part of the multiple plots thing, calls the explanatory variable
lwd=8, #this controls the width of the main result line ONLY
rug=F)
dev.off()
So this is what the starting condition looks like. Aim: make the axes & ticks thicker. That's it.
Putting "lwd=20" in par does nothing.
Adding axes=F into gbm.plot() turns the axes and their numbers off. So I conclude that the control of these axes is handled by gbm.plot, not par. Here's where it get's frustrating and crap. Accepted wisdom from searches says that lwd should control this but it only controls the wiggly centre line as per my note above. So maybe I could add axis(side=1, lwd=8) into gbm.plot() ?
It runs but inexplicably adds a smoother! (which is very thin & hard to see on the web but it's there, I promise). It adds these warnings:
In if (smooth & is.vector(predictors[[j]])) { ... :
the condition has length > 1 and only the first element will be used
Fine, R's going to be a dick for seemingly no reason, I'll keep plugging the leaks as they come up. New code with axis as before and now smoother turned off:
png(filename = "whatever.png",width=4*480, height=4*480, units="px", pointsize=80, bg="white", res = NA, family="", type="cairo-png")
par(mar=c(2.6,2,0.4,0.5), fig=c(0,1,0.1,1), las=1, bty="n", mgp=c(1.6,0.5,0))
gbm.plot(my_gbm_model,
n.plots=1,
plot.layout = c(1,1),
y.label = "",
write.title=F,
variable.no = 1,
lwd=8,
rug=F,
smooth=F,
axis(side=1,lwd=8))
dev.off()
Gives error:
Error in axis(side = 1, lwd = 8) : plot.new has not been called yet
So it's CLEARLY drawing axes within plot since I can't affect the axes from par and I can turn them off in plot. I can do what I want and make one axis bold, but that results in a smoother and warnings. I can turn the smoother off, but then it fails because it says plot.new hadn't been called. And this doesn't even account for the other axis I have to deal with, which also causes the plot.new failure if I call 2 axis sequentially and allow the smoother.
Am I the butt of a big joke here, or am I missing something obvious? It took me long enough to work out that par is supposed to be before all plots unless you're outputting them with png etc in which case it has to be between png & plot - unbelievably this info isn't in ?par. I know I'm going off topic by ranting, sorry, but yeah, 2 full days. Has this been everyone's experience of plotting in R?
I'm going to open the vodka in the freezer. I appreciate I've not put the full reproducible code here, apologies, I can do if absolutely necessary, but it's such a huge timesuck to get to reproducible stage and I'm hoping someone can see a basic logical/coding failure screaming out at them from what I've given.
Thanks guys.
EDIT: reproducibility
core data csv: https://drive.google.com/file/d/0B6LsdZetdypkWnBJVDJ5U3l4UFU
(I've tried to make these data reproducible before and I can't work out how to do so)
samples<-read.csv("data.csv", header = TRUE, row.names=NULL)
my_gbm_model<-gbm.step(data=samples, gbm.x=1:6, gbm.y=7, family = "bernoulli", tree.complexity = 2, learning.rate = 0.01, bag.fraction = 0.5))
Here's what will widen your axis ticks:
..... , lwd.ticks=4 , ...
I predict on the basis of no testing because I keep getting errors with what limited code you have provided) that it will get handled correctly in either gbm.plot or in a subsequent axis call. There will need to be a subsequent axis call, two of them in fact (because as you noted 'lwd' gets passed around indiscriminately):
png(filename = "whatever.png",width=4*480, height=4*480, units="px", pointsize=80, bg="white", res = NA, family="", type="cairo-png")
par(mar=c(2.6,2,0.4,0.5), fig=c(0,1,0.1,1), las=1, bty="n", mgp=c(1.6,0.5,0))
gbm.plot(my_gbm_model,
n.plots=1,
plot.layout = c(1,1),
y.label = "",
write.title=F,
variable.no = 1,
lwd=8,
rug=F,
smooth=F, axes="F",
axis(side=1,lwd=8))
axis(1, lwd.ticks=4, lwd=4)
# the only way to prevent `lwd` from also affecting plot line
axis(2, lwd.ticks=4, lwd=4)
dev.off()
This is what I see with a simple example:
png(); Speed <- cars$speed
Distance <- cars$dist
plot(Speed, Distance,
panel.first = lines(stats::lowess(Speed, Distance), lty = "dashed"),
pch = 0, cex = 1.2, col = "blue", axes=FALSE)
axis(1, lwd.ticks=4, lwd=4)
axis(2, lwd.ticks=4, lwd=4)
dev.off()

plot several linegraphs in one image using R

I am an absolute beginner in R. so this is probably a stupid question.
I have a table like this (csv format):
,1A+,2A+,3A-,3A+,5A-,5A+,6A-,6A+,7A-,7A+
6,4.530309305,5.520356001,3.437626731,5.146758132,,4.355022819,,4.191337618,,4.076583859
10,8.697814022,9.765817956,,9.636004092,3.725756716,8.600484774,3.457423715,8.358842335,2.246622784,7.244668991
12,,,8.176341701,,,,,,,
17,,,,,6.24785396,,5.077069513,,3.137524578
I want to create a line graph in R plotting all the different Y values (1A+, 2A+, etc) vs the Y values (6,10,12,17).
I am doing:
new_curves <- read.csv("new_curves_R.csv", as.is = TRUE)
g_range <- range(0,new_curves$X)
axis(2, las=1, at=4*0:g_range[2])
plot(new_curves$X1A.,new_curves$X,type="o", col="blue")
legend(1, g_range[2], c("new_curves$X1A."), cex=0.8, col=c("blue"));
title(xlab="Days", col.lab=rgb(0,0.5,0))
title(ylab="Total", col.lab=rgb(0,0.5,0))
However, this (obviously) only plots the first datapoint. (the legend is not working for some reason either). I am guessing I need some sort of for loop to add each Y value to the graph recursively. Likewise, a loop would be needed to make the legend.
thanks
dat <- read.table(text=", 1A+,2A+,3A-,3A+,5A-,5A+,6A-,6A+,7A-,7A+
6,4.530309305,5.520356001,3.437626731,5.146758132,,4.355022819,,4.191337618,,4.076583859
10,8.697814022,9.765817956,,9.636004092,3.725756716,8.600484774,3.457423715,8.358842335,2.246622784,7.244668991
12,,,8.176341701,,,,,,,
17,,,,,6.24785396,,5.077069513,,3.137524578", header=TRUE, sep=",", fill=TRUE)
matplot(dat[1], dat[-1])

Resources