I saw a beautiful plot and I'd like to recreate it. Here's an example showing what I've got so far:
# kernel density scatterplot
library(RColorBrewer)
library(MASS)
greyscale <- rev(brewer.pal(4, "Greys"))
x <- rnorm(20000, mean=5, sd=4.5); x <- x[x>0]
y <- x + rnorm(length(x), mean=.2, sd=.4)
z <- kde2d(x, y, n=100)
plot(x, y, pch=".", col="hotpink")
contour(z, drawlabels=FALSE, nlevels=4, col=greyscale, add=T)
abline(c(0,1), lty=1, lwd=2)
abline(lm(y~x), lty=2, lwd=2)
I'm struggling to fill the contours with colour. Is this a job for smoothScatter or another package? I suspect it might be down to my use of kde2d and, if so, can someone please explain this function or link me to a good tutorial?
Many thanks!
P.S. the final image should be greyscale
Seems like you want a filled contour rather than jus a contour. Perhaps
library(RColorBrewer)
library(MASS)
greyscale <-brewer.pal(5, "Greys")
x <- rnorm(20000, mean=5, sd=4.5); x <- x[x>0]
y <- x + rnorm(length(x), mean=.2, sd=.4)
z <- kde2d(x, y, n=100)
filled.contour(z, nlevels=4, col=greyscale, plot.axes = {
axis(1); axis(2)
#points(x, y, pch=".", col="hotpink")
abline(c(0,1), lty=1, lwd=2)
abline(lm(y~x), lty=2, lwd=2)
})
which gives
Related
Given two variables, x and y, I run a dynlm regression on the variables and would like to plot the fitted model against one of the variables and the residual on the bottom showing how the actual data line differs from the predicting line. I've seen it done before and I've done it before, but for the life of me I can't remember how to do it or find anything that explains it.
This gets me into the ballpark where I have a model and two variables, but I can't get the type of graph I want.
library(dynlm)
x <- rnorm(100)
y <- rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red")
lines(y, type="l", col="blue")
I want to generate a graph that looks like this where you see the model and the real data overlaying each other and the residual plotted as a separate graph on the bottom showing how the real data and the model deviate.
This should do the trick:
library(dynlm)
set.seed(771104)
x <- 5 + seq(1, 10, len=100) + rnorm(100)
y <- x + rnorm(100)
model <- dynlm(x ~ y)
par(oma=c(1,1,1,2))
plotModel(x, model) # works with models which accept 'predict' and 'residuals'
and this is the code for plotModel,
plotModel = function(x, model) {
ymodel1 = range(x, fitted(model), na.rm=TRUE)
ymodel2 = c(2*ymodel1[1]-ymodel1[2], ymodel1[2])
yres1 = range(residuals(model), na.rm=TRUE)
yres2 = c(yres1[1], 2*yres1[2]-yres1[1])
plot(x, type="l", col="red", lwd=2, ylim=ymodel2, axes=FALSE,
ylab="", xlab="")
axis(1)
mtext("residuals", 1, adj=0.5, line=2.5)
axis(2, at=pretty(ymodel1))
mtext("observed/modeled", 2, adj=0.75, line=2.5)
lines(fitted(model), col="green", lwd=2)
par(new=TRUE)
plot(residuals(model), col="blue", type="l", ylim=yres2, axes=FALSE,
ylab="", xlab="")
axis(4, at=pretty(yres1))
mtext("residuals", 4, adj=0.25, line=2.5)
abline(h=quantile(residuals(model), probs=c(0.1,0.9)), lty=2, col="gray")
abline(h=0)
box()
}
what you're looking for is resid(model). Try this:
library(dynlm)
x <- 10+rnorm(100)
y <- 10+rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red", ylim=c(min(c(x,y,resid(model))), max(c(x,y,resid(model)))))
lines(y, type="l", col="green")
lines(resid(model), type="l", col="blue")
I have a dataset called dataframe (a 2d table) and a best fit curve as:
scatter.smooth(dataframe, xlab="", ylab="")
What code would I need to realize and evaluate (get numerical value of) a Y value on that best fit curve at a single x value?
Try
set.seed(1)
dataframe <- data.frame(x=runif(100), y=runif(100))
scatter.smooth(dataframe, xlab="", ylab="")
res <- with(dataframe, loess.smooth(x, y, evaluation = 200))
lengths(res)
# x y
# 200 200
x <- 0.5
y <- res$y[res$x>=x][1]
points(x, y, col="blue", pch = 19, cex=2)
I borrowed this code from statmethods[dot]net. The result is a coloured area under a normal distribution.
mean=100; sd=15
lb=80; ub=120
x <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)
plot(x, hx, type="n", xlab="IQ Values", ylab="Density",
main="Normal Distribution", axes=FALSE)
i <- x >= lb & x <= ub
lines(x, hx)
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")
area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< IQ <",ub,") =",
signif(area, digits=3))
mtext(result,2)
I was wondering if it is possible to select an image as the colour of the red polygon?
Many thanks!
Working with your code, I basically suggest plotting an image (I used a random image below) using the png package (based on advice from In R, how to plot with a png as background?) and then plot your lines over that.
mean=100; sd=15
lb=80; ub=120
x <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)
# load package and an image
library(png)
ima <- readPNG("Red_Hot_Sun.PNG")
# plot an empty plot with your labels, etc.
plot(1,xlim=c(min(x),max(x)), type="n", xlab="IQ Values", ylab="Density",
main="Normal Distribution", axes=FALSE)
# put in the image
lim <- par()
rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
# add your plot
par(new=TRUE)
plot(x, hx, xlim=c(min(x),max(x)), type="l", xlab="", ylab="", axes=FALSE)
i <- x >= lb & x <= ub
lines(x, hx)
# add a polygon to cover the background above plot
polygon(c(x,180,180,20,20), c(hx,0,1,1,0), col="white")
# add polygons to cover the areas under the plot you don't want visible
polygon(c(-20,-20,x[x<=lb],lb), c(-10,min(hx),hx[x<=lb],-10), col="white")
polygon(c(ub,x[x>=ub],200,200), c(-1,hx[x>=ub],min(hx),-1), col="white")
# add your extra text
area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< IQ <",ub,") =",
signif(area, digits=3))
mtext(result,2)
Gives you:
the type argument to xyplot() can take "s" for "steps." From help(plot):
The two step types differ in their x-y preference: Going from
(x1,y1) to (x2,y2) with x1 < x2, 'type = "s"' moves first
horizontal, then vertical, whereas 'type = "S"' moves the other
way around.
i.e. if you use type="s", the horizontal part of the step has its left end attached to the data point, while type="S" has its right end attached to the data point.
library(lattice)
set.seed(12345)
num.points <- 10
my.df <- data.frame(x=sort(sample(1:100, num.points)),
y=sample(1:40, num.points, replace=TRUE))
xyplot(y~x, data=my.df, type=c("p","s"), col="blue", main='type="s"')
xyplot(y~x, data=my.df, type=c("p","S"), col="red", main='type="S"')
How could one achieve a "step" plot, where the vertical motion happens between data points points, i.e. at x1 + (x2-x1)/2, so that the horizontal part of the step is centered on the data point?
Edited to include some example code. better late than never I suppose.
I am using excellent #nico answer to give its lattice version. Even I am ok with #Dwin because the question don't supply a reproducible example, but customizing lattice panel is sometimes challenging.
The idea is to use panel.segments which is the equivalent of segments of base graphics.
library(lattice)
xyplot(y~x,
panel =function(...){
ll <- list(...)
x <- ll$x
y <- ll$y
x.start <- x - (c(0, diff(x)/2))
x.end <- x + (c(diff(x)/2, 0))
panel.segments(x.start, y, x.end, y, col="orange", lwd=2)
panel.segments(x.end[-length(x.end)], y[1:(length(y)-1)],
x.end[-length(x.end)], y[-1], col="orange", lwd=2)
## this is optional just to compare with type s
panel.xyplot(...,type='s')
## and type S
panel.xyplot(...,type='S')
})
This is a base graphics solution, as I am not too much of an expert in lattice.
Essentially you can use segments to draw first the horizontal, then the vertical steps, passing the shifted coordinates as a vector.
Here is an example:
set.seed(12345)
# Generate some data
num.points <- 10
x <- sort(sample(1:100, num.points))
y <- sample(1:40, num.points, replace=T)
# Plot the data with style = "s" and "S"
par(mfrow=c(1,3))
plot(x, y, "s", col="red", lwd=2, las=1,
main="Style: 's'", xlim=c(0, 100))
points(x, y, pch=19, col="red", cex=0.8)
plot(x, y, "S", col="blue", lwd=2, las=1,
main="Style: 'S'", xlim=c(0, 100))
points(x, y, pch=19, col="blue", cex=0.8)
# Now plot our points
plot(x, y, pch=19, col="orange", cex=0.8, las=1,
main="Centered steps", xlim=c(0, 100))
# Calculate the starting and ending points of the
# horizontal segments, by shifting the x coordinates
# by half the difference with the next point
# Note we leave the first and last point as starting and
# ending points
x.start <- x - (c(0, diff(x)/2))
x.end <- x + (c(diff(x)/2, 0))
# Now draw the horizontal segments
segments(x.start, y, x.end, y, col="orange", lwd=2)
# and the vertical ones (no need to draw the last one)
segments(x.end[-length(x.end)], y[1:(length(y)-1)],
x.end[-length(x.end)], y[-1], col="orange", lwd=2)
Here is the result:
I have 3 data ranges using to plot in R:
x <- c(1,2,3,4,5)
y <- c(2,4,6,8,10)
z <- c(100,240,480,580,880)
How to plot a 3D graphic with those data in R (a 3d scatterplot) ?
There are many examples of this available with a bit of searching.
Some ideas:
install.packages("scatterplot3d")
library(scatterplot3d)
s3d <-scatterplot3d(x,y,z, pch=16, highlight.3d=TRUE,
type="h", main="3D Scatterplot")
Sometimes it is nice if you can rotate it:
install.packages("rgl")
library(rgl)
plot3d(x, y, z, col="red", size=3)
If you're looking for another option, you could use the plotly package for R.
library(plotly)
x <- c(1,2,3,4,5)
y <- c(2,4,6,8,10)
z <- c(100,240,480,580,880)
plot_ly(x = x, y = y, z = z, type="scatter3d", mode="markers")