Related
I'm having multiple data frames where the first column (in the end filled with NA's) is the wavenumber and the other columns are my variables of the specific wavenumber for multiple observations.
Is there a possibility to plot the columns in a way that my first column holds the variables for the x-axis and the other are plotted into one big plot with their respective y-values?
I already tried "matplot" (resulting in "numbers" instead of points),
matplot(df[,1],df[,3:5],xlab = "Wavelength [nm]", ylab = "Absorbance")
different sets of "xyplot" (no possibility to give more than one y-value), but none seem to work (on my level of knowledge on R).
The final result should look like this:
Thanks for any help!
You could always make your own function to do this ;I make such functions on a regular basis when nothing really fits my needs.
I put this together rather quickly but you can adapt it to your needs.
# generate data
set.seed(6)
n <- 50
dat <- data.frame(x1=seq(1,100, length.out = n),
x2=seq(1,20, length.out = n)+rnorm(n),
x3=seq(1,20, length.out = n)+rnorm(n, mean = 3),
x4=seq(1,20, length.out = n)+rnorm(n, mean = 5))
# make some NAs at the end
dat[45:n,2] <- NA
dat[30:n,3] <- NA
plot_multi <- function(df, x=1, y=2, cols=y,
xlim=range(df[,x], na.rm = T),
ylim=range(df[,y], na.rm = T),
main="", xlab="", ylab="", ...){
# setup plot frame
plot(NULL,
xlim=xlim,
ylim=ylim,
main=main, xlab=xlab, ylab=ylab)
# plot all your y's against your x
pb <- sapply(seq_along(y), function(i){
points(df[,c(x, y[i])], col=cols[i], ...)
})
}
plot_multi(dat, y=2:4, type='l', lwd=3, main = ":)",
xlab = "Wavelength", ylab = "Absorbance")
Results in :
EDIT
I actually found your dataset online by chance, so I'll include how to plot it as well using my code above.
file <- 'http://openmv.net/file/tablet-spectra.csv'
spectra <- read.csv(file, header = FALSE)
# remove box label
spectra <- spectra[,-1]
# add the 'wavelength' and rotate the df
# (i didn't find the actual wavelength values, but hey).
spectra <- cbind(1:ncol(spectra), t(spectra))
plot_multi(spectra, y=2:ncol(spectra), cols = rainbow(ncol(spectra)),
type='l', main=":))", ylab="Absorbance", xlab = "'Wavelength'")
You could use the pavo R package, which is made to deal with spectral data (full disclosure, I'm one of the maintainers):
library(pavo)
df <- t(read.csv("http://openmv.net/file/tablet-spectra.csv", header = FALSE))
df <- df[-1, ]
df <- apply(df, 2, as.numeric)
df <- cbind(wl = seq_len(nrow(df)),
df)
df <- as.rspec(df)
#> wavelengths found in column 1
plot(df, ylab = "Absorbance", col = rainbow(3))
Created on 2019-07-26 by the reprex package (v0.3.0)
I am looking for a general solution to create bivariate choropleth maps in R using raster files.
I have found the following code here which nearly does what I need but it is limited: it can only handle data which are between 0 and 1 on both axes. In my specific use-case one axis spans 0-1 while another spans between -1 and 1. Regardless as to my specific use-case, I think a more general function which can handle different data ranges would be useful to many people.
I have already tried updating the code within the function colmat to handle negative data but for the life of me cannot get it to work. In the interests of clarity I have avoided posting all of my failed attempts and have insread copied below the code I found at the link above in the hope that someone may be able to offer a solution.
The current code first creates a colour matrix using colmat. The colour matrix generated is then used in bivariate.map along with your two raster files containing the data. I think the ideal solution would be to create the colour matrix based on the two rasters first (so that it can correctly bin the data based on your actual data, not the current solution which is between 0 and 1).
````
library(classInt)
library(raster)
library(rgdal)
library(dismo)
library(XML)
library(maps)
library(sp)
# Creates dummy rasters
rasterx<- raster(matrix(rnorm(400),5,5))
rasterx[rasterx <=0]<-1
rastery<- raster(matrix(rnorm(400),5,5))
# This function creates a colour matrix
# At present it cannot handle negative values i.e. the matrix spans from 0 to 1 along both axes
colmat<-function(nquantiles=10, upperleft=rgb(0,150,235, maxColorValue=255), upperright=rgb(130,0,80, maxColorValue=255), bottomleft="grey", bottomright=rgb(255,230,15, maxColorValue=255), xlab="x label", ylab="y label"){
my.data<-seq(0,1,.01)
my.class<-classIntervals(my.data,n=nquantiles,style="quantile")
my.pal.1<-findColours(my.class,c(upperleft,bottomleft))
my.pal.2<-findColours(my.class,c(upperright, bottomright))
col.matrix<-matrix(nrow = 101, ncol = 101, NA)
for(i in 1:101){
my.col<-c(paste(my.pal.1[i]),paste(my.pal.2[i]))
col.matrix[102-i,]<-findColours(my.class,my.col)
}
plot(c(1,1),pch=19,col=my.pal.1, cex=0.5,xlim=c(0,1),ylim=c(0,1),frame.plot=F, xlab=xlab, ylab=ylab,cex.lab=1.3)
for(i in 1:101){
col.temp<-col.matrix[i-1,]
points(my.data,rep((i-1)/100,101),pch=15,col=col.temp, cex=1)
}
seqs<-seq(0,100,(100/nquantiles))
seqs[1]<-1
col.matrix<-col.matrix[c(seqs), c(seqs)]
}
# Creates colour matrix
col.matrix<-colmat(nquantiles=2, upperleft="blue", upperright="yellow", bottomleft="green", bottomright="red", xlab="Species Richness", ylab="Change in activity hours")
# Function to create bivariate map, given the colour ramp created previously
bivariate.map<-function(rasterx, rastery, colormatrix=col.matrix, nquantiles=10){
quanmean<-getValues(rasterx)
temp<-data.frame(quanmean, quantile=rep(NA, length(quanmean)))
brks<-with(temp, quantile(temp,na.rm=TRUE, probs = c(seq(0,1,1/nquantiles))))
r1<-within(temp, quantile <- cut(quanmean, breaks = brks, labels = 2:length(brks),include.lowest = TRUE))
quantr<-data.frame(r1[,2])
quanvar<-getValues(rastery)
temp<-data.frame(quanvar, quantile=rep(NA, length(quanvar)))
brks<-with(temp, quantile(temp,na.rm=TRUE, probs = c(seq(0,1,1/nquantiles))))
r2<-within(temp, quantile <- cut(quanvar, breaks = brks, labels = 2:length(brks),include.lowest = TRUE))
quantr2<-data.frame(r2[,2])
as.numeric.factor<-function(x) {as.numeric(levels(x))[x]}
col.matrix2<-colormatrix
cn<-unique(colormatrix)
for(i in 1:length(col.matrix2)){
ifelse(is.na(col.matrix2[i]),col.matrix2[i]<-1,col.matrix2[i]<-which(col.matrix2[i]==cn)[1])
}
cols<-numeric(length(quantr[,1]))
for(i in 1:length(quantr[,1])){
a<-as.numeric.factor(quantr[i,1])
b<-as.numeric.factor(quantr2[i,1])
cols[i]<-as.numeric(col.matrix2[b,a])}
r<-rasterx
r[1:length(r)]<-cols
return(r)
}
# Creates map
bivmap<-bivariate.map(rasterx,rastery, colormatrix=col.matrix, nquantiles=2)
# Plots a map
plot(bivmap,frame.plot=F,axes=F,box=F,add=F,legend=F,col=as.vector(col.matrix)) ````
Ideally,a more general function would take two raster files, determine the data ranges of both and then create a bivariate chorpleth map based on the number of bins/quantiles specified by the user.
Here are some ideas based on your code
Three functions
makeCM <- function(breaks=10, upperleft, upperright, lowerleft, lowerright) {
m <- matrix(ncol=breaks, nrow=breaks)
b <- breaks-1
b <- (0:b)/b
col1 <- rgb(colorRamp(c(upperleft, lowerleft))(b), max=255)
col2 <- rgb(colorRamp(c(upperright, lowerright))(b), max=255)
cm <- apply(cbind(col1, col2), 1, function(i) rgb(colorRamp(i)(b), max=255))
cm[, ncol(cm):1 ]
}
plotCM <- function(cm, xlab="", ylab="", main="") {
n <- cm
n <- matrix(1:length(cm), nrow=nrow(cm), byrow=TRUE)
r <- raster(n)
cm <- cm[, ncol(cm):1 ]
image(r, col=cm, axes=FALSE, xlab=xlab, ylab=ylab, main=main)
}
rasterCM <- function(x, y, n) {
q1 <- quantile(x, seq(0,1,1/(n)))
q2 <- quantile(y, seq(0,1,1/(n)))
r1 <- cut(x, q1, include.lowest=TRUE)
r2 <- cut(y, q2, include.lowest=TRUE)
overlay(r1, r2, fun=function(i, j) {
(j-1) * n + i
})
}
Example data
library(raster)
set.seed(42)
r <- raster(ncol=50, nrow=50, xmn=0, xmx=10, ymn=0,ymx=10, crs="+proj=utm +zone=1")
x <- init(r, "x") * runif(ncell(r), .5, 1)
y <- init(r, "y") * runif(ncell(r), .5, 1)
And now used the functions
breaks <- 5
cmat <- makeCM(breaks, "blue", "yellow", "green", "red")
xy <- rasterCM(x, y, breaks)
par(mfrow=c(2,2), mai=c(.5,.5,.5,.5), las=1)
plot(x)
plot(y)
par(mai=c(1,1,1,1))
plotCM(cmat, "var1", "var2", "legend")
par(mai=c(.5,.5,.5,.5))
image(xy, col=cmat, las=1)
I have a function that uses matplot to plot some data. Data structure is like this:
test = data.frame(x = 1:10, a = 1:10, b = 11:20)
matplot(test[,-1])
matlines(test[,1], test[,-1])
So far so good. However, if there are missing values in the data set, then there are gaps in the resulting plot, and I would like to avoid those by connecting the edges of the gaps.
test$a[3:4] = NA
test$b[7] = NA
matplot(test[,-1])
matlines(test[,1], test[,-1])
In the real situation this is inside a function, the dimension of the matrix is bigger and the number of rows, columns and the position of the non-overlapping missing values may change between different calls, so I'd like to find a solution that could handle this in a flexible way. I also need to use matlines
I was thinking maybe filling in the gaps with intrapolated data, but maybe there is a better solution.
I came across this exact situation today, but I didn't want to interpolate values - I just wanted the lines to "span the gaps", so to speak. I came up with a solution that, in my opinion, is more elegant than interpolating, so I thought I'd post it even though the question is rather old.
The problem causing the gaps is that there are NAs between consecutive values. So my solution is to 'shift' the column values so that there are no NA gaps. For example, a column consisting of c(1,2,NA,NA,5) would become c(1,2,5,NA,NA). I do this with a function called shift_vec_na() in an apply() loop. The x values also need to be adjusted, so we can make the x values into a matrix using the same principle, but using the columns of the y matrix to determine which values to shift.
Here's the code for the functions:
# x -> vector
# bool -> boolean vector; must be same length as x. The values of x where bool
# is TRUE will be 'shifted' to the front of the vector, and the back of the
# vector will be all NA (i.e. the number of NAs in the resulting vector is
# sum(!bool))
# returns the 'shifted' vector (will be the same length as x)
shift_vec_na <- function(x, bool){
n <- sum(bool)
if(n < length(x)){
x[1:n] <- x[bool]
x[(n + 1):length(x)] <- NA
}
return(x)
}
# x -> vector
# y -> matrix, where nrow(y) == length(x)
# returns a list of two elements ('x' and 'y') that contain the 'adjusted'
# values that can be used with 'matplot()'
adj_data_matplot <- function(x, y){
y2 <- apply(y, 2, function(col_i){
return(shift_vec_na(col_i, !is.na(col_i)))
})
x2 <- apply(y, 2, function(col_i){
return(shift_vec_na(x, !is.na(col_i)))
})
return(list(x = x2, y = y2))
}
Then, using the sample data:
test <- data.frame(x = 1:10, a = 1:10, b = 11:20)
test$a[3:4] <- NA
test$b[7] <- NA
lst <- adj_data_matplot(test[,1], test[,-1])
matplot(lst$x, lst$y, type = "b")
You could use the na.interpolation function from the imputeTS package:
test = data.frame(x = 1:10, a = 1:10, b = 11:20)
test$a[3:4] = NA
test$b[7] = NA
matplot(test[,-1])
matlines(test[,1], test[,-1])
library('imputeTS')
test <- na.interpolation(test, option = "linear")
matplot(test[,-1])
matlines(test[,1], test[,-1])
Had also the same issue today. In my context I was not permitted to interpolate. I am providing here a minimal, but sufficiently general working example of what I did. I hope it helps someone:
mymatplot <- function(data, main=NULL, xlab=NULL, ylab=NULL,...){
#graphical set up of the window
plot.new()
plot.window(xlim=c(1,ncol(data)), ylim=range(data, na.rm=TRUE))
mtext(text = xlab,side = 1, line = 3)
mtext(text = ylab,side = 2, line = 3)
mtext(text = main,side = 3, line = 0)
axis(1L)
axis(2L)
#plot the data
for(i in 1:nrow(data)){
nin.na <- !is.na(data[i,])
lines(x=which(nin.na), y=data[i,nin.na], col = i,...)
}
}
The core 'trick' is in x=which(nin.na). It aligns the data points of the line consistently with the indices of the x axis.
The lines
plot.new()
plot.window(xlim=c(1,ncol(data)), ylim=range(data, na.rm=TRUE))
mtext(text = xlab,side = 1, line = 3)
mtext(text = ylab,side = 2, line = 3)
mtext(text = main,side = 3, line = 0)
axis(1L)
axis(2L)`
draw the graphical part of the window.
range(data, na.rm=TRUE) adapts the plot to a proper size being able to include all data points.
mtext(...) is used to label the axes and provides the main title. The axes themselves are drawn by the axis(...) command.
The following for-loop plots the data.
The function head of mymatplot provides the ... argument for an optional passage of typical plot parameters as lty, lwt, cex etc. via . Those will be passed on to the lines.
At last word on the choice of colors - they are up to your flavor.
Using the code below, I am trying to get density plot for different distributions.
dens <- apply(df[,c(7,9,12,14,16,18)], 2, density)
plot(NA, xlim=range(sapply(dens, "[", "x")), ylim=range(sapply(dens, "[", "y")))
mapply(lines, dens, col=1:length(dens))
legend("topright", legend=names(dens), fill=1:length(dens),bty = "n",lwd=1, cex=0.7)
The maximum upper limit for all variables is 5. But I got lines exceeded the 5. What do I need to change in my code to fix the plot?
By default, density will extend the range so that the density curve approaches 0 at the extreme. Do you want to restrict the curve to the range of you data? If so, you need use from and to arguments inside density().
x_range <- range(df[,c(7,9,12,14,16,18)])
dens <- apply(df[,c(7,9,12,14,16,18)], 2, density, from = x_range[1], to = x_range[2])
Perhaps it is better to provide a reproducible example.
set.seed(0); X <- matrix(rnorm(300), 100, 3)
## range of your data
x_range <- range(X)
# [1] -2.904899 2.658658
## default handling of `density`
dens <- apply(X, 2, density)
range(sapply(dens, "[[", "x"))
# [1] -3.922346 3.696451
## customized `from` and `to`
dens <- apply(X, 2, density, from = x_range[1], to = x_range[2])
range(sapply(dens, "[[", "x"))
# [1] -2.904899 2.658658
Actually I try to plot a figure but it puts and shows all the columns(lines) on each other so it is not representative. I try to make a simulated data and show you how I plot it, and also show you what I want
I don't know how to make a data like example i show below but here what I do
set.seed(1)
M <- matrix(rnorm(20),20,5)
x <- as.matrix(sort(runif(20, 5.0, 7.5)))
df <- as.data.frame(cbind(x,M))
After making the data frame, I will plot all columns versus the first one by melting it and using ggplot
require(ggplot2)
require(reshape)
dff <- melt(df , id.vars = 'V1')
b <- ggplot(dff, aes(V1,value)) + geom_line(aes(colour = variable))
I want to have specific distance between each line (in this case we have 6) something like below. in one dimension it is V1, in another dimension it is the number of column. I don't care about the function , I just want the photo
This solution uses rgl and produces this plot:
It uses this function that accepts 3 arguments:
df : a data.frame just like your 'M' above
x : a numeric vector (or a 1-coldata.frame`) for the x-axis
cols : (optionnal) a vector of colours to repeat. If missing, black line are drawn
Here is the function:
nik_plot <- function(df, x, cols){
require(rgl)
# if a data.frame is
if (is.data.frame(x) && ncol(x)==1)
x <- as.numeric(x[, 1])
# prepare a vector of colors
if (missing(cols))
cols <- rep_len("#000000", nrow(df))
else
cols <- rep_len(cols, nrow(df))
# initialize an empty 3D plot
plot3d(NA, xlim=range(x), ylim=c(1, ncol(df)-1), zlim=range(df), xlab="Mass/Charge (M/Z)", ylab="Time", zlab="Ion Spectra", box=FALSE)
# draw lines, silently
silence_please <- sapply(1:ncol(df), function(i) lines3d(x=x, y=i, z=df[, i], col=cols[i]))
}
Note that you can remove require(rgl) from the function and library(rgl) somewhere in your script, eg at the beginning.
If you don't have rgl installed, then install.packages("rgl").
Black lines, the default, may produce some moiré effect, but a repeating color palette is worse. This may be brain-dependant. A single colour would also avoid introducing an artificial dimension (and a strong one).
An example below:
# black lines
nik_plot(M, x)
# as in the image above
nik_plot(M, x, "grey40")
# an unreadable rainbow
nik_plot(M, x, rainbow(12))
The 3D window can be navigated with the mouse.
Do you need something else?
EDIT
You can build your second plot with the function below. The range of your data is so large, and I think the whole idea behind shifting upwards every line, prevent having an y-axis with a reliable scale. Here I have normalized all signals (0 <= signal <= 1). Also the parameter gap can be use to play with this. We could disconnect the two behaviors but I think it's nice. Try different values of gap and see examples below.
df : a data.frame just like your 'M' above
x : a numeric vector (or a 1-coldata.frame`) for the x-axis
cols : (optionnal) a vector of colours to repeat. If missing, black line are drawn
gap : gap factor between individual lines
more_gap_each: every n lines, a bigger gap is produced...
more_gap_relative: ... and will be gap x more_gap_relative wide
Here is the function:
nik_plot2D <- function(df, x, cols, gap=10, more_gap_each=1, more_gap_relative=0){
if (is.data.frame(x) && ncol(x)==1)
x <- as.numeric(x[, 1])
# we normalize ( 0 <= signal <= 1)
df <- df-min(df)
df <- (df/max(df))
# we prepare a vector of colors
if (missing(cols))
cols <- rep_len("#00000055", nrow(df))
else
cols <- rep_len(cols, nrow(df))
# we prepare gap handling. there is probably more elegant
gaps <- 1
for (i in 2:ncol(df))
gaps[i] <- gaps[i - 1] + 1/gap + ifelse((i %% more_gap_each) == 0, (1/gap)*more_gap_relative, 0)
# we initialize the plot
plot(NA, xlim=range(x), ylim=c(min(df), 1+max(gaps)), xlab="Time", ylab="", axes=FALSE, mar=rep(0, 4))
axis(1)
# finally, the lines
silent <- lapply(1:ncol(df), function(i) lines(x, df[, i] + gaps[i], col=cols[i]))
}
We can use it with (default):
nik_plot2D(M, x) # gap=10
And you obtain this plot:
or:
nik_plot2D(M, x, 50)
or, with colors:
nik_plot2D(M, x, gap=20, cols=1:3)
nik_plot2D(M, x, gap=20, cols=rep(1:3, each=5))
or, still with colours and but with larger gaps:
nik_plot2D(M, x, gap=20, cols=terrain.colors(10), more_gap_each = 1, more_gap_relative = 0) # no gap by default
nik_plot2D(M, x, gap=20, cols=terrain.colors(10), more_gap_each = 10, more_gap_relative = 4) # large gaps every 10 lines
nik_plot2D(M, x, gap=20, cols=terrain.colors(10), more_gap_each = 5, more_gap_relative = 2) # small gaps every 5 lines
As other have pointed out, your data have very large peaks and it's not clear whether you want to allow some curves to overlap,
m <- read.table("~/Downloads/M.txt", head=T)
fudge <- 0.05
shifty <- function(m, fudge=1){
shifts <- fudge * max(abs(apply(m, 2, diff))) * seq(0, ncol(m)-1)
m + matrix(shifts, nrow=nrow(m), ncol=ncol(m), byrow=TRUE)
}
par(mfrow=c(1,2), mar=c(0,0,1,0))
cols <- colorRampPalette(blues9[4:9])(ncol(m))
matplot(shifty(m), t="l", lty=1, bty="n", yaxt="n", xaxt="n", ylab="", col=cols)
title("no overlap")
matplot(shifty(m, 0.05), t="l", lty=1, bty="n", yaxt="n", xaxt="n", ylab="", col=cols)
title("some overlap")
Alternatively, some outlier/peak detection scheme could be used to filter them out before calculating the shift between curves,
library(outliers)
shifty2 <- function(m, outliers = 10){
tmp <- m
for(ii in seq_len(outliers)) tmp <- rm.outlier(tmp, median = TRUE)
shifts <- max(abs(apply(tmp, 2, diff))) * seq(0, ncol(m)-1)
m + matrix(shifts, nrow=nrow(m), ncol=ncol(m), byrow=TRUE)
}
matplot(shifty2(m), t="l", lty=1, bty="n", yaxt="n", xaxt="n", ylab="", col=cols)
(there are probably good algorithms to decide which points to remove, but I don't know them)
For 3D plotting I prefer the rgl package. This should be close to your desired solution.
The color of each scan changes on every third one.
library(rgl)
M<-read.table("M.txt", sep="\t", header = TRUE, colClasses = "numeric")
x<-read.table("x.txt", sep="\t", header = TRUE)
n<-ncol(M)
M[M<1]<-1
plot3d(x='', xlim=range(x$Time), ylim=c(1, n), zlim=(range(M)), box=FALSE)
sapply(seq(1,n), function(t){lines3d(x$Time, y=t*10, z=(M[,t])/10000, col=t/3+1)})
title3d(xlab="scan", ylab="time", zlab="intensity")
title3d(main ="Extracted Spectra Subset")
axes3d()
#axis3d(edge="x")
#axis3d(edge="y")
#axis3d(edge="z")
Do the huge differences in magnitude of the data points, I needed to scale some factors to make a readable graph. The intensity of the goes from 0 to nearly 1,000,000, thus distorting the graph. Attempted to normalize by taking the ln, but plot became unreadable.