How to make inactive multiple line R code in a script - r

I do have a long R script and I don't want to delete a part of this R script. Is there any way to avoid running this multiple line R code in the script or should I need to insert # at the beginning of each of these lines?
I don't want to run this whole below code but want to run the code which follow it?
alpha = 0.01
sigtabresTreatment6Genus_T1_vs_T3 = resTreatment6Genus_T1_vs_T3[which(resTreatment6Genus_T1_vs_T3$padj < alpha), ]
sigtabresTreatment6Genus_T1_vs_T3 = cbind(as(sigtabresTreatment6Genus_T1_vs_T3, "data.frame"), as(tax_table(physeq6Genus)[rownames(sigtabresTreatment6Genus_T1_vs_T3), ], "matrix"))
head(sigtabresTreatment6Genus_T1_vs_T3)
dim(sigtabresTreatment6Genus_T1_vs_T3)
library("ggplot2")
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
# Phylum order
x = tapply(sigtabresTreatment6Genus_T1_vs_T3$log2FoldChange, sigtabresTreatment6Genus_T1_vs_T3$Phylum, function(x) max(x))
x = sort(x, TRUE)
sigtabresTreatment6Genus_T1_vs_T3$Phylum = factor(as.character(sigtabresTreatment6Genus_T1_vs_T3$Phylum), levels=names(x))
# Genus order
x = tapply(sigtabresTreatment6Genus_T1_vs_T3$log2FoldChange, sigtabresTreatment6Genus_T1_vs_T3$Genus, function(x) max(x))
x = sort(x, TRUE)
sigtabresTreatment6Genus_T1_vs_T3$Genus = factor(as.character(sigtabresTreatment6Genus_T1_vs_T3$Genus), levels=names(x))
pdf("DESeq2-Soil-batch1-DEG-T1_vs_T3_pval.01.pdf", width = 8, height = 6);
ggplot(sigtabresTreatment6Genus_T1_vs_T3, aes(x=Genus, y=log2FoldChange, color=Phylum)) + geom_point(size=6)+theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off();
write.csv(sigtabresTreatment6Genus_T1_vs_T3,"sigResult-Treatment6Genus_T1_vs_T3")
###############
Further R code that needs to run
###########
Many thanks
Yogesh

Related

saving and naming files in R automatically based on input filename

I have generated several Utilisation Distributions (UD) with AdehabitatHR and stored them as Geotiffs. I am now using the same UDs with the Lattice package to generate some maps and saving them to a high-res tiff image with LZW compression. Problem is that I have literally hundreds of maps to make, save and name. Is there a way automatically do this once i have loaded all the necessary files from a directory? Each one of my UDs has the following structure of the filename "UD_resolution_species_area_year_season. tif" and in the final name I give to my map I would like to keep the same structure (or entire filename) but add the prefix "blablabla_" e.g. "blablabla_UD_resolution_species_area_year_season.tiff". The image also include a main name, a capital letter, which should also change.
At the moment I am using the following:
rlist = list.files(getwd(), pattern = "tif$", full.names = FALSE)
for (i in rlist) {
assign(unlist(strsplit(i, "[.]"))[1], raster(i))
}
shplist = list.files(getwd(), pattern = "shp$", full.names = FALSE)
for (i in shplist) {
assign(unlist(strsplit(i, "[.]"))[1], readOGR(i))
}
UD <- 'UD_resolution_species_area_year_season'
ext <- extent(UD) + 0.3 # set the extent for the plot
aa <-
quantile(UD,
probs = c(0.25, 0.75),
type = 8,
names = TRUE)
my.at <- c(aa[1], aa[2])
my.at <- round(my.at, 3)
maxval <- maxValue(UD)
tiff(
"C:/myworkingdirectory/maps/blablabla_UD_resolution_species_area_year_season.tiff",
res = 600,
compression = "lzw",
width = 15,
height = 15,
units = "cm"
)
levelplot(
UD,
xlab = "",
ylab = "",
xlim = c(ext[1], ext[2]),
ylim = c(ext[3], ext[4]),
margin = FALSE,
contour = FALSE,
col.regions = viridis(1000),
colorkey = list(at = seq(0, maxval)),
main = "A",
maxpixels = 2e5
) + latticeExtra::layer(sp.polygons(Land, fill = "grey50", col = NA)) + contourplot(
`UD`,
at = my.at[1],
labels = FALSE,
margin = FALSE,
lty = 2,
col = "orange",
pretty = TRUE
) + contourplot(
UD,
at = my.at[2],
labels = FALSE,
margin = FALSE,
lty = 2,
col = "red",
pretty = TRUE,
)
dev.off()
It is a common beginners mistake to use assign. Do not use it, it creates the type of trouble you are now facing. In stead, you can make lists and/or use a loop.
Also what you are asking is basic R stuff, but you are complicating the question with adding lots of irrelevant detail about setting the extent, and levelplot. It is better to learn about doing these basic things by removing the clutter and focus on a simple case first. That is also how you should write questions for this forum.
In essence you have a bunch of files you want to process. Below I show how you can loop over a vector of the names and then loop and do what you need to do in that loop.
library(raster)
rastfiles <- list.files(pattern = "tif$", full.names=TRUE)
outputfiles <- file.path("output/path", paste0("prefix_", basename(rastfiles)))
for (i in 1:length(rastfiles))
r <- raster(rastfiles[i])
png(outputfiles[i])
plot(r)
dev.off()
}
You can also first read all the files into a list
rastfiles <- list.files(pattern = "tif$", full.names=TRUE)
rlist <- lapply(rastfiles, raster)
names(rlist) <- gsub(".tif$", "", basename(rastfiles))
rastfiles <- list.files(pattern = "shp$", full.names=TRUE)
slist <- lapply(shpfiles, readOGR)
names(slist) <- gsub(".shp$", "", basename(shpfiles))
And perhaps create a vector of output filenames
outputtif <- file.path("output/dir", basename(rastfiles))
And then loop over the items in the list, or the output filenames

Skip empty panel using lattice package, R programming

I want to skip a empty panel using lattice package in R.
set.seed(1)
df1 <- data.frame("treatment" = c(rep("A",16),rep("B",16),rep("C",16)),
"disease_type" = c(rep("1",8),rep("2",8)),
"days_after_application" = rep(c(rep("10-24",4),rep("24-48",4)),6),
"severity" = rnorm(48, mean = 80, sd = 5))
df1[(df1$disease_type == "2" & df1$days_after_application == "24-48"),"severity"] <- NA
library(lattice)
figure1 <- bwplot(treatment~severity|days_after_application+disease_type,
data = df1,layout = c(2,2),
strip = strip.custom(strip.names = TRUE))
jpeg("figure1.jpeg")
print(figure1)
dev.off()
Here is what I get
My question is how I can remove/skip empty panel in the top right WITHOUT changing layout?
I have tried following code. However, it doesn't work.
figure2 <- bwplot(treatment~severity|days_after_application+disease_type,
data = df1,layout = c(2,2),
strip = strip.custom(strip.names = TRUE),
skip = c(FALSE,FALSE,FALSE,TRUE))
jpeg("figure2.jpeg")
print(figure2)
dev.off()
Here is what I got
I also tried following codes. But it is not what I want since I do want 2 levels strips.
df1[(df1$disease_type == "2" & df1$days_after_application == "24-48"),] <- NA
bwplot(treatment~severity|interaction(days_after_application,disease_type),
data = df1,layout = c(2,2),
strip = strip.custom(strip.names = TRUE))
Thank you!
Get help from a Professor in Temple University.
Here is his solution:
figure4 <- bwplot(treatment~severity|days_after_application+disease_type,
data = df1,layout = c(2,2),
strip = strip.custom(strip.names = TRUE),
skip = c(FALSE,FALSE,FALSE,TRUE),
scales=list(alternating=FALSE), ## keep x-scale on bottom
between=list(x=1, y=1)) ## space between panels
pdf("figure4%03d.pdf",onefile = FALSE) ## force two pages in file.
print(figure4)
dev.off()

Plotting function gives data must be of vector type, was 'NULL'

So I use the following functions for plotting most of the data I have to plot. I created it thanks to different chunks of code that I have found online. So far I have never encountered any issue with it.
Here is the plotting function first.
library(ggplot2)
library(reshape2)
#' Plot a given mean with error bars
#' #param resultTable The table with all the result to plot
#' #param techniques The name of the techniques in the form of a list/vector
#' #param nbTechs The number of given techniques
#' #param ymin The minimum value for y
#' #param ymax The maximum value for y
#' #param xAxisLabel The label for the x (vertical) axis
#' #param yAxisLable The label for the y (horizontal) axis
#' #return
#'
barChartTime <- function(resultTable, techniques, nbTechs = -1, ymin, ymax, xAxisLabel = "I am the X axis", yAxisLabel = "I am the Y Label"){
#tr <- t(resultTable)
if(nbTechs <= 0){
stop('Please give a positive number of Techniques, nbTechs');
}
tr <- as.data.frame(resultTable)
nbTechs <- nbTechs - 1 ; # seq will generate nb+1
#now need to calculate one number for the width of the interval
tr$CI2 <- tr$upperBound_CI - tr$mean_time
tr$CI1 <- tr$mean_time - tr$lowerBound_CI
#add a technique column
tr$technique <- factor(seq.int(0, nbTechs, 1));
breaks <- c(as.character(tr$technique));
print(tr)
g <- ggplot(tr, aes(x=technique, y=mean_time)) +
geom_bar(stat="identity",fill = I("#CCCCCC")) +
geom_errorbar(aes(ymin=mean_time-CI1, ymax=mean_time+CI2),
width=0, # Width of the error bars
size = 1.1
) +
#labs(title="Overall time per technique") +
labs(x = xAxisLabel, y = yAxisLabel) +
scale_y_continuous(limits = c(ymin,ymax)) +
scale_x_discrete(name="",breaks,techniques)+
coord_flip() +
theme(panel.background = element_rect(fill = 'white', colour = 'white'),axis.title=element_text(size = rel(1.2), colour = "black"),axis.text=element_text(size = rel(1.2), colour = "black"),panel.grid.major = element_line(colour = "#DDDDDD"),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank())+
geom_point(size=4, colour="black") # dots
print(g)
}
Now, here is (a simplified version of the data) data that I am using (and that reproduces the error):
EucliP,AngularP,EucliR,AngularR,EucliSp,AngularSp,EucliSl,AngularSl
31.6536,30.9863,64.394,92.7838,223.478,117.555,44.7374,25.4852
12.3592,40.7639,70.2508,176.55,10.3927,145.909,143.025,126.667
14.572,8.98445,113.599,150.551,47.1545,54.3019,10.7038,47.7004
41.7957,20.9542,55.1732,67.1647,52.364,41.3655,62.7036,75.65
135.868,83.7135,14.0262,69.7183,44.987,35.9599,19.5183,66.0365
33.5359,17.2129,6.95909,47.518,224.561,91.4999,67.1279,31.4079
25.7285,33.6705,17.4725,58.45,43.1709,113.847,28.9496,20.0574
48.4742,127.588,75.0804,89.1176,31.4494,27.9548,38.4563,126.248
31.9831,80.0161,19.9592,145.891,55.2789,142.738,94.5126,136.099
17.4044,52.3866,49.9976,150.891,104.936,77.2849,232.23,35.6963
153.359,151.897,41.8876,46.3893,79.5218,75.2011,68.9786,91.8972
And here is the code that I am using:
data = read.table("*Path_to_file*.csv", header=T, sep=",")
data$EucliPLog = (data$EucliP) #Before here I used to use a log transform that I tried to remove for some testing
data$EucliRLog = (data$EucliR) #Same thing
data$EucliSpLog = (data$EucliSp) #Same thing
data$EucliSlLog = (data$EucliSl) #Same thing
a1 = t.test(data$EucliPLog)$conf.int[1]
a2 = t.test(data$EucliPLog)$conf.int[2]
b1 = t.test(data$EucliRLog)$conf.int[1]
b2 = t.test(data$EucliRLog)$conf.int[2]
c1 = t.test(data$EucliSpLog)$conf.int[1]
c2 = t.test(data$EucliSpLog)$conf.int[2]
d1 = t.test(data$EucliSlLog)$conf.int[1]
d2 = t.test(data$EucliSlLog)$conf.int[2]
analysisData = c()
analysisData$ratio = c("Sl","Sp","R","P")
analysisData$pointEstimate = c(exp(mean(data$EucliSlLog)),exp(mean(data$EucliSpLog)),exp(mean(data$EucliRLog)),exp(mean(data$EucliPLog)))
analysisData$ci.max = c(exp(d2), exp(c2),exp(b2), exp(a2))
analysisData$ci.min = c(exp(d1), exp(c1),exp(b1), exp(a1))
datatoprint <- data.frame(factor(analysisData$ratio),analysisData$pointEstimate, analysisData$ci.max, analysisData$ci.min)
colnames(datatoprint) <- c("technique", "mean_time", "lowerBound_CI", "upperBound_CI ")
barChartTime(datatoprint,analysisData$ratio ,nbTechs = 4, ymin = 0, ymax = 90, "", "Title")
So If I do use the log() that I mention in the comments of the last piece of code, everything works fine and I get my plots displayed. However, I tried removing the log and I get the famous
Error in matrix(value, n, p) :
'data' must be of a vector type, was 'NULL'
I have tried looking for null values in my data but there are none and I do not know where to look at next. Would love to get some help with that.
Thanks in advance
Edit: Here is the result of dput on datatoprint:
structure(list(technique = structure(c(3L, 4L, 2L, 1L), .Label = c("P",
"R", "Sl", "Sp"), class = "factor"), mean_time = c(1.04016257618464e+32,
1.64430609815788e+36, 7.5457775364611e+20, 3.85267453902928e+21
), lowerBound_CI = c(6.64977706609883e+50, 5.00358136618364e+57,
2.03872433045407e+30, 4.93863589006376e+35), `upperBound_CI ` = c(16270292584857.9,
540361462434140, 279286207454.44, 30055062.6409769)), .Names = c("technique",
"mean_time", "lowerBound_CI", "upperBound_CI "), row.names = c(NA,
-4L), class = "data.frame")
And the dput on analysisData:
structure(list(ratio = c("Sl", "Sp", "R", "P"), pointEstimate = c(1.04016257618464e+32,
1.64430609815788e+36, 7.5457775364611e+20, 3.85267453902928e+21
), ci.max = c(6.64977706609883e+50, 5.00358136618364e+57, 2.03872433045407e+30,
4.93863589006376e+35), ci.min = c(16270292584857.9, 540361462434140,
279286207454.44, 30055062.6409769)), .Names = c("ratio", "pointEstimate",
"ci.max", "ci.min"))
Without the log I don't have anything on display because the value are above 10^40++ whereas with the log it's below the upper limit (90).
I don' get the error you get though.

Add a label to map at each leg start

I'm plotting legs of a route to a ggmap. It works okay so far. I've been trying to add a label containing the order (n from the loop) of each leg.
I've tried +geom_text to the geom_leg() but I get the error :
Error in geom_leg(aes(x = startLon, y = startLat, xend = endLon, yend = endLat), :
non-numeric argument to binary operator
I'd appreciate any help adding a label to indicate the leg.
Data :
structure(c("53.193418", "53.1905138631287", "53.186744", "53.189836",
"53.1884117", "53.1902965", "53.1940384", "53.1934748", "53.1894004",
"53.1916771", "-2.881248", "-2.89043889005541", "-2.890165",
"-2.893896", "-2.88802", "-2.8919373", "-2.8972299", "-2.8814698",
"-2.8886692", "-2.8846099"), .Dim = c(10L, 2L))
Function :
create.map<-function(lst){
library("ggmap")
cncat<-c(paste(lst[,1],lst[,2],sep=","))
df2<-data.frame(cncat)
leg <-function(start, dest, order){
r<- route(from=start,to=dest,mode = c("walking"),structure = c("legs"))
c<- geom_leg(aes(x = startLon, y = startLat,xend = endLon, yend = endLat),
alpha = 2/4, size = 2, data = r,colour = 'blue')+
geom_text(aes(label = order), size = 3)
return (c)
}
a<-qmap('Chester, UK', zoom = 15, maptype = 'road')
for (n in 1:9){
l<-leg(as.character(df2[n,1]), as.character(df2[n+1,1]),n)
a<-a+l
}
a
}
Is this close? (Note: this calls your list of points way.points).
way.points <- as.data.frame(way.points,stringsAsFactors=FALSE)
library(ggmap)
rte.from <- apply(way.points[-nrow(way.points),],1,paste,collapse=",")
rte.to <- apply(way.points[-1,],1,paste,collapse=",")
rte <- do.call(rbind,
mapply(route, rte.from, rte.to, SIMPLIFY=FALSE,
MoreArgs=list(mode="walking",structure="legs")))
coords <- rbind(as.matrix(rte[,7:8]),as.matrix(rte[nrow(rte),9:10]))
coords <- as.data.frame(coords)
ggm <- qmap('Chester, UK', zoom = 15, maptype = 'road')
ggm +
geom_path(data=coords,aes(x=startLon,y=startLat),color="blue",size=2)+
geom_point(data=way.points,aes(x=as.numeric(V2),y=as.numeric(V1)),
size=10,color="yellow")+
geom_text(data=way.points,
aes(x=as.numeric(V2),y=as.numeric(V1), label=seq_along(V1)))
So this assembles a vector of from and to coordinates using apply(...), then uses mapply(...) to call route(...) with both vectors, returning the overall list of coordinates in a data frame rte. Because the coordinates are stored as, e.g. $startLat and $endLat, we form a coords data frame by adding the final $endLat and $endLong to rte to get the very last leg of the route. Then we use geom_path(...) to draw the path in one step. Finally we use geom_text(...) with x and y-values from the original way.points data frame, and we use geom_point(...) just to make them stand out a bit.
Here's a bare bones solution. I just added the labels to the finished ggmap object a. If you replace the line
a
with
lst2 <- data.frame(cbind(lst, leg = as.character(1:10) )
names(lst2) <- c("lat", "lon", "leg")
a <- a + geom_text(data=lst2,aes(x=lon,y=lat,label=leg),size=5, vjust = 0, hjust = -0.5)
return(a)
in your create.map function, you should get (roughly) the desired result. I might have reversed the lat and lon variables, and you probably want to tweak the size, location, etc. Hope this helps!

Wind rose with ggplot (R)?

I am looking for good R code (or package) that uses ggplot2 to create wind roses that show the frequency, magnitude and direction of winds.
I'm particularly interested in ggplot2 as building the plot that way gives me the chance to leverage the rest of the functionality in there.
Test data
Download a year of weather data from the 80-m level on the National Wind Technology's "M2" tower. This link will create a .csv file that is automatically downloaded. You need to find that file (it's called "20130101.csv"), and read it in.
# read in a data file
data.in <- read.csv(file = "A:/drive/somehwere/20130101.csv",
col.names = c("date","hr","ws.80","wd.80"),
stringsAsFactors = FALSE))
This would work with any .csv file and will overwrite the column names.
Sample data
If you don't want to download that data, here are 10 data points that we will use to demo the process:
data.in <- structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "1/1/2013", class = "factor"), hr = 1:9, ws.80 = c(5,
7, 7, 51.9, 11, 12, 9, 11, 17), wd.80 = c(30, 30, 30, 180, 180,
180, 269, 270, 271)), .Names = c("date", "hr", "ws.80", "wd.80"
), row.names = c(NA, -9L), class = "data.frame")
For sake of argument we'll assume that we are using the data.in data frame, which has two data columns and some kind of date / time information. We'll ignore the date and time information initially.
The ggplot function
I've coded the function below. I'm interested in other people's experience or suggestions on how to improve this.
# WindRose.R
require(ggplot2)
require(RColorBrewer)
plot.windrose <- function(data,
spd,
dir,
spdres = 2,
dirres = 30,
spdmin = 2,
spdmax = 20,
spdseq = NULL,
palette = "YlGnBu",
countmax = NA,
debug = 0){
# Look to see what data was passed in to the function
if (is.numeric(spd) & is.numeric(dir)){
# assume that we've been given vectors of the speed and direction vectors
data <- data.frame(spd = spd,
dir = dir)
spd = "spd"
dir = "dir"
} else if (exists("data")){
# Assume that we've been given a data frame, and the name of the speed
# and direction columns. This is the format we want for later use.
}
# Tidy up input data ----
n.in <- NROW(data)
dnu <- (is.na(data[[spd]]) | is.na(data[[dir]]))
data[[spd]][dnu] <- NA
data[[dir]][dnu] <- NA
# figure out the wind speed bins ----
if (missing(spdseq)){
spdseq <- seq(spdmin,spdmax,spdres)
} else {
if (debug >0){
cat("Using custom speed bins \n")
}
}
# get some information about the number of bins, etc.
n.spd.seq <- length(spdseq)
n.colors.in.range <- n.spd.seq - 1
# create the color map
spd.colors <- colorRampPalette(brewer.pal(min(max(3,
n.colors.in.range),
min(9,
n.colors.in.range)),
palette))(n.colors.in.range)
if (max(data[[spd]],na.rm = TRUE) > spdmax){
spd.breaks <- c(spdseq,
max(data[[spd]],na.rm = TRUE))
spd.labels <- c(paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq])),
paste(spdmax,
"-",
max(data[[spd]],na.rm = TRUE)))
spd.colors <- c(spd.colors, "grey50")
} else{
spd.breaks <- spdseq
spd.labels <- paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq]))
}
data$spd.binned <- cut(x = data[[spd]],
breaks = spd.breaks,
labels = spd.labels,
ordered_result = TRUE)
# clean up the data
data. <- na.omit(data)
# figure out the wind direction bins
dir.breaks <- c(-dirres/2,
seq(dirres/2, 360-dirres/2, by = dirres),
360+dirres/2)
dir.labels <- c(paste(360-dirres/2,"-",dirres/2),
paste(seq(dirres/2, 360-3*dirres/2, by = dirres),
"-",
seq(3*dirres/2, 360-dirres/2, by = dirres)),
paste(360-dirres/2,"-",dirres/2))
# assign each wind direction to a bin
dir.binned <- cut(data[[dir]],
breaks = dir.breaks,
ordered_result = TRUE)
levels(dir.binned) <- dir.labels
data$dir.binned <- dir.binned
# Run debug if required ----
if (debug>0){
cat(dir.breaks,"\n")
cat(dir.labels,"\n")
cat(levels(dir.binned),"\n")
}
# deal with change in ordering introduced somewhere around version 2.2
if(packageVersion("ggplot2") > "2.2"){
cat("Hadley broke my code\n")
data$spd.binned = with(data, factor(spd.binned, levels = rev(levels(spd.binned))))
spd.colors = rev(spd.colors)
}
# create the plot ----
p.windrose <- ggplot(data = data,
aes(x = dir.binned,
fill = spd.binned)) +
geom_bar() +
scale_x_discrete(drop = FALSE,
labels = waiver()) +
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE) +
theme(axis.title.x = element_blank())
# adjust axes if required
if (!is.na(countmax)){
p.windrose <- p.windrose +
ylim(c(0,countmax))
}
# print the plot
print(p.windrose)
# return the handle to the wind rose
return(p.windrose)
}
Proof of Concept and Logic
We'll now check that the code does what we expect. For this, we'll use the simple set of demo data.
# try the default settings
p0 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80)
This gives us this plot:
So: we've correctly binned the data by direction and wind speed, and have coded up our out-of-range data as expected. Looks good!
Using this function
Now we load the real data. We can load this from the URL:
data.in <- read.csv(file = "http://midcdmz.nrel.gov/apps/plot.pl?site=NWTC&start=20010824&edy=26&emo=3&eyr=2062&year=2013&month=1&day=1&endyear=2013&endmonth=12&endday=31&time=0&inst=21&inst=39&type=data&wrlevel=2&preset=0&first=3&math=0&second=-1&value=0.0&user=0&axis=1",
col.names = c("date","hr","ws.80","wd.80"))
or from file:
data.in <- read.csv(file = "A:/blah/20130101.csv",
col.names = c("date","hr","ws.80","wd.80"))
The quick way
The simple way to use this with the M2 data is to just pass in separate vectors for spd and dir (speed and direction):
# try the default settings
p1 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80)
Which gives us this plot:
And if we want custom bins, we can add those as arguments:
p2 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80,
spdseq = c(0,3,6,12,20))
Using a data frame and the names of columns
To make the plots more compatible with ggplot(), you can also pass in a data frame and the name of the speed and direction variables:
p.wr2 <- plot.windrose(data = data.in,
spd = "ws.80",
dir = "wd.80")
Faceting by another variable
We can also plot the data by month or year using ggplot's faceting capability. Let's start by getting the time stamp from the date and hour information in data.in, and converting to month and year:
# first create a true POSIXCT timestamp from the date and hour columns
data.in$timestamp <- as.POSIXct(paste0(data.in$date, " ", data.in$hr),
tz = "GMT",
format = "%m/%d/%Y %H:%M")
# Convert the time stamp to years and months
data.in$Year <- as.numeric(format(data.in$timestamp, "%Y"))
data.in$month <- factor(format(data.in$timestamp, "%B"),
levels = month.name)
Then you can apply faceting to show how the wind rose varies by month:
# recreate p.wr2, so that includes the new data
p.wr2 <- plot.windrose(data = data.in,
spd = "ws.80",
dir = "wd.80")
# now generate the faceting
p.wr3 <- p.wr2 + facet_wrap(~month,
ncol = 3)
# and remove labels for clarity
p.wr3 <- p.wr3 + theme(axis.text.x = element_blank(),
axis.title.x = element_blank())
Comments
Some things to note about the function and how it can be used:
The inputs are:
vectors of speed (spd) and direction (dir) or the name of the data frame and the names of the columns that contain the speed and direction data.
optional values of the bin size for wind speed (spdres) and direction (dirres).
palette is the name of a colorbrewer sequential palette,
countmax sets the range of the wind rose.
debug is a switch (0,1,2) to enable different levels of debugging.
I wanted to be able to set the maximum speed (spdmax) and the count (countmax) for the plots so that I can compare windroses from different data sets
If there are wind speeds that exceed (spdmax), those are added as a grey region (see the figure). I should probably code something like spdmin as well, and color-code regions where the wind speeds are less than that.
Following a request, I implemented a method to use custom wind speed bins. They can be added using the spdseq = c(1,3,5,12) argument.
You can remove the degree bin labels using the usual ggplot commands to clear the x axis: p.wr3 + theme(axis.text.x = element_blank(),axis.title.x = element_blank()).
At some point recently ggplot2 changed the ordering of bins, so that the plots didn't work. I think this was version 2.2. But, if your plots look a bit weird, change the code so that test for "2.2" is maybe "2.1", or "2.0".
Here is my version of the code. I added labels for directions (N, NNE, NE, ENE, E....) and made the y label to show frequency in percent instead of counts.
Click here to see figure of wind Rose with directions and frequency (%)
# WindRose.R
require(ggplot2)
require(RColorBrewer)
require(scales)
plot.windrose <- function(data,
spd,
dir,
spdres = 2,
dirres = 22.5,
spdmin = 2,
spdmax = 20,
spdseq = NULL,
palette = "YlGnBu",
countmax = NA,
debug = 0){
# Look to see what data was passed in to the function
if (is.numeric(spd) & is.numeric(dir)){
# assume that we've been given vectors of the speed and direction vectors
data <- data.frame(spd = spd,
dir = dir)
spd = "spd"
dir = "dir"
} else if (exists("data")){
# Assume that we've been given a data frame, and the name of the speed
# and direction columns. This is the format we want for later use.
}
# Tidy up input data ----
n.in <- NROW(data)
dnu <- (is.na(data[[spd]]) | is.na(data[[dir]]))
data[[spd]][dnu] <- NA
data[[dir]][dnu] <- NA
# figure out the wind speed bins ----
if (missing(spdseq)){
spdseq <- seq(spdmin,spdmax,spdres)
} else {
if (debug >0){
cat("Using custom speed bins \n")
}
}
# get some information about the number of bins, etc.
n.spd.seq <- length(spdseq)
n.colors.in.range <- n.spd.seq - 1
# create the color map
spd.colors <- colorRampPalette(brewer.pal(min(max(3,
n.colors.in.range),
min(9,
n.colors.in.range)),
palette))(n.colors.in.range)
if (max(data[[spd]],na.rm = TRUE) > spdmax){
spd.breaks <- c(spdseq,
max(data[[spd]],na.rm = TRUE))
spd.labels <- c(paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq])),
paste(spdmax,
"-",
max(data[[spd]],na.rm = TRUE)))
spd.colors <- c(spd.colors, "grey50")
} else{
spd.breaks <- spdseq
spd.labels <- paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq]))
}
data$spd.binned <- cut(x = data[[spd]],
breaks = spd.breaks,
labels = spd.labels,
ordered_result = TRUE)
# figure out the wind direction bins
dir.breaks <- c(-dirres/2,
seq(dirres/2, 360-dirres/2, by = dirres),
360+dirres/2)
dir.labels <- c(paste(360-dirres/2,"-",dirres/2),
paste(seq(dirres/2, 360-3*dirres/2, by = dirres),
"-",
seq(3*dirres/2, 360-dirres/2, by = dirres)),
paste(360-dirres/2,"-",dirres/2))
# assign each wind direction to a bin
dir.binned <- cut(data[[dir]],
breaks = dir.breaks,
ordered_result = TRUE)
levels(dir.binned) <- dir.labels
data$dir.binned <- dir.binned
# Run debug if required ----
if (debug>0){
cat(dir.breaks,"\n")
cat(dir.labels,"\n")
cat(levels(dir.binned),"\n")
}
# create the plot ----
p.windrose <- ggplot(data = data,
aes(x = dir.binned,
fill = spd.binned
,y = (..count..)/sum(..count..)
))+
geom_bar() +
scale_x_discrete(drop = FALSE,
labels = c("N","NNE","NE","ENE", "E",
"ESE", "SE","SSE",
"S","SSW", "SW","WSW", "W",
"WNW","NW","NNW")) +
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE) +
theme(axis.title.x = element_blank()) +
scale_y_continuous(labels = percent) +
ylab("Frequencia")
# adjust axes if required
if (!is.na(countmax)){
p.windrose <- p.windrose +
ylim(c(0,countmax))
}
# print the plot
print(p.windrose)
# return the handle to the wind rose
return(p.windrose)
}
Have you ever tried windRose function from Openair package? It's very easy and you can set intervals, statistics and etc.
windRose(mydata, ws = "ws", wd = "wd", ws2 = NA, wd2 = NA,
ws.int = 2, angle = 30, type = "default", bias.corr = TRUE, cols
= "default", grid.line = NULL, width = 1, seg = NULL, auto.text
= TRUE, breaks = 4, offset = 10, normalise = FALSE, max.freq =
NULL, paddle = TRUE, key.header = NULL, key.footer = "(m/s)",
key.position = "bottom", key = TRUE, dig.lab = 5, statistic =
"prop.count", pollutant = NULL, annotate = TRUE, angle.scale =
315, border = NA, ...)
pollutionRose(mydata, pollutant = "nox", key.footer = pollutant,
key.position = "right", key = TRUE, breaks = 6, paddle = FALSE,
seg = 0.9, normalise = FALSE, ...)

Resources