Related
I am trying to create a forestplot, using forestplotter function, am able to get a beautiful graph, but am not able to see the entire graph, the column widths in few of the columns are so big, even if the string size is less, making the width of the entire graph, so big to see, can someone help me with this and also is it possible to align the datahrame contents uniformly centre aligned......Please help me with this
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
#Data entered#
df <- data.frame(Study=c("A","B","C","D","Summary"),
nA = c(24,187,36,26,273),
median_A = c(4.9,5.69,8.866995074,8.5,NA),
Q1A =c(3,2.86,4.495073892,2,NA),
Q3A =c(8.5,9.78,14.96305419,32,NA),
nP = c(23,193,36,26,278),
median_P = c(7.2,6.79,8.990147783,12.5,NA),
Q1P =c(3.4,3.59,4.002463054,2,NA),
Q3P =c(10.9,10.12,12.06896552,43,NA),
W = c("10.6%","80.8%","8.0%","0.70%",NA),
E=c(-2.3,-1.1,-0.123152709,-4,-1.16881587),
UL=c(1.161473203,0.156288294,3.881699516,10.02689306,-0.039791047),
LL=c(-5.761473203,-2.356288294,-4.128004935,-18.02689306,-2.297840692))
#Calculate SE for box size#
df$SE <- (df$UL-df$E)/1.96
#Column for Confidence intervals for Drug A and Placebo, with 2 significant digit#
df$IQRA <- sprintf("%.2f (%.2f to %.2f)",df$median_A,df$Q1A, df$Q3A)
df$IQRP <- sprintf("%.2f (%.2f to %.2f)",df$median_P,df$Q1P, df$Q3P)
#Column for Confidence intervals for NET EFFECT, with 2 significant digit#
df$MD <- sprintf("%.2f (%.2f to %.2f)", df$E, df$LL, df$UL)
#Create a column with space for forest plot#
df$" "<- paste(rep(" ", 16), collapse = " ")
##Forest plot theme##
#To be modified as needed#
ftn <-forest_theme(
base_size = 16,
base_family = "serif",
ci_pch = 15,
ci_col = "black",
ci_lty = 1,
ci_lwd = 1,
ci_Theight = 0.25,
legend_name = " ",
legend_position = "right",legend_value = "",
xaxis_lwd = 1,
xaxis_cex = 0.7,
refline_lwd = 1,
refline_lty = "dashed",
refline_col = "red",
summary_fill = "blue",
summary_col = "blue",
footnote_cex = 0.4,
footnote_fontface = "plain",
footnote_col = "black",
title_just = c("center"),
title_cex = 1.1,
title_fontface = "bold",
title_col = "black",
show.rownames = FALSE)
##Table in Order for Forest plot##
#First get Column names#
colnames(df)
df2 <-df[,c(1,2,15,6,16,18,17)]
#Make NA cells empty
df2[5,3] <-c(" ")
df2[5,5] <-c(" ")
##Forestplot##
plot<-forest(df2,
est = df$E,
lower = df$LL,
upper = df$UL,
sizes = (df$SE/10),
ci_column = 6,
ref_line = 0,
arrow_lab = c("Drug A Better", "Placebo Better"),
xlim = c(-7, 6),
is_summary = c(FALSE,FALSE,FALSE,FALSE,TRUE),
xlog = FALSE,
ticks_digits = 0,ticks_at = c(-6,0,6),
theme = ftn)
##Show plot
print(plot, autofit = FALSE)
I'm getting the error message when trying to deploy my shiny flexdashboard that was created with r markdown. Can anyone help? My code is below. Thank you! The data is saved here
The "submit" function seems to be the issue, I read in the random forest model I created so a data table could output once people enter in their desired input variables - but it doesn't seem to be working. The map and the accompanying data table works fine though.
library(readr)
library(tibble)
library(mlbench)
library(tidyverse)
library(leaflet)
library(sf)
library(rgdal)
library(leaflet)
library(ggplot2)
library(dplyr)
library(sp)
library(raster)
library(ggplot2)
library(rgeos)
library(ggthemes)
library(data.table)
library(geojsonR)
library(leaflet.extras)
library(flexdashboard)
library(reshape2)
library(lmtest)
library(htmltools)
library(caret)
library(randomForest)
library(ggcorrplot)
library(lsr)
library(DT)
library(plotly)
library(knitr)
library(crosstalk)
library(shiny)
library(shinydashboard)
setwd("~/SPRING 2022/Data Science for Public Policy/FinalProject/new_DataSets")
risk_map <- readRDS("risk_map.rds")
model <- readRDS("model.rds")
variables <- c("dissim_19","Total_Pop20", "pct_adultsWchild_PH", "mean_pov_idx", "mean_lbr_idx", "mean_trans_idx","recap","CDBG_perCapita")
Fair Housing Plan Risk by County
Row {data-height=400}
Map of Risk Prediction for Fair Housing Plans by County
##0 high risk, 1 medium risk, 2 low risk
#color and for na
pal <- colorFactor(
palette = c("#f03b20",
"#feb24c",
"#ffeda0"),
domain = risk_map#data$predicted_outcome,
na.color = "grey" # desired color for NA polygons on map
)
pal_noNA <- colorFactor(
palette = c("#f03b20",
"#feb24c",
"#ffeda0"),
domain = risk_map#data$predicted_outcome,
na.color = NA
)
risk_popup <- paste("<strong>Predicted Fair Housing Plan Risk Level: </strong>",
risk_map#data$predicted_outcome,"<br><strong>County Name: </strong>",
risk_map#data$county_name, "<br><strong>Total 2020 Population: </strong>",
risk_map#data$Total_Pop20, "<br><strong>CDBG Funding per Capita: </strong>",
risk_map#data$CDBG_perCapita,"<br><strong>2019 White to Non-White Dissimilarity Index: </strong>",
risk_map#data$dissim_19)
risk_map %>%
leaflet::leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(lat = 39.8283, lng = -98.5795, zoom = 4) %>%
addPolygons(stroke = TRUE,
smoothFactor = 0.2,
fillOpacity = .8,
color = ~pal(predicted_outcome),
weight = 0.5,
popup = risk_popup) %>%
# addAwesomeMarkers(data = results_points,
# lat = ~X,
# lng = ~Y,
# icon = awesome,
# popup = results_popup) %>%
# #fillColor = "black",fillOpacity = 0.5,stroke = F)
leaflet::addLegend("bottomright",
pal = pal_noNA,
values = ~predicted_outcome,
title = "Predicted Risk of Fair Housing Plan Outcome by U.S. County",
opacity = 1) %>%
leaflet::addMeasure()
Row {data-width=600}
Predictive Model Results
a <- 1:34
remove <- c (6, 18, 21:29, 31:33)
renderDT(
risk_map#data,
fillContainer = TRUE,
filter = "top",server = FALSE,
extensions = c('Buttons', 'Scroller'),
options = list(
dom = 'Blfrtip',
buttons = c('csv', 'excel', 'pdf'),
scrollY = '600px',
#scroller = TRUE,
fixedColumns = TRUE,
columnDefs = list(list(visible=FALSE, targets = a[! a %in% remove]))
# columnDefs = list(
# list(
# visible = FALSE,
# targets = c(6, 18, 21:29,31:33)
),
colnames = c(
"County Name" = "NAMELSAD",
"State" = "State",
"Dissimilarity Index 2019" = "dissim_19",
"2020 Population" = "Total_Pop20",
"% of Adults w/ Children" = "pct_adultsWchild_PH",
"Total CDBG Funding" = "total_amt_cdbg",
"Mean Poverty Index" = "mean_pov_idx",
"Mean Job Prxmity Index" = "mean_lbr_idx",
"Mean Transit Index" = "mean_trans_idx",
"More than 4 RECAPs" = "recap",
"AFH Submitter" = "AFH.Lead.Entity",
"CDBG Funding per Capita" = "CDBG_perCapita",
"Real Plan Outcome" = "real_outcome",
"Predicted Plan Outcome" = "predicted_outcome"
)
)
Generate Predicted Risk by County
Column {data-width=600}
Enter in Indicators to Predict Fair Housing Plan Outcome by County
# dissim_19 #input
# Total_Pop20 #input
# CDBG_perCapita
# pct_adultsWchild_PH #slider
# mean_pov_idx #slider
# mean_trans_idx #slider
# mean_lbr_idx #slider
# recap #yes or no multiple choice
sidebarPanel(
HTML("<h3>Enter in all input variables for your county to predict outcome of AFH<h4>"),
numericInput("dissim_19",
label = "White to Non-White Dissimilarity Index",
value = 34.58177),
numericInput("Total_Pop20",
label = "Total Population in 2020",
value = 2149031),
sliderInput("pct_adultsWchild_PH",
label = "Percent of Adults w at Least One Child in all of the County's Public Housing",
min = 0, max = 97, value = 1, step = 0.2),
numericInput("mean_pov_idx",
label = "Mean Poverty Index Score",
value = 35.00000),
numericInput("mean_trans_idx",
label = "Mean Transportation Index Score",
value = 39.000000),
numericInput("mean_lbr_idx",
label = "Mean LBR Index Score",
value = 1500000),
checkboxInput("recap", "Does the County have more than 4 census tracts designated as a RECAP? If Yes, click here", TRUE),
numericInput("CDBG_perCapita",
label = "Total CDBG Funding Amount per Capita",
value = 1500000),
actionButton("submitbutton", "Submit", class = "btn btn-primary")
)
mainPanel(
tags$label(h3('Status/Output')), # Status/Output Text Box
verbatimTextOutput('contents'),
tableOutput('tabledata') # Prediction results table
)
####################################
# Server #
####################################
server<- function(input, Output, session) {
# Input Data
datasetInput <- reactive({
df <- data.frame(
Name = c("White to Non-White Dissimilarity Index",
"Total Population in 2020",
"Percent of Adults w at Least One Child in all of the County's Public Housing",
"Mean Poverty Index Score",
"Mean Transportation Index Score",
"Mean LBR Index Score",
"Total CDBG Funding Amount per Capita"
),
Value = as.character(c(input$dissim_19,
input$Total_Pop20,
input$pct_adultsWchild_PH,
input$mean_pov_idx,
input$mean_trans_idx,
input$mean_lbr_idx,
input$CDBG_perCapita
)),
stringsAsFactors = FALSE)
Outcome <- 0
df <- rbind(df, Outcome)
input <- transpose(df)
write.table(input,"input.csv", sep=",", quote = FALSE, row.names = FALSE, col.names = FALSE)
test <- read.csv(paste("input", ".csv", sep=""), header = TRUE)
Output <- data.frame(Prediction=predict(model,test), round(predict(model,test,type="prob"), 3))
print(Output)
})
# Status/Output Text Box
Output$contents <- renderPrint({
if (input$submitbutton>0) {
isolate("Calculation complete.")
} else {
return("Server is ready for calculation.")
}
})
# Prediction results table
Output$tabledata <- renderTable({
if (input$submitbutton>0) {
isolate(datasetInput())
}
})
}
####################################
# Create the shiny app #
####################################
#shinyApp(ui = ui, server = server)
I was wondering if it was possible to use subset on a geom_polyfreq()?
I am running a topic model and in order to report the facets properly i want to remove 4 out of 10 facets.
My code is as follows:
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
> ggplot(subset(dat, Topics %in% c(3, 4, 5, 7, 8, 9)),
aes(x = date,
fill = topic)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=9)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
However, when I try to subset the data, I get an error that says:
Fejl: Faceting variables must have at least one value
Does anybody know what the issue is?
I hope this makes sense.
The full code is down below.
article.data <- article.data[!is.na(article.data$fulltext), ]
## Get date
article.data$date <- as.Date(article.data$date, "%Y-%m-%d")
#all of 2018
dat <- article.data[article.data$date > as.Date("2018-01-01", "%Y-%m-%d") &
article.data$date < as.Date("2018-12-01", "%Y-%m-%d"), ]
## 'tokenize' fulltext
quanteda_options("language_stemmer" = "danish")
texts <- gsub(":", " ", dat$fulltext, fixed = T)
texts <- tokens(texts, what = "word",
remove_numbers = T,
remove_punct = T,
remove_symbols = T,
remove_separators = T,
remove_hyphens = T,
remove_url = T,
verbose = T)
texts <- tokens_tolower(texts)
texts <- tokens_remove(texts, stopwords("danish"))
texts <- tokens_wordstem(texts)
texts <- tokens_remove(texts, stopwords("danish"))
# get actual dfm from tokens
txt.mat <- dfm(texts)
#remove frequent words with no substance
txt.mat <- txt.mat %>% dfm_remove(c("ad",
"af","aldrig","alene","alle",
"allerede","alligevel","alt",
"altid","anden","andet","andre",
"at","bag","bare", "bedre", "begge","bl.a.",
"blandt", "blev", "blevet", "blive","bliver",
"burde", "bør","ca.", "com", "da",
"dag", "dansk", "danske", "de",
"dem", "den", "denne","dens",
"der","derefter","deres","derfor",
"derfra","deri","dermed","derpå",
"derved","det","dette","dig",
"din","dine","disse","dit",
"dog","du","efter","egen",
"ej","eller","ellers","en",
"end","endnu","ene","eneste","enhver","ens",
"enten","er","et","f.eks.","far","fem",
"fik","fire","flere","flest",
"fleste","for", "foran",
"fordi","forrige","fra", "fx",
"få","får","før","først",
"gennem","gjorde","gjort","god",
"godt","gør","gøre","gørende",
"ham","han","hans","har",
"havde","have","hej","hel",
"heller","helt","hen","hende",
"hendes","henover","her",
"herefter","heri","hermed",
"herpå","hos","hun","hvad",
"hvem","hver","hvilke","hvilken",
"hvilkes","hvis",
"hvor", "hvordan","hvorefter","hvorfor",
"hvorfra","hvorhen","hvori","hvorimod",
"hvornår","hvorved","i", "ifølge", "igen",
"igennem","ikke","imellem","imens",
"imod","ind","indtil","ingen",
"intet","ja","jeg","jer","jeres",
"jo","kan","kom","komme",
"kommer", "kroner", "kun","kunne","lad",
"langs", "lang", "langt", "lav","lave","lavet",
"lidt","lige","ligesom","lille",
"længere","man","mand","mange",
"med","meget","mellem","men", "mener",
"mens","mere","mest","mig",
"min","mindre","mindst","mine",
"mit","mod","må","måske",
"ned","nej","nemlig","ni",
"nogen","nogensinde","noget",
"nogle","nok","nu","ny", "nye",
"nyt","når","nær","næste",
"næsten","og","også","okay",
"om","omkring","op","os",
"otte","over","overalt","pga.", "partier",
"partiets", "partiers", "politiske",
"procent", "på", "ritzau", "samme",
"sammen","se","seks","selv","selvom",
"senere","ser","ses","siden","sig",
"sige", "siger", "sin","sine","sit",
"skal","skulle","som","stadig",
"stor","store","synes","syntes",
"syv","så","sådan","således",
"tag","tage","temmelig","thi",
"ti","tidligere","til","tilbage",
"tit","to","tre","ud","uden",
"udover","under","undtagen","var",
"ved","vi","via","vil","ville", "viser",
"vor","vore","vores","vær","være",
"været","øvrigt","facebook","http", "https",
"www","millioner", "frem", "lars", "lars_løkke",
"rasmussen", "løkke_rasmussen", "statsminister", "politik",
"formand", "partiet", "år", "tid", "and", "fler",
"sid", "regeringen", "giv", "politisk", "folketing", "mer",
"ifølg"))
############################################################
## FEATURE SELECTION
############################################################
# check out top-appearing features in dfm
topfeatures(txt.mat)
# keep features (words) appearing in >2 documents
txt.mat <- dfm_trim(txt.mat, min_termfreq = 4)
# filter out one-character words
txt.mat <- txt.mat[, str_length(colnames(txt.mat)) > 2]
# filter out some html trash features
#txt.mat <- txt.mat[, !grepl("[[:digit:]]+px", colnames(txt.mat))]
#txt.mat <- txt.mat[, !grepl(".", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("_", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
############################################################
## SELECT FEATURES BY TF-IDF
############################################################
# Create tf_idf-weighted dfm
ti <- dfm_tfidf(txt.mat)
# Select from main dfm using its top features
txt.mat <- dfm_keep(txt.mat, names(topfeatures(ti, n = 1000)))
############################################################
## RUN TOPIC MODEL
############################################################
# convert quanteda dfm to tm 'dtm'
dtm <- convert(txt.mat, to = "topicmodels")
# run lda with 8 topics
lda <- LDA(dtm, k = 8)
# review terms by topic
terms(lda, 10)
############################################################
## LOOK FOR 'OPTIMAL' k
############################################################
# randomly sample test data
set.seed(61218)
select <- sample(1:nrow(dtm), size = 100)
test <- dtm[select, ]
train <- dtm[!(1:nrow(dtm) %in% select), ]
n.tops <- 3:14
metrics <- data.frame(topics = n.tops,
perplexity = NA)
for(i in n.tops) { # NB: takes awhile to run
print(i)
est <- LDA(train, k = i)
metrics[(i - 1), "perplexity"] <- perplexity(est, newdata = test)
}
save(metrics, file = "lda_perplexity2018.RData")
qplot(data = metrics, x = topics, y = perplexity, geom = "line",
xlab = "Number of topics",
ylab = "Perplexity on test data") + theme_bw()
#We found that 8 topics was one of those of lowest perplexity but
#also the ones which made the most sense
############################################################
## RERUN WITH BETTER CHOICE OF k
############################################################
# run lda with 10 topics
lda <- LDA(dtm, k = 10)
save(lda, file = "dr_ft_keep2018.RData")
# examine output
terms(lda, 20)
# put topics into original data
dat$topic <- topics(lda)
# add labels
#dat$date <- factor(dat$date,
#levels = 1:12,
#labels = c("januar","februar", "marts","april", "maj", "juni", "juli", "august", "september", "oktober", "november", "decemeber"))
dat$Topics <- factor(dat$topic,
levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
# frequency
qplot(data = dat, x = Topics,
geom = "bar", xlab = "",
ylab = "Topic Frequency", fill=Topics, main = "Figure 1: Main Topics in 2018 - DR") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
#Make visualization showing topics over time
ggplot(data = dat,
aes(x = date,
fill = Topics[1])) +
geom_freqpoly(binwidth = 30) +
facet_wrap(Topics ~ ., scales = "free")+
theme_classic() +
scale_x_date(breaks = as.Date(c( "2018-02-01", "2018-04-01", "2018-06-01", "2018-08-01", "2018-10-01", "2018-12-01", date_labels="%B"))) +
theme(axis.text.x = element_text(angle = 90))
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
It's best practice on this forum to make your question reproducible, so that others can try it and test their solutions to confirm they work. It's also good if you can make it minimal, both to respect potential answerers' time and to help clarify your own understanding of the problem.
How to make a great R reproducible example
In this case, the error message suggests that your subsetting is removing all your data, which breaks the faceting. It can't plot any facets if the faceting variable has no values.
It looks like dat$Topics is a factor, but your loop is referring to Topics like they're numeric with Topics %in% c(3, 4, 5, 7, 8, 9). For example, I could define a factor vector with the same levels as your Topics variable:
Topics <- factor(1:10, levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
Compare the output of these three lines:
Topics %in% c(1, 2)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
as.numeric(Topics) %in% c(1, 2)
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Topics %in% c("Topc 1", "Topic 2")
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
In the top case, none of the data matches the test, so using that to subset the data would give you an empty data set and seems like a plausible cause for the error you got.
To refer to the Topics by their underlying level, we can refer to as.numeric(Topics) %in% c(1, 2). If we want to refer to the Topics by their labels, I could use Topics %in% c("Topc 1", "Topic 2").
Since I don't have your data, I can't confirm this exact syntax will work for you, but I hope something along these lines will.
For more on how to work with factors in R, I recommend: https://r4ds.had.co.nz/factors.html
I am trying to make a Shiny app where the user selects a few options and a network and data table will display based on the inputs. I have a diet study database and would like users to be able to specify the predator species they are interested in, the diet metric (weight, volumetric, etc) and the taxonomic level they want nodes identified to. The data table works fine (so I did not include the code) and updates based on the input but the network does not change, it only shows all of the data. When I run the code for generating the plot outside of Shiny it works fine. This is my first shiny attempt so any suggestions would be greatly appreciated.
library(dplyr)
library(igraph)
library(networkD3)
Diet <-data.frame(
Predator_Scientific_Name = rep("Acanthocybium solanderi", 10),
Class_Predator = rep("Actinopterygii", 10),
Order_Predator = rep("Perciformes", 10),
Family_Predator = rep("Scombridae", 10),
Genus_Predator = rep("Acanthocybium", 10),
Species_Predator = rep("solandri", 10),
Class_Prey = rep("Actinopterygii", 10),
Order_Prey = c( "Clupeiformes" , NA , "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Tetraodontiformes", "Tetraodontiformes"),
Family_Prey = c("Clupeidae", NA, "Coryphaenidae", "Carangidae", "Scombridae","Echeneidae","Carangidae", "Scombridae", "Balistidae","Diodontidae"),
Genus_Prey = c("Sardinella", NA, "Coryphaena", "Decapterus", "Euthynnus", NA, NA, NA, "Balistes", "Diodon"),
Species_Prey = c("aurita" , "", "hippurus", "punctatus","alletteratus", "", "", "","capriscus", "spp." ),
Lowest_Taxonomic_Identification_Prey = c("Sardinella aurita","Actinopterygii","Coryphaena hippurus","Decapterus punctatus","Euthynnus alletteratus", "Echeneidae", "Carangidae","Scombridae","Balistes capriscus","Diodon spp."),
Frequency_of_Occurrence = c(2.8, 59.1, 1.4, 7.0, 1.4, 1.4, 15.5, 21.1, 2.8, 4.2), StringAsFactors = FALSE
)
pred.name <- unique(Diet$Predator_Scientific_Name)
prey.tax <- unique(Diet$Lowest_Taxonomic_Identification_Prey)
#Progress bar function
compute_data <- function(updateProgress = NULL) {
# Create 0-row data frame which will be used to store data
dat <- data.frame(x = numeric(0), y = numeric(0))
for (i in 1:10) {
Sys.sleep(0.25)
# Compute new row of data
new_row <- data.frame(x = rnorm(1), y = rnorm(1))
# If we were passed a progress update function, call it
if (is.function(updateProgress)) {
text <- paste0("x:", round(new_row$x, 2), " y:", round(new_row$y, 2))
updateProgress(detail = text)
}
# Add the new row of data
dat <- rbind(dat, new_row)
}
dat
}
####
# Define UI for application that draws a histogram
ui <- dashboardPage(
skin = "blue",
dashboardHeader(title = "Diet Database"),
dashboardSidebar(
sidebarMenu(
menuItem("Parameters",
tabName = "paramaters",
icon = shiny::icon("bar-chart")))
),
dashboardBody(
tabItems(
tabItem(
tabName = "paramaters",
fluidRow(
shiny::column(
width = 4,
shinydashboard::box(
title = "Predator",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a predator to view its connections and prey items:"),
shiny::selectInput(
"pred",
shiny::h5("Predator Scientific Name:"),
c(NA,pred.name))),
shinydashboard::box(
title = "Prey",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a prey taxa to view its connections and predators:"),
shiny::selectInput(
"prey",
shiny::h5("Prey Taxa:"),
c(NA,prey.tax))),
shinydashboard::box(
title = "Diet Metric",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a diet metric to use:"),
shiny::selectInput(
"dietmetric",
shiny::h5("Diet Metric:"),
c("Frequency of Occurrence" = "Frequency_of_Occurrence",
"Wet Weight" = "Weight",
"Dry Weight" = "Dry_Weight",
"Volume" = "Volume",
"Index of Relative Importance" = "IRI",
"Index of Caloric Importance" = "ICI",
"Number" = "Number"))),
shinydashboard::box(
title = "Taxonomic Level",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a taxonomic level of nodes:"),
shiny::selectInput(
"nodetax",
shiny::h5("Taxonomic Level:"),
c("Order" = "Order",
"Family" = "Family",
"Genus" = "Genus",
"Species" = "Species"))),
shinydashboard::box(
title = "Generate Network",
status = "primary",
solidHeader = T,
collapsible = T,
width = NULL,
actionButton("makenet", "Generate")
)
),
#Area for network to be displayed
shiny::column(
width = 8,
shinydashboard::box(
title = "Trophic Network",
status = "primary",
solidHeader = TRUE,
collapsible = FALSE,
width = NULL,
forceNetworkOutput("netplot")
)
)
))
)))
server <- function(input, output, session) {
network.data <- eventReactive(input$makenet, {
edgelist <- Diet %>% filter(Predator_Scientific_Name == input$pred|Lowest_Taxonomic_Identification_Prey == input$prey
) %>% select(
paste(input$nodetax, "Predator", sep = "_"),
Class_Predator,
paste(input$nodetax, "Prey", sep = "_"),
Class_Prey,
input$dietmetric
)
colnames(edgelist) <- c("SourceName",
"SourceClass",
"TargetName",
"TargetClass",
"Weight")
edgelist <- edgelist[complete.cases(edgelist),]
})
output$netplot <- renderForceNetwork( {
network.data()
ig <-igraph::simplify(igraph::graph_from_data_frame(edgelist[,c(1,3,5)], directed = TRUE))
SourceID <- TargetID <- c()
for (i in 1:nrow(edgelist)) {
SourceID[i] <- which(edgelist[i,1] == V(ig)$name)-1
TargetID[i] <- which(edgelist[i,3] == V(ig)$name)-1
}
#Create edgelist that contains source and target nodes and edge weights
edgeList <- cbind(edgelist, SourceID, TargetID)
nodeList <- data.frame(ID = c(0:(igraph::vcount(ig) - 1)),
nName = igraph::V(ig)$name)
#Determine and assign groups based on class
preddf <-
data.frame(SciName = edgelist[, 1], class = edgelist[, 2])
preydf <-
data.frame(SciName = edgelist[, 3], class = edgelist[, 4])
groupsdf <- rbind(preddf, preydf)
groupsdf <- groupsdf %>% mutate(SciName = as.character(SciName),
class = as.character(class))
nodeGroup <- c()
for (i in 1:nrow(nodeList)) {
index <- which(groupsdf[, 1] == nodeList$nName[i])
nodeGroup[i] <- groupsdf[index[1], 2]
}
nodeList <-
cbind(nodeList,
nodeGroup)
progress <- shiny::Progress$new()
progress$set(message = "Generating your network...", value = 0)
# Close the progress when this reactive exits (even if there's an error)
on.exit(progress$close())
# Create a callback function to update progress.
# Each time this is called:
# - If `value` is NULL, it will move the progress bar 1/5 of the remaining
# distance. If non-NULL, it will set the progress to that value.
# - It also accepts optional detail text.
updateProgress <- function(value = NULL, detail = NULL) {
if (is.null(value)) {
value <- progress$getValue()
value <- value + (progress$getMax() - value) / 5
}
progress$set(value = value, detail = detail)
}
# Compute the new data, and pass in the updateProgress function so
# that it can update the progress indicator.
compute_data(updateProgress)
networkD3::forceNetwork(
Links = edgeList,
# data frame that contains info about edges
Nodes = nodeList,
# data frame that contains info about nodes
Source = "SourceID",
# ID of source node
Target = "TargetID",
# ID of target node
Value = "Weight",
# value from the edge list (data frame) that will be used to value/weight relationship amongst nodes
NodeID = "nName",
# value from the node list (data frame) that contains node
Group = "nodeGroup",
# value from the node list (data frame) that contains value we want to use for node color
fontSize = 25,
opacity = 0.85,
zoom = TRUE,
# ability to zoom when click on the node
opacityNoHover = 0.4 # opacity of labels when static
)
})
}
# Run the application
shinyApp(ui = ui, server = server)
I am sharing my fixed code in case it helps someone in the future. I basically just changed the top of the server code.
network.data <- eventReactive(input$makenet, {
Diet %>% filter(Predator_Scientific_Name == input$pred|Lowest_Taxonomic_Identification_Prey == input$prey
) %>% select(
paste(input$nodetax, "Predator", sep = "_"),
Class_Predator,
paste(input$nodetax, "Prey", sep = "_"),
Class_Prey,
input$dietmetric
) %>% rename("SourceName" = paste(input$nodetax, "Predator", sep = "_"),
"SourceClass" = Class_Predator,
"TargetName" = paste(input$nodetax, "Prey", sep = "_"),
"TargetClass" = Class_Prey,
"Weight" = input$dietmetric) %>% na.omit()
})
output$netplot <- renderForceNetwork( {
edgelist <- network.data()
I am looking for good R code (or package) that uses ggplot2 to create wind roses that show the frequency, magnitude and direction of winds.
I'm particularly interested in ggplot2 as building the plot that way gives me the chance to leverage the rest of the functionality in there.
Test data
Download a year of weather data from the 80-m level on the National Wind Technology's "M2" tower. This link will create a .csv file that is automatically downloaded. You need to find that file (it's called "20130101.csv"), and read it in.
# read in a data file
data.in <- read.csv(file = "A:/drive/somehwere/20130101.csv",
col.names = c("date","hr","ws.80","wd.80"),
stringsAsFactors = FALSE))
This would work with any .csv file and will overwrite the column names.
Sample data
If you don't want to download that data, here are 10 data points that we will use to demo the process:
data.in <- structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "1/1/2013", class = "factor"), hr = 1:9, ws.80 = c(5,
7, 7, 51.9, 11, 12, 9, 11, 17), wd.80 = c(30, 30, 30, 180, 180,
180, 269, 270, 271)), .Names = c("date", "hr", "ws.80", "wd.80"
), row.names = c(NA, -9L), class = "data.frame")
For sake of argument we'll assume that we are using the data.in data frame, which has two data columns and some kind of date / time information. We'll ignore the date and time information initially.
The ggplot function
I've coded the function below. I'm interested in other people's experience or suggestions on how to improve this.
# WindRose.R
require(ggplot2)
require(RColorBrewer)
plot.windrose <- function(data,
spd,
dir,
spdres = 2,
dirres = 30,
spdmin = 2,
spdmax = 20,
spdseq = NULL,
palette = "YlGnBu",
countmax = NA,
debug = 0){
# Look to see what data was passed in to the function
if (is.numeric(spd) & is.numeric(dir)){
# assume that we've been given vectors of the speed and direction vectors
data <- data.frame(spd = spd,
dir = dir)
spd = "spd"
dir = "dir"
} else if (exists("data")){
# Assume that we've been given a data frame, and the name of the speed
# and direction columns. This is the format we want for later use.
}
# Tidy up input data ----
n.in <- NROW(data)
dnu <- (is.na(data[[spd]]) | is.na(data[[dir]]))
data[[spd]][dnu] <- NA
data[[dir]][dnu] <- NA
# figure out the wind speed bins ----
if (missing(spdseq)){
spdseq <- seq(spdmin,spdmax,spdres)
} else {
if (debug >0){
cat("Using custom speed bins \n")
}
}
# get some information about the number of bins, etc.
n.spd.seq <- length(spdseq)
n.colors.in.range <- n.spd.seq - 1
# create the color map
spd.colors <- colorRampPalette(brewer.pal(min(max(3,
n.colors.in.range),
min(9,
n.colors.in.range)),
palette))(n.colors.in.range)
if (max(data[[spd]],na.rm = TRUE) > spdmax){
spd.breaks <- c(spdseq,
max(data[[spd]],na.rm = TRUE))
spd.labels <- c(paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq])),
paste(spdmax,
"-",
max(data[[spd]],na.rm = TRUE)))
spd.colors <- c(spd.colors, "grey50")
} else{
spd.breaks <- spdseq
spd.labels <- paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq]))
}
data$spd.binned <- cut(x = data[[spd]],
breaks = spd.breaks,
labels = spd.labels,
ordered_result = TRUE)
# clean up the data
data. <- na.omit(data)
# figure out the wind direction bins
dir.breaks <- c(-dirres/2,
seq(dirres/2, 360-dirres/2, by = dirres),
360+dirres/2)
dir.labels <- c(paste(360-dirres/2,"-",dirres/2),
paste(seq(dirres/2, 360-3*dirres/2, by = dirres),
"-",
seq(3*dirres/2, 360-dirres/2, by = dirres)),
paste(360-dirres/2,"-",dirres/2))
# assign each wind direction to a bin
dir.binned <- cut(data[[dir]],
breaks = dir.breaks,
ordered_result = TRUE)
levels(dir.binned) <- dir.labels
data$dir.binned <- dir.binned
# Run debug if required ----
if (debug>0){
cat(dir.breaks,"\n")
cat(dir.labels,"\n")
cat(levels(dir.binned),"\n")
}
# deal with change in ordering introduced somewhere around version 2.2
if(packageVersion("ggplot2") > "2.2"){
cat("Hadley broke my code\n")
data$spd.binned = with(data, factor(spd.binned, levels = rev(levels(spd.binned))))
spd.colors = rev(spd.colors)
}
# create the plot ----
p.windrose <- ggplot(data = data,
aes(x = dir.binned,
fill = spd.binned)) +
geom_bar() +
scale_x_discrete(drop = FALSE,
labels = waiver()) +
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE) +
theme(axis.title.x = element_blank())
# adjust axes if required
if (!is.na(countmax)){
p.windrose <- p.windrose +
ylim(c(0,countmax))
}
# print the plot
print(p.windrose)
# return the handle to the wind rose
return(p.windrose)
}
Proof of Concept and Logic
We'll now check that the code does what we expect. For this, we'll use the simple set of demo data.
# try the default settings
p0 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80)
This gives us this plot:
So: we've correctly binned the data by direction and wind speed, and have coded up our out-of-range data as expected. Looks good!
Using this function
Now we load the real data. We can load this from the URL:
data.in <- read.csv(file = "http://midcdmz.nrel.gov/apps/plot.pl?site=NWTC&start=20010824&edy=26&emo=3&eyr=2062&year=2013&month=1&day=1&endyear=2013&endmonth=12&endday=31&time=0&inst=21&inst=39&type=data&wrlevel=2&preset=0&first=3&math=0&second=-1&value=0.0&user=0&axis=1",
col.names = c("date","hr","ws.80","wd.80"))
or from file:
data.in <- read.csv(file = "A:/blah/20130101.csv",
col.names = c("date","hr","ws.80","wd.80"))
The quick way
The simple way to use this with the M2 data is to just pass in separate vectors for spd and dir (speed and direction):
# try the default settings
p1 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80)
Which gives us this plot:
And if we want custom bins, we can add those as arguments:
p2 <- plot.windrose(spd = data.in$ws.80,
dir = data.in$wd.80,
spdseq = c(0,3,6,12,20))
Using a data frame and the names of columns
To make the plots more compatible with ggplot(), you can also pass in a data frame and the name of the speed and direction variables:
p.wr2 <- plot.windrose(data = data.in,
spd = "ws.80",
dir = "wd.80")
Faceting by another variable
We can also plot the data by month or year using ggplot's faceting capability. Let's start by getting the time stamp from the date and hour information in data.in, and converting to month and year:
# first create a true POSIXCT timestamp from the date and hour columns
data.in$timestamp <- as.POSIXct(paste0(data.in$date, " ", data.in$hr),
tz = "GMT",
format = "%m/%d/%Y %H:%M")
# Convert the time stamp to years and months
data.in$Year <- as.numeric(format(data.in$timestamp, "%Y"))
data.in$month <- factor(format(data.in$timestamp, "%B"),
levels = month.name)
Then you can apply faceting to show how the wind rose varies by month:
# recreate p.wr2, so that includes the new data
p.wr2 <- plot.windrose(data = data.in,
spd = "ws.80",
dir = "wd.80")
# now generate the faceting
p.wr3 <- p.wr2 + facet_wrap(~month,
ncol = 3)
# and remove labels for clarity
p.wr3 <- p.wr3 + theme(axis.text.x = element_blank(),
axis.title.x = element_blank())
Comments
Some things to note about the function and how it can be used:
The inputs are:
vectors of speed (spd) and direction (dir) or the name of the data frame and the names of the columns that contain the speed and direction data.
optional values of the bin size for wind speed (spdres) and direction (dirres).
palette is the name of a colorbrewer sequential palette,
countmax sets the range of the wind rose.
debug is a switch (0,1,2) to enable different levels of debugging.
I wanted to be able to set the maximum speed (spdmax) and the count (countmax) for the plots so that I can compare windroses from different data sets
If there are wind speeds that exceed (spdmax), those are added as a grey region (see the figure). I should probably code something like spdmin as well, and color-code regions where the wind speeds are less than that.
Following a request, I implemented a method to use custom wind speed bins. They can be added using the spdseq = c(1,3,5,12) argument.
You can remove the degree bin labels using the usual ggplot commands to clear the x axis: p.wr3 + theme(axis.text.x = element_blank(),axis.title.x = element_blank()).
At some point recently ggplot2 changed the ordering of bins, so that the plots didn't work. I think this was version 2.2. But, if your plots look a bit weird, change the code so that test for "2.2" is maybe "2.1", or "2.0".
Here is my version of the code. I added labels for directions (N, NNE, NE, ENE, E....) and made the y label to show frequency in percent instead of counts.
Click here to see figure of wind Rose with directions and frequency (%)
# WindRose.R
require(ggplot2)
require(RColorBrewer)
require(scales)
plot.windrose <- function(data,
spd,
dir,
spdres = 2,
dirres = 22.5,
spdmin = 2,
spdmax = 20,
spdseq = NULL,
palette = "YlGnBu",
countmax = NA,
debug = 0){
# Look to see what data was passed in to the function
if (is.numeric(spd) & is.numeric(dir)){
# assume that we've been given vectors of the speed and direction vectors
data <- data.frame(spd = spd,
dir = dir)
spd = "spd"
dir = "dir"
} else if (exists("data")){
# Assume that we've been given a data frame, and the name of the speed
# and direction columns. This is the format we want for later use.
}
# Tidy up input data ----
n.in <- NROW(data)
dnu <- (is.na(data[[spd]]) | is.na(data[[dir]]))
data[[spd]][dnu] <- NA
data[[dir]][dnu] <- NA
# figure out the wind speed bins ----
if (missing(spdseq)){
spdseq <- seq(spdmin,spdmax,spdres)
} else {
if (debug >0){
cat("Using custom speed bins \n")
}
}
# get some information about the number of bins, etc.
n.spd.seq <- length(spdseq)
n.colors.in.range <- n.spd.seq - 1
# create the color map
spd.colors <- colorRampPalette(brewer.pal(min(max(3,
n.colors.in.range),
min(9,
n.colors.in.range)),
palette))(n.colors.in.range)
if (max(data[[spd]],na.rm = TRUE) > spdmax){
spd.breaks <- c(spdseq,
max(data[[spd]],na.rm = TRUE))
spd.labels <- c(paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq])),
paste(spdmax,
"-",
max(data[[spd]],na.rm = TRUE)))
spd.colors <- c(spd.colors, "grey50")
} else{
spd.breaks <- spdseq
spd.labels <- paste(c(spdseq[1:n.spd.seq-1]),
'-',
c(spdseq[2:n.spd.seq]))
}
data$spd.binned <- cut(x = data[[spd]],
breaks = spd.breaks,
labels = spd.labels,
ordered_result = TRUE)
# figure out the wind direction bins
dir.breaks <- c(-dirres/2,
seq(dirres/2, 360-dirres/2, by = dirres),
360+dirres/2)
dir.labels <- c(paste(360-dirres/2,"-",dirres/2),
paste(seq(dirres/2, 360-3*dirres/2, by = dirres),
"-",
seq(3*dirres/2, 360-dirres/2, by = dirres)),
paste(360-dirres/2,"-",dirres/2))
# assign each wind direction to a bin
dir.binned <- cut(data[[dir]],
breaks = dir.breaks,
ordered_result = TRUE)
levels(dir.binned) <- dir.labels
data$dir.binned <- dir.binned
# Run debug if required ----
if (debug>0){
cat(dir.breaks,"\n")
cat(dir.labels,"\n")
cat(levels(dir.binned),"\n")
}
# create the plot ----
p.windrose <- ggplot(data = data,
aes(x = dir.binned,
fill = spd.binned
,y = (..count..)/sum(..count..)
))+
geom_bar() +
scale_x_discrete(drop = FALSE,
labels = c("N","NNE","NE","ENE", "E",
"ESE", "SE","SSE",
"S","SSW", "SW","WSW", "W",
"WNW","NW","NNW")) +
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE) +
theme(axis.title.x = element_blank()) +
scale_y_continuous(labels = percent) +
ylab("Frequencia")
# adjust axes if required
if (!is.na(countmax)){
p.windrose <- p.windrose +
ylim(c(0,countmax))
}
# print the plot
print(p.windrose)
# return the handle to the wind rose
return(p.windrose)
}
Have you ever tried windRose function from Openair package? It's very easy and you can set intervals, statistics and etc.
windRose(mydata, ws = "ws", wd = "wd", ws2 = NA, wd2 = NA,
ws.int = 2, angle = 30, type = "default", bias.corr = TRUE, cols
= "default", grid.line = NULL, width = 1, seg = NULL, auto.text
= TRUE, breaks = 4, offset = 10, normalise = FALSE, max.freq =
NULL, paddle = TRUE, key.header = NULL, key.footer = "(m/s)",
key.position = "bottom", key = TRUE, dig.lab = 5, statistic =
"prop.count", pollutant = NULL, annotate = TRUE, angle.scale =
315, border = NA, ...)
pollutionRose(mydata, pollutant = "nox", key.footer = pollutant,
key.position = "right", key = TRUE, breaks = 6, paddle = FALSE,
seg = 0.9, normalise = FALSE, ...)