Below is the structure of the dataframe
Village <- c("Location A" , "Location B", "Location C", "Location C", "Location A")
Farmers_Name <- c("Mary", "John", "Grace","Steph", "Richard")
Practiced_MinimumTillage <- c(0,1,1,0,1)
Practiced_Intercropping <- c(1,1,1,0,0)
Practiced_CropRotation <- c(1,1,1,1,0)
Practiced_ApplicationOfManure <- c(0,1,0,1,0)
farmers <- data.frame(Farmers_Name,Village,Practiced_MinimumTillage,Practiced_Intercropping,Practiced_CropRotation,Practiced_ApplicationOfManure)
The output of the dataframe farmers.
Farmers_Name Village Practiced_MinimumTillage Practiced_Intercropping Practiced_CropRotation Practiced_ApplicationOfManure
1 Mary Location A 0 1 1 0
2 John Location B 1 1 1 1
3 Grace Location C 1 1 1 0
4 Steph Location C 0 0 1 1
5 Richard Location A 1 0 0 0
Summarizing farm practices to get an understanding of the usage. A frequency table of the practices used by farmers in their farm.
practices <- select(farmers,Practiced_MinimumTillage,Practiced_Intercropping,Practiced_CropRotation,Practiced_ApplicationOfManure)
practices %>%
summarise_all(sum, na.rm=TRUE) %>%
gather(var,value) %>%
arrange(desc(value)) %>%
ggplot(aes(var, value)) + geom_bar(stat = "Identity") + coord_flip()
In the farmers dataframe, I'd like to use the column Village, for selectInput function. Whereby if a person selects "Location A" or "Location B" from the dropdown, above plot based on the frequency table is rendered in the output. How do I restructure the dataframe to suit this
using either dplyr or data.table?
It's pretty straightforward but comment if you have any questions -
Village <- c("Location A" , "Location B", "Location C", "Location C", "Location A")
Farmers_Name <- c("Mary", "John", "Grace","Steph", "Richard")
Practiced_MinimumTillage <- c(0,1,1,0,1)
Practiced_Intercropping <- c(1,1,1,0,0)
Practiced_CropRotation <- c(1,1,1,1,0)
Practiced_ApplicationOfManure <- c(0,1,0,1,0)
farmers <- data.frame(Farmers_Name,Village,Practiced_MinimumTillage,Practiced_Intercropping,
Practiced_CropRotation,Practiced_ApplicationOfManure)
shinyApp(
ui = fluidPage(
selectInput("village", "Select Village", choices = unique(farmers$Village)),
plotOutput("some_plot")
),
server = function(input, output, session) {
output$some_plot <- renderPlot({
filter(farmers, Village == input$village) %>%
select(Practiced_MinimumTillage,Practiced_Intercropping,Practiced_CropRotation,
Practiced_ApplicationOfManure) %>%
summarise_all(sum, na.rm=TRUE) %>%
gather(var,value) %>%
arrange(desc(value)) %>%
ggplot(aes(var, value)) + geom_bar(stat = "Identity") + coord_flip()
})
}
)
Related
I have a data set with a variable 'education' which is coded differently in each of the three countries included, for example:
Code
Country 1
Country 2
Country 3
1
No education
No education
No education
2
Primary
Primary
Islamic education
3
Secondary
Secondary
Primary
4
NA
NA
Secondary
I need to apply factor levels, which are different for each country.
Below is my attempt, but it doesn't appear to work:
df <- data.frame(
Country = sample(c("Country 1", "Country 2", "Country 3"), 100, replace = TRUE),
Education_1 = sample(1:4)
)
df$Education <-
if(df$Country == "Country1") {
factor(df$Education,
levels = c(1:4),
labels = c("No education", "Primary", "Secondary", "NA"))
} else if (df$Country == "Country2") {
factor(df$Education,
levels = c(1:4),
labels = c("No education", "Primary", "Secondary", "NA"))
} else {
factor(df$Education,
levels = c(1:4),
labels = c("No education", "Islamic education", "Primary", "Secondary")
)
}
Thanks
Perhaps this helps? This takes the data from the table mapping countries with the education code and the education category and converts it to long format.
Then use a left join to the two column dataframe with countries and education codes.
You could use the resulting column with education type as a string or the codes could be recoded to be consistent.
library(dplyr)
library(tidyr)
library(stringr)
df <- data.frame(
Country = sample(c("Country 1", "Country 2", "Country 3"), 100, replace = TRUE),
Education_1 = sample(1:4))
df_ed <- structure(list(Code = 1:4, Country.1 = c("No education", "Primary",
"Secondary", NA), Country.2 = c("No education", "Primary", "Secondary",
NA), Country.3 = c("No education", "Islamic education", "Primary",
"Secondary")), class = "data.frame", row.names = c(NA, -4L))
df_levels <-
df_ed %>%
pivot_longer(-Code) %>%
mutate(name = str_replace(name, "\\.", " "))
df1 <-
df %>%
left_join(df_levels, by = c("Country" = "name", "Education_1" = "Code"))
head(df1)
#> Country Education_1 value
#> 1 Country 1 3 Secondary
#> 2 Country 2 4 <NA>
#> 3 Country 3 1 No education
#> 4 Country 1 2 Primary
#> 5 Country 3 3 Primary
#> 6 Country 2 4 <NA>
Created on 2021-09-22 by the reprex package (v2.0.0)
I'm having some problems with my ggplot title in this Shinyapp. I'm comparing countries and I want the countries the plot shows (colour = input$stat) to also be visible in the ggplot title. With the current code I'm only getting the first one. Is there perhaps an elegant solution to this problem?
If I'm comparing France, The United Kingdom and Spain, I want the title to be:
"Coronalandskampen, France, The United Kingdom, Spain"
library(tidyverse)
library(readxl)
library(httr)
library(zoo)
library(caTools)
library(shiny)
library(data.table)
#Get data
url <- paste("https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-",format(Sys.time(), "%Y-%m-%d"), ".xlsx", sep = "")
GET(url, authenticate(":", ":", type="ntlm"), write_disk(tf <- tempfile(fileext = ".xlsx")))
df <- read_excel(tf)
df <- df %>%
rename(land = countriesAndTerritories,
`Antal fall` = cases,
`Antal döda` = deaths) %>%
arrange(land, dateRep) %>%
group_by(land) %>%
mutate(`Antal döda, kumulativt` = cumsum(`Antal döda`),
`Antal fall, kumulativt` = cumsum(`Antal fall`)) %>%
ungroup() %>%
filter(`Antal döda, kumulativt` > 10) %>%
group_by(land) %>%
mutate(antal_dagar = row_number(),
start_datum = min(dateRep),
`Antal Fall, rullande medeltal över sju dagar` = rollmean(`Antal fall`, 7, fill = NA),
`Antal döda, rullande medeltal över sju dagar` = rollmean(`Antal döda`, 7, fill = NA)) %>%
ungroup() %>%
mutate(`Döda per 100 000 invånare` = `Antal döda, kumulativt` * 100000 / popData2019) %>%
select(land, antal_dagar, `Antal fall`, `Antal fall, kumulativt`, `Antal döda`, `Antal döda, kumulativt`, `Döda per 100 000 invånare`,
`Antal Fall, rullande medeltal över sju dagar`, `Antal döda, rullande medeltal över sju dagar`, start_datum, geoId)
ui <- fluidPage(
navbarPage("Statistik Covid-19",
sidebarLayout(
sidebarPanel(
selectInput("stat", "Välj länder:", choices = unique(df$land), selected = "Sweden", multiple = TRUE),
varSelectInput("var", "Variabel:", df[c(3,4,5,6,7, 8, 9)])),
mainPanel(plotOutput("covid"))
)))
server <- function(input, output, session) {
df_graf <- reactive({df %>%
req(input$stat) %>%
filter(land %in% input$stat)
})
output$covid <- renderPlot({
ggplot(df_graf(), aes(antal_dagar, df_graf()[[input$var]], colour = land)) +
geom_line(size = 1.25) +
theme_Skane() +
labs(title = paste0("Coronalandskampen, ", input$stat),
x = "Antal dagar sedan 10:e dödsfallet",
y = as.name(input$var),
colour = NULL,
caption = "Source: European Centre for Disease Prevention and Control")
})
}
shinyApp(ui, server)
So basically, your question boils down to making sure that:
title = paste0("Coronalandskampen, ", input$stat)
returns the string "Coronalandskampen, France, The United Kingdom, Spain".
When running:
> paste0("Coronalandskampen, ", c("A", "B", "C", "D"))
[1] "Coronalandskampen, A" "Coronalandskampen, B"
[3] "Coronalandskampen, C" "Coronalandskampen, D"
We see that the result is a character vector with more than one element. The labsfunction uses only the first element of this vector. Therefore you need to build a single string.
Lets try this:
> paste0("Coronalandskampen, ", paste0(c("A", "B", "C", "D"), collapse = ", "))
[1] "Coronalandskampen, A, B, C, D"
So, in your code you can write:
title = paste0("Coronalandskampen, ", paste0(input$stat, collapse = ", "))
I have a dataset that looks like this:
df <- data.frame("id" = c("Alpha", "Beta", "Gamma","Alpha","Beta","Gamma","Lambda","Tau"),
"group" = c("Alpha is good", "Alpha is good", "Alpha is good", "Beta is bad", "Beta is bad","Beta is bad","Beta is bad","Beta is bad"),
"Val" = c(2,2,2,5,5,5,5,5))
I would like to filter observation when the group name matches the id name. In sum, the final dataset should look like this:
final <- data.frame("id" = c("Alpha", "Beta"),
"group" = c("Alpha is good", "Beta is bad"),
"Val" = c(2,5))
The idea is that the function should be able to recognize if the string in "id" is also present in "group".
I hope this is clear
Thanks in advance for your help
We can use str_detect which is vectorized (According to ?str_detect
Vectorised over string and pattern.
library(stringr)
library(dplyr(
df %>%
mutate_if(is.factor, as.character) %>%
filter(str_detect(group, id))
If there are overlapping elements in each group
df %>%
mutate_if(is.factor, as.character) %>%
group_by(group1 = group) %>%
filter(str_detect(group, id))
One base R possibility could be:
df[unlist(Map(grepl, df$id, df$group)), ]
id group Val
1 Alpha Alpha is good 2
5 Beta Beta is bad 5
Or even more elegantly using mapply() (based on a comment from #r2evans):
df[mapply(grepl, df$id, df$group), ]
Sample data:
df <- data.frame("id" = c("Alpha", "Beta", "Gamma","Alpha","Beta","Gamma","Lambda","Tau"),
"group" = c("Alpha is good", "Alpha is good", "Alpha is good", "Beta is bad", "Beta is bad","Beta is bad","Beta is bad","Beta is bad"),
"Val" = c(2,2,2,5,5,5,5,5),
stringsAsFactors = FALSE)
When I try to run the following code I get an error:
value <- as.matrix(wsu.wide[, c(4, 3, 2)])
Error in [.data.frame(wsu.wide, , c(4, 3, 2)) : undefined columns
selected
How do I get this line of work? It's part of dcasting my data.
This is full the code:
library(readxl)
library(reshape2)
Store_and_Regional_Sales_Database <- read_excel("~/Downloads/Data_Files/Store and Regional Sales Database.xlsx", skip = 2)
store <- Store_and_Regional_Sales_Database
freq <- table(store$`Sales Region`)
freq
rel.freq <- freq / nrow(store)
rel.freq
rel.freq.scaled <- rel.freq * 100
rel.freq.scaled
labs <- paste(names(rel.freq.scaled), "\n", "(", rel.freq.scaled, "%", ")", sep = "")
pie(rel.freq.scaled, labels = labs, main = "Pie Chart of Sales Region")
monitor <- store[which(store$`Item Description` == '24" Monitor'),]
wsu <- as.data.frame(monitor[c("Week Ending", "Store No.", "Units Sold")])
wsu.wide <- dcast(wsu, "Store No." ~ "Week Ending", value.var = "Units Sold")
value <- as.matrix(wsu.wide[, c(4, 3, 2)])
Thanks.
Edit:
This is my table called "monitor":
When I then make this wsu <- as.data.frame(monitor[c("Week Ending", "Store No.", "Units Sold")]) I create another vector with only variables "Week Ending", "Store No." and "Units Sold".
However, as I write the wsu.wide code the ouput I get is only this:
Why do I only get this small table when I'm asking to dcast my data?
After this I don't get what is wrong.
The problem is at the line:
wsu.wide <- dcast(wsu, "Store No." ~ "Week Ending", value.var="Units Sold")
Instead of the double quotation mark " you should use the grave accent - ` in the formula:
wsu.wide <- dcast(wsu, `Store No.` ~ `Week Ending`, value.var = "Units Sold")
To avoid this kind of problem it is better not to use spaces in the R object names it is better to substitute Sales Region variable name to sales_region using underscore. See e.g. Google's R Style Guide.
Please see the code below, I used simulation of your data as extract it from the picture is quite cumbersome:
library(readxl)
library(reshape2)
#simulation
n <- 4
Store_and_Regional_Sales_Database <- data.frame(
a = seq_along(LETTERS[1:n]),
sr = LETTERS[1:n],
sr2 = '24" Monitor',
sr3 = 1:4,
sr4 = 2:5,
sr5 = 3:6)
names(Store_and_Regional_Sales_Database)[2:6] <- c(
"Sales Region", "Item Description",
"Week Ending", "Store No.", "Units Sold")
# algorithm
store <- Store_and_Regional_Sales_Database
freq <- table(store$`Sales Region`)
freq
rel.freq <- freq/nrow(store)
rel.freq
rel.freq.scaled <- rel.freq * 100
rel.freq.scaled
labs <- paste(names(rel.freq.scaled), "\n", "(", rel.freq.scaled, "%", ")", sep = "")
pie(rel.freq.scaled, labels = labs, main = "Pie Chart of Sales Region")
monitor <- store[which(store$`Item Description` == '24" Monitor'),]
wsu <- as.data.frame(monitor[c("Week Ending", "Store No.", "Units Sold")])
wsu.wide <- dcast(wsu, `Store No.` ~ `Week Ending`, value.var = "Units Sold")
value <- as.matrix(wsu.wide[ ,c(4,3,2)])
Output:
3 2 1
[1,] NA NA 3
[2,] NA 4 NA
[3,] 5 NA NA
[4,] NA NA NA
I'm trying to speed up an R Shiny app I'm building to explore a set of data. The app displays sentences for some of the data and a series of bar charts for other data. Each of these is rendered separately.
Every time I click one of the checkboxes, however, to filter the data, the whole thing re-renders and this takes about a second to complete. How can I isolate the rendering of all of the charts until a "go" button is clicked? I've tried a few other suggestions on SO but haven't found out how to do it with multiple renderPlots.
Sorry for the mess of code. I didn't want to shorten it too much or I'll lose some of the context.
library(ggplot2)
d <- read.csv("dm_survey.csv")
server <- function(input, output) {
output$surveyPlot1 <- renderPlot({
d <- d[d$"Frequency of Games" %in% c(input$frequency),]
d <- d[d$"Length of Games" %in% c(input$length),]
d <- d[d$"Primary Locations" %in% c(input$locations),]
d <- d[d$"Campaign Worlds" %in% c(input$worlds),]
d <- d[d$"Adventures" %in% c(input$adventures),]
d <- d[d$"Preferred Combat Type" %in% c(input$combat),]
d <- d[d$"Preparation Time" %in% c(input$preptime),]
output$textmain <- renderUI(HTML("<h1>2016 D&D Dungeon Master Survey</h1>"))
table_columns <- c("Campaign Worlds", "Primary Locations",
"Adventures",
"Preferred Combat Type")
total_sentences <- c()
for (table_column in table_columns) {
num_respondents <- nrow(d[table_column])
question_title <- tolower(table_column)
sentence <- paste("Of", num_respondents, "respondents on", question_title, collapse = "")
tbl <- data.frame(sort(table(d[table_column]),decreasing = TRUE))
if (nrow(tbl) == 1) {
tbl <- data.frame("Activity" = d[table_column][1,], "Count" = nrow(d),"Freq" = 100)
print(tbl)
} else {
tbl["Percentage"] <- round(tbl["Freq"] / colSums(tbl["Freq"]) * 100, 0)
}
for(i in 1:nrow(tbl)) {
answer_title <- tolower(as.character(tbl[i,1]))
answer_percentage <- tbl[i,3]
sentence <- paste(sentence, ", ", answer_percentage, "% answered ", answer_title, collapse="", sep = '')
}
sentence <- paste(sentence, ".", collapse="", sep = '')
total_sentences <- c(total_sentences, sentence)
}
output$text1 <- renderUI(HTML(paste(total_sentences[1],"<br/><br/>")))
output$text2 <- renderUI(HTML(paste(total_sentences[2],"<br/><br/>")))
output$text3 <- renderUI(HTML(paste(total_sentences[3],"<br/><br/>")))
output$text4 <- renderUI(HTML(paste(total_sentences[4],"<br/><br/>")))
column_name <- "Frequency of Games"
factor_labels <- c("Less than monthly","Monthly","Twice monthly","Weekly","Twice a week","More than twice weekly")
d[,column_name] <- factor(d[,column_name], levels = factor_labels)
p1 <- ggplot(d, aes(factor(d[,column_name])))
p1 + geom_bar() + coord_flip() + theme_minimal() +
scale_y_continuous(expand=c(.1, 0)) +
labs(x = "",
title = column_name,
y=paste("Number of Respondants out of",nrow(d))) +
geom_text(aes(label = scales::percent((..count..)/sum(..count..))),
stat= "count", hjust=-.1)
})
output$surveyPlot2 <- renderPlot({
d <- d[d$"Frequency of Games" %in% c(input$frequency),]
d <- d[d$"Length of Games" %in% c(input$length),]
d <- d[d$"Primary Locations" %in% c(input$locations),]
d <- d[d$"Campaign Worlds" %in% c(input$worlds),]
d <- d[d$"Adventures" %in% c(input$adventures),]
d <- d[d$"Preferred Combat Type" %in% c(input$combat),]
d <- d[d$"Preparation Time" %in% c(input$preptime),]
column_name <- "Length of Games"
factor_labels <- c("Longer than eight hours","About eight hours","About six hours","About four hours","About three hours","About two hours","About an hour")
d[,column_name] <- factor(d[,column_name], levels = factor_labels)
p2 <- ggplot(d, aes(factor(d[,column_name])))
p2 + geom_bar() + coord_flip() + theme_minimal() +
scale_y_continuous(expand=c(.1, 0)) +
labs(x = "",
title = column_name,
y=paste("Number of Respondants out of",nrow(d))) +
geom_text(aes(label = scales::percent((..count..)/sum(..count..))),
stat= "count", hjust=-.1)
})
output$surveyPlot3 <- renderPlot({
d <- d[d$"Frequency of Games" %in% c(input$frequency),]
d <- d[d$"Length of Games" %in% c(input$length),]
d <- d[d$"Primary Locations" %in% c(input$locations),]
d <- d[d$"Campaign Worlds" %in% c(input$worlds),]
d <- d[d$"Adventures" %in% c(input$adventures),]
d <- d[d$"Preferred Combat Type" %in% c(input$combat),]
d <- d[d$"Preparation Time" %in% c(input$preptime),]
column_name <- "Preparation Time"
factor_labels <- c("More than four hours","About four hours","About three hours","About two hours","About an hour","About 30 minutes","About 15 minutes","I don't prepare at all")
d[,column_name] <- factor(d[,column_name], levels = factor_labels)
p3 <- ggplot(d, aes(factor(d[,column_name])))
p3 + geom_bar() + coord_flip() + theme_minimal() +
scale_y_continuous(expand=c(.1, 0)) +
labs(x = "",
title = column_name,
y=paste("Number of Respondants out of",nrow(d))) +
geom_text(aes(label = scales::percent((..count..)/sum(..count..))),
stat= "count", hjust=-.1)
})
output$surveyPlot4 <- renderPlot({
d <- d[d$"Frequency of Games" %in% c(input$frequency),]
d <- d[d$"Length of Games" %in% c(input$length),]
d <- d[d$"Primary Locations" %in% c(input$locations),]
d <- d[d$"Campaign Worlds" %in% c(input$worlds),]
d <- d[d$"Adventures" %in% c(input$adventures),]
d <- d[d$"Preferred Combat Type" %in% c(input$combat),]
d <- d[d$"Preparation Time" %in% c(input$preptime),]
# Set up a bunch of facets to show bar plots
l <- reshape(d,
varying = c("Campaign and Worldbuilding","Story and Adventures","Combat Encounters","NPC Development","Exploration and Roleplay","Treasure and Magic Items","Prop and Handouts"),
v.names = "Times",
timevar = "Activities",
times = c("Campaign and Worldbuilding","Story and Adventures","Combat Encounters","NPC Development","Exploration and Roleplay","Treasure and Magic Items","Prop and Handouts"),
direction = "long")
keeps <- c("Activities", "Times")
l <- l[keeps]
l[l=="None"] <- "None"
l[l=="About 5 minutes"] <- "5 min"
l[l=="About 15 minutes"] <- "15 min"
l[l=="About 30 minutes"] <- "30 min"
l[l=="About an hour"] <- "1 hr"
l[l=="About two hours"] <- "2 hrs"
l[l=="More than two hours"] <- "> 2 hrs"
factor_labels <- c("None","5 min","15 min","30 min","1 hr","2 hrs","> 2 hrs")
factor_charts <- c("Campaign and Worldbuilding","Story and Adventures","Combat Encounters","NPC Development","Exploration and Roleplay","Treasure and Magic Items","Prop and Handouts")
l[,"Times"] <- factor(l[,"Times"], levels = factor_labels)
l[,"Activities"] <- factor(l[,"Activities"], levels = factor_charts)
row_count <- nrow(d)
ggplot(l, aes(x=Times)) + geom_bar() + facet_wrap(~Activities, nrow = 2, scales="free_x") +
xlab(paste("Preparation Time for Specific Activities out of",nrow(d),"Respondants")) +
ylab("Number of respodants") +
scale_y_continuous(expand=c(.1, 0)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.title.x=element_text(),
axis.title.x=element_text()) +
geom_text(aes(row_count=row_count, label = paste(round((..count../row_count)*100,0),"%", sep="")),
stat= "count", vjust=-.2, size=3)
})
}
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
actionButton("recalculate", "Recalculate"),
checkboxGroupInput("frequency", "Frequency of Games",
unique(d[,2]), selected = unique(d[,2])),
checkboxGroupInput("length", "Length of Games",
unique(d[,3]), selected = unique(d[,3])),
checkboxGroupInput("locations", "Primary Locations",
unique(d[,5]), selected = unique(d[,5])),
checkboxGroupInput("worlds", "Campaign Worlds",
unique(d[,6]), selected = unique(d[,6])),
checkboxGroupInput("adventures", "Adventures",
unique(d[,7]), selected = unique(d[,7])),
checkboxGroupInput("combat", "Combat Type",
unique(d[,8]), selected = unique(d[,8])),
checkboxGroupInput("preptime", "Preparation Time",
unique(d[,9]), selected = unique(d[,9]))
),
mainPanel(htmlOutput("textmain"),
htmlOutput("text1"),
htmlOutput("text2"),
htmlOutput("text3"),
htmlOutput("text4"),
plotOutput("surveyPlot1"),
plotOutput("surveyPlot2"),
plotOutput("surveyPlot3"),
plotOutput("surveyPlot4"))
)
)
shinyApp(ui = ui, server = server)