I am trying to build a web app in shiny that would allow for different user input and then plot graphs/output data tables accordingly. I am using WHO's data about suicide rates and there are two possible types of graphs: bar plot and line graph.
The user is given a choice between plotting the graph in which the x axis is either the age group (barplot) or year (line graph). They are also given the choice of plotting the graph separately for males and females and different countries as well.
The code below works fine for everything except when the user chooses x axis = year with gender = 'gender neutral'. The error says that the object rate is not found. However, the block of code which includes the object rate works perfectly fine in other places.
library(shiny)
library(dplyr)
library(ggplot2)
# Work from the coursework folder that contains who.csv.
setwd("C:\\Users\\Lenovoi7\\Shrewsbury School\\IT\\Coursework")

# Load the WHO suicide data; text columns are read as factors.
who <- data.frame(read.csv("who.csv", stringsAsFactors = TRUE))

# Print a copy-pasteable sample of the first rows.
dput(head(who))

# Choices for the country dropdown: every country in the data, sorted,
# followed by an extra "World" entry.
countries <- union(sort(unique(who$country)), c("World"))
# UI: a sidebar of cascading dropdowns plus a main panel holding the plot.
#   1. "x"       - x variable ("age" or "year"); starts empty.
#   2. "gender"  - shown once x is chosen; gender neutral or gender specific.
#   3. "country" - shown once a gender option is chosen.
ui <- fluidPage(
  titlePanel("Suicide statistics"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        inputId = "x",
        label = "Please choose the x variable",
        choices = c(
          "",
          "Age group" = "age",
          "Year" = "year"
        )
      ),
      conditionalPanel(
        condition = "input.x == 'age' || input.x == 'year'",
        selectInput(
          inputId = "gender",
          label = "Please specify the gender characteristics",
          choices = c(
            "",
            "Gender neutral" = "gender_neutral",
            "Gender specific" = "gender_specific"
          ),
          selected = NULL
        ),
        # Nested conditional panel: only shown once a gender option is picked.
        # There is exactly ONE country input. Input IDs must be unique within
        # a Shiny page; two selectInput(inputId = "country") controls in
        # separate conditional panels both exist in the page (hidden panels
        # are only hidden, not removed), so they would clash.
        conditionalPanel(
          condition = paste(
            "input.gender == 'gender_specific'",
            "|| input.gender == 'gender_neutral'"
          ),
          selectInput(
            inputId = "country",
            label = "Select a country:",
            choices = countries,
            selected = "Bosnia and Herzegovina"
          )
        )
      )
    ),
    mainPanel(
      plotOutput("graph")
    )
  )
)
# Server logic (as posted in the question): renders output$graph from the
# x, gender and country inputs.
#   x == "age"  -> bar chart per age group
#   x == "year" -> line chart of suicides per 100,000 population per year
# Each combination of gender option and country ("World" or a single country)
# has its own branch below.
server <- function(input, output) {
# Reactive wrappers around the three inputs.
x<-reactive({input$x})
gender<-reactive({input$gender})
country<-reactive({input$country})
output$graph <- renderPlot(
#x axis = age group
if (x()=="age"){
if (gender()=="gender_neutral"){
if (country()=="World"){
ggplot(data=who, aes(x=age)) + geom_bar(aes(weights=suicides_no), position="dodge")}
else {
#create a new subset of data that will be used??
who_subset<-subset(who, country == input$country)
ggplot(data=who_subset, aes(x=age)) + geom_bar(aes(weights=suicides_no))}}
else if (gender()=="gender_specific"){
if (country()=="World"){
ggplot(data=who, aes(x=age)) + geom_bar(aes(weights=suicides_no, fill=sex), position="dodge")}
else {
#create a new subset of data that will be used??
who_subset<-subset(who, country==input$country)
ggplot(data=who_subset, aes(x=age)) + geom_bar(aes(weights=suicides_no, fill=sex), position="dodge")}}}
# x axis = year
else if (x()=="year"){
if (gender()=="gender_neutral"){
if (country()=="World"){
# Yearly totals over all countries, then the rate per 100,000 people.
who_all <- who %>%
group_by(year) %>%
summarize(suicides_no = sum(suicides_no),
population = sum(population)) %>%
mutate(rate = 100000 * suicides_no/population)
ggplot() +
geom_line(data = who_all, aes(year, rate))
}
else {
# Rows for the selected country only.
who_subset<-subset(who, country==input$country)
# Yearly totals for that country; `rate` exists only in who_sub_sex.
who_sub_sex <- who_subset %>%
group_by(year) %>%
summarize(suicides_no = sum(suicides_no),
population = sum(population)) %>%
mutate(rate = 100000 * suicides_no/population)
# NOTE(review): the layer below is given who_subset, which has no `rate`
# column; `rate` is created in who_sub_sex just above. Every other line-chart
# branch passes the summarised data frame. This is the branch the question
# reports as failing with "object rate not found".
ggplot() +
geom_line(data = who_subset, aes(year, rate))
}}
else if (gender()=="gender_specific"){
if (country()=="World"){
# NOTE(review): this branch groups by year only and maps no colour to sex,
# so it draws the same chart as the gender-neutral "World" branch.
who_all <- who %>%
group_by(year) %>%
summarize(suicides_no = sum(suicides_no),
population = sum(population)) %>%
mutate(rate = 100000 * suicides_no/population)
ggplot() +
geom_line(data = who_all, aes(year, rate))
}
else {
#create a new subset of data that will be used??
who_subset<-subset(who, country==input$country)
# Yearly totals per sex for the selected country.
who_sub_sex <- who_subset %>%
group_by(year, sex) %>%
summarize(suicides_no = sum(suicides_no),
population = sum(population)) %>%
mutate(rate = 100000 * suicides_no / population)
ggplot() +
geom_line(data = who_sub_sex, aes(year, rate, color = sex))}
}
}
)}
# Create a Shiny app object
# Bundles the `ui` layout and the `server` function into one app object.
shinyApp(ui = ui, server = server)
dput(head(who))
structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("Albania",
"Anguilla", "Antigua and Barbuda", "Argentina", "Armenia", "Aruba",
"Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Barbados",
"Belarus", "Belgium", "Belize", "Bermuda", "Bolivia",
"Bosnia and Herzegovina",
"Brazil", "British Virgin Islands", "Brunei Darussalam", "Bulgaria",
"Cabo Verde", "Canada", "Cayman Islands", "Chile", "Colombia",
"Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
"Denmark", "Dominica", "Dominican Republic", "Ecuador", "Egypt",
"El Salvador", "Estonia", "Falkland Islands (Malvinas)", "Fiji",
"Finland", "France", "French Guiana", "Georgia", "Germany", "Greece",
"Grenada", "Guadeloupe", "Guatemala", "Guyana", "Haiti", "Honduras",
"Hong Kong SAR", "Hungary", "Iceland", "Iran (Islamic Rep of)",
"Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan",
"Kazakhstan", "Kiribati", "Kuwait", "Kyrgyzstan", "Latvia", "Lithuania",
"Luxembourg", "Macau", "Malaysia", "Maldives", "Malta", "Martinique",
"Mauritius", "Mayotte", "Mexico", "Monaco", "Mongolia", "Montenegro",
"Montserrat", "Morocco", "Netherlands", "Netherlands Antilles",
"New Zealand", "Nicaragua", "Norway", "Occupied Palestinian Territory",
"Oman", "Panama", "Paraguay", "Peru", "Philippines", "Poland",
"Portugal", "Puerto Rico", "Qatar", "Republic of Korea",
"Republic of Moldova",
"Reunion", "Rodrigues", "Romania", "Russian Federation",
"Saint Kitts and Nevis",
"Saint Lucia", "Saint Pierre and Miquelon",
"Saint Vincent and Grenadines",
"San Marino", "Sao Tome and Principe", "Saudi Arabia", "Serbia",
"Seychelles", "Singapore", "Slovakia", "Slovenia", "South Africa",
"Spain", "Sri Lanka", "Suriname", "Sweden", "Switzerland",
"Syrian Arab Republic",
"Tajikistan", "TFYR Macedonia", "Thailand", "Trinidad and Tobago",
"Tunisia", "Turkey", "Turkmenistan", "Turks and Caicos Islands",
"Ukraine", "United Arab Emirates", "United Kingdom",
"United States of America",
"Uruguay", "Uzbekistan", "Venezuela (Bolivarian Republic of)",
"Virgin Islands (USA)", "Zimbabwe"), class = "factor"),
year = c(1985L, 1985L, 1985L, 1985L, 1985L, 1985L),
sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("female", "male"), class = "factor"),
age = structure(1:6, .Label = c("15-24 years", "25-34 years",
"35-54 years", "5-14 years", "55-74 years", "75+ years"),
class = "factor"),
suicides_no = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), population = c(277900L, 246800L,
267500L, 298300L, 138700L, 34200L)),
row.names = c(NA, 6L), class = "data.frame")
Is there any chance somebody knows a way out of this problem? Again I want the web app to output line graph when the user chooses x axis = year and gender = gender_neutral.
Try out with this server code.
The changes are already described in my comments. Since I don't have the who data.frame, I could not test it.
# Server logic: draws output$graph from the x, gender and country inputs.
#   x == "age"  -> bar chart of suicide counts per age group
#   x == "year" -> line chart of suicides per 100,000 population per year
# "Gender specific" splits the chart by sex (bar fill / line colour).
# Country "World" uses every row of `who`; any other value keeps only the
# rows of that country.
server <- function(input, output) {
  output$graph <- renderPlot({
    # Draw nothing until all three dropdowns hold a value; "" and NULL are
    # not "truthy", so req() stops quietly and leaves the plot empty.
    req(input$x, input$gender, input$country)

    by_sex <- input$gender == "gender_specific"

    plot_data <- if (input$country == "World") {
      who
    } else {
      subset(who, country == input$country)
    }

    if (input$x == "age") {
      # The aesthetic that geom_bar() sums per bar is `weight`. The previous
      # spelling `weights` is not a known aesthetic, so it was ignored and
      # the bars showed the number of rows instead of the suicide counts.
      bar_aes <- if (by_sex) {
        aes(weight = suicides_no, fill = sex)
      } else {
        aes(weight = suicides_no)
      }
      ggplot(data = plot_data, aes(x = age)) +
        geom_bar(mapping = bar_aes, position = "dodge")
    } else if (input$x == "year") {
      # Group by sex as well when the gender-specific view is selected. This
      # now also applies to "World", which previously ignored sex and drew
      # the same chart as the gender-neutral view.
      grouped <- if (by_sex) {
        group_by(plot_data, year, sex)
      } else {
        group_by(plot_data, year)
      }
      # NOTE(review): sum() returns NA for any group that contains a missing
      # suicides_no or population value, which leaves a gap in the line.
      # Decide whether such rows should be filtered out beforehand.
      yearly <- grouped %>%
        summarize(
          suicides_no = sum(suicides_no),
          population = sum(population)
        ) %>%
        ungroup() %>%
        mutate(rate = 100000 * suicides_no / population)

      line_aes <- if (by_sex) {
        aes(year, rate, color = sex)
      } else {
        aes(year, rate)
      }
      ggplot() +
        geom_line(data = yearly, mapping = line_aes)
    }
  })
}
Related
I have the following dataframe:
structure(list(share.beer = c(0.277, 0.1376, 0.1194, 0.0769,
0.0539, 0.0361, 0.0361, 0.0351, 0.0313, 0.03, 0.0119, 0.0084,
0.007, 0.0069), country = c("Brazil", "China, mainland", "United States",
"Thailand", "Vietnam", "China, mainland", "China, mainland",
"China, mainland", "China, mainland", "Argentina", "Indonesia",
"China, mainland", "China, mainland", "India"), Beer = c("soyb",
"maiz", "soyb", "cass", "cass", "whea", "rape", "soyb", "rice",
"soyb", "cass", "cott", "swpo", "rape")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -14L))
I want to create a barplot so that the beer type appears in the legend, the countries as y values while the share.beer are my values to be filled.
I have tried in various ways, including the following code, but I can't get the result I would like. Here, for instance, I kept the variable "Beer":
# Attempt from the question: reshape to long format, then draw a column chart.
# NOTE(review): the data frame shown above has a lowercase `country` column,
# not `Country`. Pivoting every remaining column also stacks the numeric
# `share.beer` with the character `Beer`, which matches the reported
# "Can't combine" error; after the pivot there is no `Beer` column left to
# use as the fill.
df %>%
pivot_longer(cols = -Country, values_to = "Count", names_to = "Type") %>%
ggplot() +
geom_col(aes(x = reorder(Country, -Count), y = Count, fill = Beer))
However, I get an error
Can't combine `share.beer` and `Beer`.
Any help?
You actually don't need the pivot_longer to create a suitable dataframe. You can use the following code:
library(tidyverse)
# Column chart straight from the original data frame: countries (ordered via
# reorder(country, -share.beer)) against share.beer, filled by Beer, then
# flipped so that the countries run along the vertical axis.
ggplot(
  df,
  aes(x = reorder(country, -share.beer), y = share.beer, fill = Beer)
) +
  geom_col() +
  labs(x = "Country", y = "Share beer") +
  coord_flip()
Output:
I generated multiple plots with vertical lines in each plot using the following codes:
I was wondering if there's any way to change the colour and line type of each vertical line in this code:
library(gridExtra)
library(grid)
library(lattice)
library(ggplot2)
# Code from the question: one facet per predictor for the rows with
# isocode "KOR", plus a vertical line at each year in vertical.lines.
# x positions of the vertical reference lines.
vertical.lines <- c(1990,1991)
df1 %>%
filter(isocode == "KOR") %>%
select(year,rgdpe, rgdpo, avh, emp, hc) %>%
# Long format: one row per (year, predictor) pair with its value.
tidyr::gather(predictor, value, -year)%>%
ggplot() +
geom_line(aes(year, value))+
facet_wrap(~predictor, scales = "free")+
# NOTE(review): `top` and `bottom` do not look like label names that labs()
# documents (title, subtitle, caption, ...) - confirm they have any effect.
labs(x = NULL, y = NULL, top = "Title of the Chart",
bottom = textGrob(
"Data source: World Bank",
gp = gpar(fontface = 3, fontsize = 9),
hjust = 1,x = 1))+ sapply(vertical.lines, function(xint) geom_vline(aes(xintercept = xint)))
# The sapply() call above builds one geom_vline() per entry of vertical.lines;
# all of them share the default colour and line type.
I want to put something like
lty = c(3,1), color = c("blue","red"), lwd=c(.5,.5))
But I don't know how to apply to my code.
My data has a structure like the following.
Maybe if you need, I can add more variables.
structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L), .Label = c("Aruba",
"Angola", "Anguilla", "Albania", "United Arab Emirates", "Argentina",
"Armenia", "Antigua and Barbuda", "Australia", "Austria", "Azerbaijan",
"Burundi", "Belgium", "Benin", "Burkina Faso", "Bangladesh",
"Bulgaria", "Bahrain", "Bahamas", "Bosnia and Herzegovina", "Belarus",
"Belize", "Bermuda", "Bolivia (Plurinational State of)", "Brazil",
"Barbados", "Brunei Darussalam", "Bhutan", "Botswana", "Central African Republic",
"Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire", "Cameroon",
"Congo, Democratic Republic", "Congo", "Colombia", "Comoros",
"Cabo Verde", "Costa Rica", "Curacao", "Cayman Islands", "Cyprus",
"Czech Republic", "Germany", "Djibouti", "Dominica", "Denmark",
"Dominican Republic", "Algeria", "Ecuador", "Egypt", "Spain",
"Estonia", "Ethiopia", "Finland", "Fiji", "France", "Gabon",
"United Kingdom", "Georgia", "Ghana", "Guinea", "Gambia", "Guinea-Bissau",
"Equatorial Guinea", "Greece", "Grenada", "Guatemala", "Guyana",
"China, Hong Kong SAR", "Honduras", "Croatia", "Haiti", "Hungary",
"Indonesia", "India", "Ireland", "Iran (Islamic Republic of)",
"Iraq", "Iceland", "Israel", "Italy", "Jamaica", "Jordan", "Japan",
"Kazakhstan", "Kenya", "Kyrgyzstan", "Cambodia", "Saint Kitts and Nevis",
"Republic of Korea", "Kuwait", "Lao People's DR", "Lebanon",
"Liberia", "Saint Lucia", "Sri Lanka", "Lesotho", "Lithuania",
"Luxembourg", "Latvia", "China, Macao SAR", "Morocco", "Republic of Moldova",
"Madagascar", "Maldives", "Mexico", "North Macedonia", "Mali",
"Malta", "Myanmar", "Montenegro", "Mongolia", "Mozambique", "Mauritania",
"Montserrat", "Mauritius", "Malawi", "Malaysia", "Namibia", "Niger",
"Nigeria", "Nicaragua", "Netherlands", "Norway", "Nepal", "New Zealand",
"Oman", "Pakistan", "Panama", "Peru", "Philippines", "Poland",
"Portugal", "Paraguay", "State of Palestine", "Qatar", "Romania",
"Russian Federation", "Rwanda", "Saudi Arabia", "Sudan", "Senegal",
"Singapore", "Sierra Leone", "El Salvador", "Serbia", "Sao Tome and Principe",
"Suriname", "Slovakia", "Slovenia", "Sweden", "Eswatini", "Sint Maarten (Dutch part)",
"Seychelles", "Syrian Arab Republic", "Turks and Caicos Islands",
"Chad", "Togo", "Thailand", "Tajikistan", "Turkmenistan", "Trinidad and Tobago",
"Tunisia", "Turkey", "Taiwan", "U.R. of Tanzania: Mainland",
"Uganda", "Ukraine", "Uruguay", "United States of America", "Uzbekistan",
"St. Vincent & Grenadines", "Venezuela (Bolivarian Republic of)",
"British Virgin Islands", "Viet Nam", "Yemen", "South Africa",
"Zambia", "Zimbabwe"), class = "factor"), isocode = c("ABW",
"ABW", "ABW", "ABW", "ABW"), year = 1990:1994, currency = structure(c(4L,
4L, 4L, 4L, 4L), .Label = c("Algerian Dinar", "Argentine Peso",
"Armenian Dram", "Aruban Guilder", "Australian Dollar", "Azerbaijanian Manat",
"Bahamian Dollar", "Bahraini Dinar", "Baht", "Balboa", "Barbados Dollar",
"Belarussian Ruble", "Belize Dollar", "Bermudian Dollar", "Bolivar Fuerte",
"Boliviano", "Brazilian Real", "Brunei Dollar", "Bulgarian Lev",
"Burundi Franc", "CFA Franc BCEAO", "CFA Franc BEAC", "Cabo Verde Escudo",
"Canadian Dollar", "Cayman Islands Dollar", "Cedi", "Chilean Peso",
"Colombian Peso", "Comoro Franc", "Convertible Marks", "Cordoba Oro",
"Costa Rican Colon", "Croatian Kuna", "Czech Koruna", "Dalasi",
"Danish Krone", "Denar", "Djibouti Franc", "Dobra", "Dominican Peso",
"Dong", "East Caribbean Dollar", "Egyptian Pound", "Ethiopian Birr",
"Euro", "Fiji Dollar", "Forint", "Franc Congolais", "Gourde",
"Guarani", "Guinea Franc", "Guyana Dollar", "Hong Kong Dollar",
"Hryvnia", "Iceland Krona", "Indian Rupee", "Iranian Rial", "Iraqi Dinar",
"Jamaican Dollar", "Jordanian Dinar", "Kenyan Shilling", "Kip",
"Kuwaiti Dinar", "Kwacha", "Kwanza", "Kyat", "Lari", "Lebanese Pound",
"Lek", "Lempira", "Leone", "Lilangeni", "Loti", "Malagasy Ariary",
"Malaysian Ringgit", "Manat", "Mauritius Rupee", "Metical", "Mexican Peso",
"Moldovan Leu", "Moroccan Dirham", "Naira", "Namibian Dollar",
"Nepalese Rupee", "Netherlands Antillian Guilder", "New Israeli Sheqel",
"New Leu", "New Taiwan Dollar", "New Turkish Lira", "New Zealand Dollar",
"Ngultrum", "Norwegian Krone", "Nuevo Sol", "Ouguiya", "Pakistan Rupee",
"Pataca", "Peso Uruguayo", "Philippine Peso", "Pound Sterling",
"Pula", "Qatari Rial", "Quetzal", "Rand", "Rial Omani", "Riel",
"Rufiyaa", "Rupiah", "Russian Ruble", "Rwanda Franc", "Saudi Riyal",
"Serbian Dinar", "Seychelles Rupee", "Singapore Dollar", "Som",
"Somoni", "Sri Lanka Rupee", "Sudanese Pound", "Surinam Dollar",
"Swedish Krona", "Swiss Franc", "Syrian Pound", "Taka", "Tanzanian Shilling",
"Tenge", "Trinidad and Tobago Dollar", "Tugrik", "Tunisian Dinar",
"UAE Dirham", "US Dollar", "Uganda Shilling", "Uzbekistan Sum",
"Won", "Yemeni Rial", "Yen", "Yuan Renminbi", "Zloty"), class = "factor"),
rgdpe = c(2574.41870117188, 2803.42724609375, 2943.32641601562,
3130.13989257812, 3535.80346679688), rgdpo = c(3043.74633789062,
3204.01831054688, 3399.08251953125, 3711.45483398438, 4192.33935546875
), pop = c(0.062149, 0.064622, 0.068235, 0.072504, 0.0767
), emp = c(NA, 0.0292000006884336, 0.030903272330761, 0.0329118072986603,
0.0348959788680077), avh = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), hc = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_)), row.names = c("ABW-1990", "ABW-1991",
"ABW-1992", "ABW-1993", "ABW-1994"), class = "data.frame")
One option to achieve your desired result would be to make use of purrr::pmap or mapply which both allow you to loop over multiple vectors or lists simultaneously.
Additionally, if you want to add title and/or caption you could do so via the title and caption arguments of labs()and set the style via theme options.
library(ggplot2)
library(tidyr)
library(dplyr)
library(purrr)
# One facet per predictor showing its value over the years, with a title, a
# caption, and one individually styled vertical line per row of the data
# frame handed to pmap().
df1 %>%
  select(year, rgdpe, rgdpo, avh, emp, hc) %>%
  # Long format: one row per (year, predictor) pair. pivot_longer() replaces
  # the superseded gather().
  tidyr::pivot_longer(-year, names_to = "predictor", values_to = "value") %>%
  ggplot() +
  geom_line(aes(year, value)) +
  facet_wrap(~predictor, scales = "free") +
  labs(
    x = NULL, y = NULL,
    title = "Title of the Chart",
    caption = "Data source: World Bank"
  ) +
  theme(plot.caption = element_text(face = 3, size = 9, hjust = 1)) +
  purrr::pmap(
    # One row per vertical line: position, line type and colour.
    # stringsAsFactors = FALSE keeps the colours as plain strings on
    # R versions before 4.0, where data.frame() made them factors.
    data.frame(
      xint = c(1990, 1991),
      lty = c(3, 1),
      color = c("blue", "red"),
      stringsAsFactors = FALSE
    ),
    # pmap() matches columns to arguments by name, so the function takes
    # exactly the three columns above (the unused `lwd` parameter is gone).
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
    # for lines - switch if the installed version warns.
    function(xint, lty, color) {
      geom_vline(xintercept = xint, color = color, linetype = lty, size = .5)
    }
  )
I am making an app in shiny, but I would like to know if it is possible to separate two inputs a bit. Something like what you see in the picture:
Expected space
I'm not sure what I need to do to get some white space between them.
The ui I'm using is the following:
# Read the COVID figures and the table of country coordinates from Excel.
datoscovid <- read_excel("/Users/jorge_hca/Desktop/Trabajo/datacovid.xlsx")
coordenadas <- read_excel("/Users/jorge_hca/Desktop/Trabajo/world_countrys.xlsx")

# Add the coordinates to every COVID row; `location` in the COVID data is
# matched against `name` in the coordinates table.
datoscov1 <- left_join(
  datoscovid,
  coordenadas,
  by = c("location" = "name")
)

# Theme passed to the page: black background, light grey foreground and a
# grey border for inputs.
theme <- bs_theme(
  bg = "#000000",
  fg = "#B8BCC2",
  "input-border-color" = "#a6a6a6"
)
# Page layout: a full-size leaflet map with two floating panels on top of it,
# a heading at the top left and the controls plus a chart at the top right.
ui <- bootstrapPage(
# Heading overlay, positioned 10px from the top and 50px from the left.
absolutePanel(
top = 10, left = 50, style = "z-index:500; text-align: right;",
tags$h2("Los ingresos en México por deciles")
),
theme = theme,
useShinyalert(),
# Let the page fill the whole window so the map can use 100% height.
tags$style(type = "text/css", "html, body {width:100%;height:100%}"),
leafletOutput("map", width = "100%", height = "100%"),
# Control overlay at the top right: date picker, country dropdown and chart.
absolutePanel(top = 10, right = 10,
fluidRow(dateInput("fecha", "Fecha a graficar:", width = 180,value = "2021-02-12"),
# Names are the Spanish labels shown to the user; values are the English
# country names sent to the server as input$var.
selectInput("var", "Escoge un país:", width = 180,
c("Antigua y Barbuda" = "Antigua and Barbuda",
"Argentina" = "Argentina",
"Bahamas" = "Bahamas",
"Barbados" = "Barbados",
"Belice" = "Belize",
"Bolivia" = "Bolivia",
"Brasil" = "Brazil",
"Canada" = "Canada",
"Chile" = "Chile",
"Colombia" = "Colombia",
"Costa Rica" = "Costa Rica",
"Cuba" = "Cuba",
"Dominica" = "Dominica",
"Republica Dominicana" = "Dominican Republic",
"Ecuador" = "Ecuador",
"El Salvador" = "El Salvador",
"Groenlandia" = "Greenland",
"Grenada" = "Grenada",
"Guatemala" = "Guatemala",
"Guyana" = "Guyana",
"Haiti" = "Haiti",
"Honduras" = "Honduras",
"Jamaica" = "Jamaica",
"Mexico" = "Mexico",
"Nicaragua" = "Nicaragua",
"Panama" = "Panama",
"Paraguay" = "Paraguay",
"Peru" = "Peru",
"Trinidad y Tobago" = "Trinidad and Tobago",
"Estados Unidos" = "United States",
"Uruguay" = "Uruguay",
"Venezuela" = "Venezuela"
), selected = "Mexico" )),
# Chart output placed directly below the two inputs.
# NOTE(review): the trailing comma after this call leaves an empty final
# argument - confirm the installed shiny/htmltools versions accept it.
echarts4rOutput("graf", height = '350px', width = '550px'),
)
)
Any ideas or suggestions?
You can insert an empty div with a height between your two inputs:
selectInput(......),
div(style = "height:100px"),
echarts4rOutput(......)
# Attempt from the question: relabel the continent of the countries in SA.
# Countries that should be relabelled as "South America".
SA = c("Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Paraguay", "Peru", "Uruguay", "Venezuela")
AF1 = gapminder %>%
mutate(
# Listing `country` by itself does not change it.
country,
# NOTE(review): `==` compares `continent` with the case_when() result; it
# does not assign to `continent` (that needs a single `=`).
continent == case_when(
# NOTE(review): `country == SA` compares element by element against the
# ten-name vector; a membership test needs `%in%`.
country == SA ~ "South America",
TRUE ~ as.character(continent)
)
)
I am trying to rename the country in SA to South America, but it does not work.
I think I understand what you're looking for. I'm not sure why 'country' is in the mutate because you're not actually changing it. For the continent, you are looking to see if the value is in SA, not equal to SA. Does this work?
# Countries that should be relabelled as "South America".
SA <- c(
  "Argentina", "Bolivia", "Brazil", "Chile", "Colombia",
  "Ecuador", "Paraguay", "Peru", "Uruguay", "Venezuela"
)

# Copy of gapminder in which `continent` reads "South America" for every
# country listed in SA and keeps its original label everywhere else.
AF1 <- gapminder %>%
  mutate(
    continent = case_when(
      # Membership test: is this row's country one of the names in SA?
      country %in% SA ~ "South America",
      # as.character() gives both branches the same type. If `continent` is
      # a factor (as the as.character() in the original attempt suggests),
      # mixing it with a string makes case_when() fail on older dplyr
      # versions.
      TRUE ~ as.character(continent)
    )
  )
I have been searching for this simple thing for hours now, but to no avail. I have a dataframe with one of the columns the variable "country". I want two things the following:
Plot the most frequent countries, most frequent on top (partial solution found EDIT full solution found >> focus question on limiting output in bar plot based on frequency);
Only show the top x "most frequent" countries, moving the rest into 'Other' variable.
I tried to ggplot table() or summary() but that does not work. Is it even possible within ggplot, or should I use barchart (I managed to do this using barchart, just using summary(df$something) and adding max = x). I also wanted to stack the output (different questions about country).
Most frequent countries on top:
# Stacked bar chart of answers, one bar per country, filled by question.
# The factor levels are ordered by ascending frequency, so after
# coord_flip() the most frequent country ends up at the top.
# sort() has no `increasing` argument (it was silently ignored); the
# ascending order is now requested explicitly with decreasing = FALSE.
ggplot(
  aDDs,
  aes(
    x = factor(
      answer,
      levels = names(sort(table(answer), decreasing = FALSE))
    ),
    fill = question
  )
) +
  geom_bar() +
  coord_flip()
Suggestions are very very welcome.
====== EDIT3:
I continued working on the code based on the suggestion by #CMichael, but have now encountered another, quite strange, thing. Because this 'ifelse' problem concerns a slightly different question than my original one, I have posted a separate question for this matter. Please check it here: R: ifelse function returns vector position instead of value (string)
====== EDIT:
The aDDs example is reproduced below - aDDs dataset can be downloaded here:
temp <- structure(list(student = c(2270285L, 2321254L, 75338L, 2071594L,1682771L, 1770356L, 2155693L, 3154864L, 3136979L, 2082311L),answer = structure(c(181L, 87L, 183L, 89L, 115L, 183L, 172L,180L, 175L, 125L), .Label = c("Congo", "Guinea-Bissau", "Solomon Islands","Central African Rep", "Comoros", "Equatorial Guinea", "Liechtenstein","Nauru", "Brunei", "Djibouti", "Kiribati", "Papua New Guinea","Samoa", "South Sudan", "Tajikistan", "Tonga", "Bhutan","Gabon", "Laos", "Lesotho", "Maldives", "Micronesia", "St Kitts and Nevis","Mozambique", "Niger", "Andorra", "Cape Verde", "Mauritania","Antigua and Deps", "Chad", "Guinea", "Malta", "Burundi","Eritrea", "Iceland", "Kyrgyzstan", "Turkmenistan", "Azerbaijan","Dominica", "Belize", "Malawi", "Mali", "Moldova", "Benin","Cuba", "Gambia", "Luxembourg", "St Lucia", "Angola", "Cambodia","Georgia", "Madagascar", "Oman", "Kosovo", "Kuwait", "Namibia","Bahrain", "Congo - Democratic Rep", "Montenegro", "Senegal","Sierra Leone", "Togo", "Botswana", "Fiji", "Libya", "Uzbekistan","Guyana", "Mongolia", "Somalia", "Zambia", "Estonia", "Ivory Coast","Myanmar", "Grenada", "Qatar", "Saint Vincent and the Grenadines","Tanzania", "Armenia", "Bahamas", "Belarus", "Burkina", "Liberia","Afghanistan", "Latvia", "Yemen", "Mauritius", "Albania","Barbados", "Iraq", "Macedonia", "Nicaragua", "Panama", "Slovenia","Lebanon", "Slovakia", "Kazakhstan", "Paraguay", "Korea South","Suriname", "Czech Republic", "Rwanda", "Haiti", "Lithuania","Israel", "Zimbabwe", "Cyprus", "Honduras", "Uruguay", "Syria","Finland", "Tunisia", "Taiwan", "Uganda", "Denmark", "Austria","Sri Lanka", "Vietnam", "Bosnia Herzegovina", "Thailand","Norway", "Trinidad and Tobago", "Switzerland", "Nepal","Sudan", "Jamaica", "Japan", "United Arab Emirates", "Bolivia","New Zealand", "Ethiopia", "Jordan", "Cameroon", "Croatia","Sweden", "Kenya", "Singapore", "Guatemala", "Ireland Republic","Saudi Arabia", "Bulgaria", "Malaysia", "Belgium", "Dominican Republic","Algeria", "El Salvador", 
"Bangladesh", "Serbia", "Ghana","Costa Rica", "Indonesia", "Hungary", "Venezuela", "Ecuador","Ukraine", "Romania", "Turkey", "China", "Morocco", "Russian Federation","Peru", "South Africa", "Argentina", "Portugal", "Iran","Poland", "Italy", "Chile", "France", "Germany", "Australia","Philippines", "Egypt", "Greece", "Nigeria", "Canada", "Pakistan","United Kingdom", "Mexico", "Colombia", "Brazil", "Netherlands","Spain", "India", "United States"), class = "factor"), question = c("C1-pres","C1-pres", "C1-pres", "C1-pres", "C1-pres", "C1-pres", "C1-pres","B1-pres", "B1-pres", "B1-pres")), .Names = c("student","answer", "question"), row.names = c("156", "203", "280", "347","412", "478", "534", "1649651", "1649691", "1649763"), class = "data.frame")
For the filtering question you should introduce a new column:
# Keep the country name where value exceeds the threshold, else "other".
# as.character() matters when `country` is a factor: ifelse() would
# otherwise return the factor's integer codes instead of the names.
data$filteredCountry <- ifelse(
  data$value > threshold,
  as.character(data$country),
  "other"
)
Now you can use filteredCountry as your x in the aesthetics.
The data ordering question pops up every now and then (e.g., ggplot2: sorting a plot). You need to order your country factor levels by the underlying values. Your reorder command seems to sort by country name again, I would expect something like reorder(country,frequency) but sample data would help.
UPDATE:
With the now provided data it becomes obvious that you need to create summary dataset:
# Read the raw answers (comma-separated, with a header row).
data <- read.table("aDDs.csv", sep = ",", header = TRUE)
# library() stops with an error if plyr is missing; require() would only
# return FALSE and let the script fail later at ddply().
library(plyr)
# One row per distinct answer, with the number of times it occurs in `freq`.
summary <- ddply(data, .(answer), summarise, freq = length(answer))
This yields the data frame summary with one entry for each country (181 in total). Now you can do the filtering and the reordering:
# Frequencies above the 90th percentile keep their own label.
threshold <- quantile(summary$freq, 0.9)
# Everything else is pooled under "other". as.character() matters when
# `answer` is a factor: ifelse() would otherwise return the factor's integer
# codes instead of the country names.
summary$filteredCountry <- ifelse(
  summary$freq > threshold,
  as.character(summary$answer),
  "other"
)
# Order the labels by descending frequency for plotting.
summary$filteredCountry <- reorder(summary$filteredCountry, -summary$freq)
Now you can plot:
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE.
library(ggplot2)
# One bar per filtered country label; bar heights are taken from `freq`
# as-is (stat = "identity") and coloured by label.
p <- ggplot(data = summary, aes(x = filteredCountry, y = freq)) +
  geom_bar(aes(fill = filteredCountry), stat = "identity")
p
Thanks to suggestions from #CMichael and answers to another - related - post here on SO. I managed to create a stacked and ordered bar plot using ggplot:
# Create a list with the most frequent country names.
# summary() on a factor lists the most frequent levels and pools the rest
# under "(Other)". `maxsum` is spelled out rather than relying on partial
# matching of `max`. This must run while `answer` is still a factor.
temp <- names(summary(aDDs$answer, maxsum = 12))

# IMPORTANT: turn the factor into character values first, otherwise ifelse()
# below returns the factor's integer codes instead of the country names.
aDDs$answer <- as.character(aDDs$answer)

# Create a new column that keeps the top answers and pools everything else.
aDDs$top <- ifelse(
  aDDs$answer %in% temp, # condition: the answer is one of the top names
  aDDs$answer,           # then keep the answer as it is
  "Other"                # else label it "Other"
)
aDDs$top <- as.factor(aDDs$top) # factorize the output again

# Plot: stacked bars filled by question. The levels are ordered by ascending
# frequency, so after coord_flip() the most frequent label is at the top.
# sort() has no `increasing` argument (it was silently ignored); the
# ascending order is now requested explicitly with decreasing = FALSE.
ggplot(
  aDDs,
  aes(
    x = factor(
      top,
      levels = names(sort(table(top), decreasing = FALSE))
    ),
    fill = question
  )
) +
  geom_bar() +
  coord_flip()
And here the output (still needs some tweaking, but it is what I wanted):