Related
I'm coloring countries based on variable v1 and I want to add geom_point to each of those countries and set geom size equal to my variable v2. However, when I add the geom_point, the geom_point for the USA does not appear and other points are scattered on the map.
Below is my script, as per other posts I've seen here.
ddf <- structure(list(Country = c("Brazil", "United States", "Sweden", "South Korea", "Senegal", "Pakistan", "Norway","New Zealand", "Mexico","India",
"Netherlands", "Denmark", "Czech Republic", "China"),
v1 = c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9L, 1L, 3L, 1L, 4L), v2 = c(13, 3, 2, 2, 2, 11, 4, 15, 4, 4, 3, 12, 5, 20)),
.Names = c("Country", "v1", "v2"), row.names = c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9L, 1L, 3L, 1L, 4L), class = "data.frame",
na.action = structure(2L, .Names = "2", class = "omit"))
data(wrld_simpl)
# `data` is a slot of wrld_simpl, so it is reached with `@` (a `#` here would
# turn the rest of the line into a comment and the id column would never be
# created). The country name is copied into `id` to serve as the region key
# for fortify().
wrld_simpl@data$id <- wrld_simpl@data$NAME
wrld <- fortify(wrld_simpl, region = "id")
wrld <- subset(wrld, id != "Antarctica")
# Outline vertices of only the countries present in ddf (same 14 names that
# are listed in ddf$Country).
SelectedCountries <- subset(wrld, id %in% ddf$Country)
# One point per country: the midpoint of its longitude range and of its
# latitude range, i.e. the centre of the bounding box rather than a true
# centroid.
# NOTE(review): for a country whose outline reaches both sides of the
# +/-180 degree meridian this midpoint can fall far away from the country,
# which is presumably why some points look misplaced -- confirm.
CountryCenters <- aggregate(cbind(long, lat) ~ id, data = SelectedCountries,
                            FUN = function(x) mean(range(x)))
# Attach the computed long/lat to each row of ddf by country name.
ddf <- merge(ddf, CountryCenters, by.x = "Country", by.y = "id")
# Layers, bottom to top: all countries in grey, the selected countries filled
# by v1, then one red point per selected country sized by v2.
gg <- ggplot() +
  geom_map(data = wrld, map = wrld, aes(map_id = id, x = long, y = lat),
           fill = "grey90", size = 0.25) +
  geom_map(data = ddf, map = wrld, aes(map_id = Country, fill = v1),
           size = 0.25) +
  geom_point(data = ddf, aes(x = long, y = lat, size = v2), colour = "red")
gg
Any help would be appreciated!
It appears your centroid calculation has an issue with United States and New Zealand. One solution is to use sf for your geoprocessing and geom_sf for your plotting, like this:
library(maptools)
library(ggplot2)
library(sf)
# Load the wrld_simpl dataset and convert it to an sf object so that the sf
# functions and geom_sf() below can work with it.
data(wrld_simpl)
world <- st_as_sf(wrld_simpl)
# Define polygon features: join ddf to the world polygons by country name
# (ddf$Country against world$NAME) and convert the merged table back to sf.
poly_ddf <- st_as_sf(merge(ddf, world, by.x = "Country", by.y = "NAME"))
# Calculate centroids
# Centroid calc prefers projected data
# PICK A PROJECTION THAT MATCHES THE GOALS OF YOUR WORK
# NOTE(review): 6933 is presumably an EPSG code for an equal-area projection
# -- confirm it suits the region being mapped.
ddf_p <- st_transform(poly_ddf, crs = 6933)
centroids <- st_centroid(ddf_p)
# The `#>` lines are the console output produced by the call above.
#> Warning in st_centroid.sf(ddf_p): st_centroid assumes attributes are constant
#> over geometries of x
# Layers, bottom to top: every country as the base map, the countries from ddf
# filled by v1, then their centroids as red points sized by v2.
ggplot() +
geom_sf(data = world) +
geom_sf(data = poly_ddf, aes(fill = v1)) +
geom_sf(data = centroids, aes(size = v2), color = "red")
I want to create a scatterplot with a dichotomous variable on the y-axis and a continuous variable on the x-axis. In addition to that, I want to display an S-shaped line showing the relationship between those two variables. After watching a tutorial, I transformed the predictive variable into a factor with two values and used this line of code: geom_smooth(method = "glm", se = FALSE, method.args = list(family = "binomial")). But I always get a warning message.
structure(list(country = structure(c(8L, 9L, 4L, 5L, 10L, 1L,
7L, 6L, 3L, 12L, 2L, 11L), .Label = c("Lebanon", "Tunesia", "Palestine",
"Iraq", "Jordan", "Morocco", "Libya", "Algeria", "Egypt", "Kuwait",
"Yemen", "Sudan", ""), class = "factor"), women_divorce = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Agree/strongly agree",
"Disagree/strongly disagree", "Don't know"), class = "factor"),
n = c(1608L, 1576L, 1975L, 1881L, 892L, 2098L, 1416L, 1714L,
2006L, 828L, 1900L, 1276L), prop = c(0.703104503716659, 0.676975945017182,
0.810755336617406, 0.790004199916002, 0.653001464128843,
0.874895746455379, 0.743697478991597, 0.751754385964912,
0.811488673139159, 0.481675392670157, 0.816852966466036,
0.540220152413209), reg_var = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
life_exp = c(76.693, 71.825, 70.454, 74.405, 75.398, 78.875,
72.724, 76.453, 73.895, 65.095, 76.505, 66.096)), row.names = c(NA,
-12L), groups = structure(list(country = structure(1:12, .Label = c("Lebanon",
"Tunesia", "Palestine", "Iraq", "Jordan", "Morocco", "Libya",
"Algeria", "Egypt", "Kuwait", "Yemen", "Sudan", ""), class = "factor"),
.rows = structure(list(6L, 11L, 9L, 3L, 4L, 8L, 7L, 1L, 2L,
5L, 12L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I used this code:
# Scatterplot of reg_var (y) against life_exp (x) with a binomial-GLM smoother
# and a text label per country.
# NOTE: reg_var is turned into a factor here; with a factor on the y-axis the
# binomial smoother fails with "y values must be 0 <= y <= 1", so no line is
# drawn.
life_exp_var$reg_var <- as.factor(life_exp_var$reg_var)
ggplot(life_exp_var, aes(life_exp, reg_var)) +
geom_point() +
# Logistic fit (glm with family = "binomial"), drawn without a confidence band
geom_smooth(method = "glm", se = FALSE, method.args = list(family = "binomial")) +
geom_text(aes(label = country, vjust = 1), size = 5) +
# x-axis restricted to 65-80 with a tick every 2.5 units
scale_x_continuous(limits = c(65, 80), breaks = seq(65, 80, by = 2.5)) +
labs(title = "Relationship between a country's average life expectancy and approval of equal divorce rights for women",
x = "Life expactancy (in years)", y = "Approval of equal divorce rights (%)") +
theme_minimal() +
# Size-20 black text for title and axes, visible axis ticks, a black panel
# border, grey major grid lines and no minor grid lines
theme(plot.title = element_text(size = 20, hjust = 0),
axis.text = element_text(size = 20, colour = "black"),
axis.title.x = element_text(size = 20, colour = "black"),
axis.title.y = element_text(size = 20, colour = "black"),
axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
panel.border = element_rect(colour = "black", fill = NA),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey"))
Getting this plot:
As you can see, there is no glm line created. I get this warning message:
Warning message:
Computation failed in `stat_smooth()`:
y values must be 0 <= y <= 1
What am I doing wrong?
Greetings
What you have to do is convert the y variable to numeric, so instead of the line
life_exp_var$reg_var <- as.factor(life_exp_var$reg_var)
you should use
life_exp_var$reg_var <- as.numeric(as.character(life_exp_var$reg_var))
You can take a look here: https://ggplot2.tidyverse.org/reference/geom_smooth.html
I'm trying to plot a histogram with ggplot2 comparing 2 periods of time. I want the bars to dodge (i.e. plot side-by-side) and not stack. I've tried this:
# Bar chart of the number of rows per region, filled by month.
# NOTE(review): qplot() presumably already draws its own bar layer for a
# discrete x, so the geom_bar() added below would be a second layer drawn on
# top of it -- confirm; that would explain the overlapping bars.
qplot(region, data = data, fill = month) +
labs(y = "Sales", fill = "") +
# Ask for side-by-side bars in the added layer
geom_bar(position = "dodge")
The dodge partially works but I'm still getting overlap:
How can I get the bars to position side-by-side properly?
Update
Output of dput(head(data, 20)) as requested by @RuiBarradas in the comments:
structure(list(month = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("May 2018",
"May 2019 (so far)"), class = "factor"), region = structure(c(5L,
6L, 6L, 5L, 5L, 4L, 6L, 6L, 4L, 5L, 6L, 5L, 6L, 6L, 5L, 4L, 5L,
5L, 6L, 6L), .Label = c("Abbotsford", "Agassiz", "Bowen Island",
"Burnaby East", "Burnaby North", "Burnaby South", "Chilliwack",
"Cloverdale", "Coquitlam", "Cultus Lake", "Harrison Hot Springs",
"Hope", "Islands-Van. & Gulf", "Ladner", "Langley", "Maple Ridge",
"Mission", "N. Delta", "New Westminster", "North Surrey", "North Vancouver",
"Pemberton", "Pitt Meadows", "Port Coquitlam", "Port Moody",
"Richmond", "Rosedale", "Sardis", "South Surrey White Rock",
"Squamish", "Sunshine Coast", "Surrey", "Tsawwassen", "Vancouver East",
"Vancouver West", "West Vancouver", "Whistler", "Yarrow", "Harrison Mills / Mt Woodside"
), class = "factor")), row.names = c(NA, 20L), class = "data.frame")
Apparently this is a qplot issue; it can be reproduced with the data in the OP's link and the question's code.
library(ggplot2)
# Reproduces the overlap from the question: a qplot() call with a dodged
# geom_bar() layer added to it.
# NOTE(review): the overlap presumably comes from qplot() having already added
# its own bar layer -- confirm.
qplot(region, data = data, fill = month) +
labs(y = "Sales") +
geom_bar(position = position_dodge())
But with ggplot everything works as expected.
# Same chart built with ggplot() directly: a single geom_bar() layer whose
# bars are dodged (side by side) for each fill value of month.
ggplot(data, aes(x = region, fill = month)) +
geom_bar(position = position_dodge()) +
# Rotate the x-axis labels by 90 degrees and right-justify them
theme(axis.text.x = element_text(angle=90, hjust=1)) +
labs(y = "Sales")
In the code below, inA in the third line from the bottom of the function is not recognized/evaluated properly. I get
Error in parse(text = inA) : object 'inA' not found
inA is recognized just fine in the two other lines above that where it is used. I have tried a LOT of permutations.
# Summarise cases and population by two grouping columns plus BestYr, then
# plot the rate over BestYr, coloured by the first grouping column and
# faceted by the second.
#
# Args:
#   inraw: data frame with columns BestYr, Cases and Pop, plus the two
#          columns named by inA and inB.
#   inA:   column name, as a string, used for the line colour.
#   inB:   column name, as a string, used for the facets.
#
# The plot is printed as a side effect.
gcplot2 <- function (inraw,inA,inB){
inwork <- ddply(inraw,
# The strings in inA/inB are parsed and evaluated to obtain grouping variables
.( eval(parse(text=inA)), eval(parse(text=inB)),BestYr),
summarize,
cases=sum(Cases,na.rm=TRUE),
pop=sum(Pop,na.rm=TRUE),
# Rate per 100,000 population, rounded to two decimals
rate=round(100000*cases/pop,2))
# Name the first two result columns after the requested grouping columns
# (presumably because ddply labels them after the eval(parse()) expressions).
names(inwork)[1] <- inA
names(inwork)[2] <- inB
#problem "inA" is here
# NOTE(review): the expression inside aes() is presumably not evaluated at
# this point but later, where the function-local inA is no longer visible;
# that would match the reported "object 'inA' not found" error -- confirm.
x <- ggplot(inwork,aes(BestYr,rate, color=eval(parse(text=inA))))
x <- x + geom_line(size=1.5) + facet_wrap(as.formula(paste0("~ ",inB)))
print(x)
}
gcplot2(inraw=gc.full,"NewRace","Region")
Here is a small sample of the data frame that I hope can be used for a "reproducible example".
dput(temp2)
structure(list(LHJ = c("SACRAMENTO", "YOLO", "SAN BENITO", "COLUSA",
"STANISLAUS", "SAN DIEGO", "SHASTA", "TULARE", "MONTEREY", "KERN"
), BestYr = c(2010L, 2010L, 2010L, 2012L, 2012L, 2012L, 2011L,
2011L, 2010L, 2010L), Sex = structure(c(2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L), .Label = c("F", "M"), class = "factor"), RaceEth = structure(c(3L,
4L, 6L, 2L, 6L, 4L, 4L, 2L, 4L, 4L), .Label = c("A", "B", "H",
"O", "U", "W"), class = "factor"), AgeGrp = structure(c(1L, 4L,
5L, 2L, 7L, 2L, 4L, 2L, 3L, 1L), .Label = c("0-9", "10-14", "15-19",
"20-24", "25-29", "30-34", "35-44", "45+", "Unk"), class = "factor"),
Cases = c(NA, 0, 0, 0, 15.652173913, NA, 0, 0, 0, 0), Pop = c(32752.30608,
538.17138648, 444.83561193, 11.107216039, 14186.950585, 5486.3069863,
338.26814356, 245.3890448, 535.23711331, 2278.6798429), NewRace = c("Hispanic",
"Other", "White", "Black", "White", "Other", "Other", "Black",
"Other", "Other"), Region = structure(c(6L, 6L, 3L, 5L, 3L,
8L, 5L, 3L, 2L, 3L), .Label = c("Bay Area", "Central Coast",
"Central Inland", "Los Angeles", "Northern", "Sacramento Area",
"San Francisco", "Southern"), class = "factor")), .Names = c("LHJ",
"BestYr", "Sex", "RaceEth", "AgeGrp", "Cases", "Pop", "NewRace",
"Region"), row.names = c(41377L, 67523L, 42571L, 7418L, 59857L,
45051L, 54102L, 64260L, 32612L, 17538L), class = "data.frame")
I would do this as follows, with aes_string (and specifying the ddply variables via a character vector rather than .()):
# Plot the rate per 100,000 over BestYr, coloured by one grouping column and
# faceted by another.
#
# Args:
#   inraw: data frame with columns BestYr, Cases and Pop, plus the two
#          columns named by inA and inB.
#   inA:   name (single string) of the column mapped to the line colour.
#   inB:   name (single string) of the column used for the facets.
#
# Returns: the ggplot object (it is auto-printed when the call is made at top
# level).
gcplot2 <- function(inraw, inA, inB) {
  # library() stops with a clear error when a package is missing, whereas
  # require() only returns FALSE and lets the failure surface later as a
  # confusing "could not find function" error.
  library("plyr")
  library("ggplot2")
  # Fail early if the column names are not single strings or are not columns
  # of inraw, instead of failing deep inside ddply()/ggplot().
  stopifnot(
    is.data.frame(inraw),
    is.character(inA), length(inA) == 1L,
    is.character(inB), length(inB) == 1L,
    all(c(inA, inB, "BestYr", "Cases", "Pop") %in% names(inraw))
  )
  # Grouping variables are given as a character vector, so no
  # eval(parse()) is needed and the result columns keep their real names.
  inwork <- ddply(inraw,
                  c(inA, inB, "BestYr"),
                  summarize,
                  cases = sum(Cases, na.rm = TRUE),
                  pop = sum(Pop, na.rm = TRUE),
                  # Rate per 100,000 population, rounded to two decimals
                  rate = round(100000 * cases / pop, 2))
  # aes_string() takes the colour column as a string. On ggplot2 >= 3.0.0 the
  # recommended equivalent is aes(color = .data[[inA]]).
  ggplot(inwork, aes(BestYr, rate)) +
    geom_line(aes_string(color = inA), size = 1.5) +
    facet_wrap(as.formula(paste0("~ ", inB)))
}
gcplot2(inraw=gc.full,"NewRace","Region")
I get warnings, but I think that's due to using a tiny subset of the data ...
I am graphing some data with ggplot. However, I don't understand the error I'm getting with slightly different data than data that I can graph successfully. For example, this data graphs successfully:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
# Filled bar chart: one bar per Teacher, split by Level, with Count on y.
# NOTE(review): opts(), theme_text() and formatter= look like an old ggplot2
# API; code elsewhere in this file uses theme() and element_text() -- confirm
# against the installed ggplot2 version.
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
# position = 'fill' stacks the levels and scales each bar to the same height
geom_bar(aes(fill = Level), position = 'fill') +
# Untitled y-axis with labels formatted as percentages
scale_y_continuous("",formatter="percent") +
# One colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
opts(axis.text.x=theme_text(angle=45)) +
opts(title = "Score Distribution")
But this does not:
to_graph <- structure(list(School = c(84351L, 84384L, 84385L, 84386L, 84387L,
84388L, 84389L, 84397L, 84398L, 84351L, 84384L, 84385L, 84386L,
84387L, 84388L, 84389L, 84397L, 84398L, 84351L, 84386L), Level = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L), .Label = c("BE", "AE", "ME", "EE"), class = "factor"),
Count = c(3L, 7L, 5L, 4L, 3L, 4L, 4L, 6L, 2L, 116L, 138L,
147L, 83L, 76L, 81L, 83L, 85L, 53L, 1L, 1L)), .Names = c("School",
"Level", "Count"), row.names = c(NA, 20L), class = "data.frame")
# Same chart as for teachers, but with School on the x-axis.
# NOTE: School is an integer column here, so x is not discrete; this is the
# call that produces the stat_bin message and the error quoted after it.
ggplot(data=to_graph, aes(x=School, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
# Untitled y-axis with labels formatted as percentages
scale_y_continuous("",formatter="percent") +
# One colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
opts(axis.text.x=theme_text(angle=90)) +
opts(title = "Score Distribution")
With the latter code, I get this error:
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. Error in if (!all(data$ymin == 0)) warning("Filling not well
defined when ymin != 0") : missing value where TRUE/FALSE needed
Anyone know what's going on here? Thank you!
The error occurs because your x variable has numerical values, when in reality you want them to be discrete, i.e. use x=factor(School).
The reason for this is that stat_bin, the default stat for geom_bar, will try to summarise for each unique value of x. When your x-variable is numeric, it tries to summarise at each integer in the range. This is clearly not what you need.
# Corrected call: factor(School) makes the x variable discrete, so each school
# gets its own bar instead of being binned as a number.
# NOTE(review): opts(), theme_text() and formatter= look like an old ggplot2
# API -- confirm against the installed ggplot2 version.
ggplot(data=to_graph, aes(x=factor(School), y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position='fill') +
opts(axis.text.x=theme_text(angle=90)) +
# Untitled y-axis with labels formatted as percentages
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
# One colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF"))