I'm trying to plot the facet plot below in alphabetical order, I've tried multiple ways of doing it and even tried getting rid of the facet and just plotting Overpaid vs proportion, but the plot still had an alphabetical x axis.
Initially I tried assigning a frequency value to every element then ordering the list by that value but that didn't change the plot.
overpaid_each_country <- survey_df %>%
filter(!is.na(Overpaid)) %>%
filter(Country %in% Country_Sum$Var1)
overpaid_each_country <- overpaid_each_country[ , which(names(overpaid_each_country) %in% c("Overpaid","Country"))]
overpaid_each_country <- transform(overpaid_each_country, freq = ave(seq(nrow(overpaid_each_country)), Overpaid, FUN=length))
overpaid_each_country <- overpaid_each_country[order(overpaid_each_country$freq), ]
Then I tried setting the factor levels but although the factor levels changed, the plot didn't.
overpaid_each_country %>%
mutate(Overpaid2 = factor(Overpaid, levels = c("Greatly overpaid", "Somewhat overpaid", "Greatly underpaid", "Neither underpaid nor overpaid", "Somewhat underpaid"))) %>%
ggplot(aes(x = Overpaid2, y = ..prop.., group = 1)) +
geom_bar( color = "white", fill = "#42dff4") +
facet_wrap(~ Country, nrow = 4) +
aes(stringr::str_wrap(Overpaid, 10)) +
xlab("OverPaid orUnderpaid") +
ylab("Proportion of Respondents")+
labs(title = "Are you Overpaid or Underpaid?") +
theme(axis.title.x=element_blank(), axis.text.y=element_text(size=12), axis.text.x = element_text(size=12), plot.title = element_text(size = 17))
You could use forcats::fct_reorder() to re-order your factor-levels, or you change the levels from somethink like "a" to "e", and then pass a named vector (where names match factor levels) to scale_x_discrete() as labels-argument.
library(ggplot2)
d <- data.frame(
x = factor(sample(1:5, size = 100, replace = TRUE), labels = c("a", "b", "c", "d", "e"))
)
ggplot(d, aes(x = x, y = ..prop..)) + geom_bar()
labels <- c(a = "Greatly underpaid", b = "Somewhat underpaid", c = "Neither/nor", d = "somewhat overpaid", e = "greatly overpaid")
ggplot(d, aes(x = x, y = ..prop..)) +
geom_bar() +
scale_x_discrete(labels = labels)
In your 6th line
aes(stringr::str_wrap(Overpaid, 10)) +
change to overpaid2
aes(stringr::str_wrap(Overpaid2, 10)) +
Related
I have the following data & code to produce a barplot (building on this answer)
tmpdf <- tibble(class = c("class 1", rep("class 2", 4), rep("class 3", 4)),
var_1 = c("none", rep(c("A", "B", "C", "D"), 2)),
y_ = as.integer(c(runif(9, min = 100, max=250))))
tmpdf <- rbind(tmpdf, cbind(expand.grid(class = levels(as.factor(tmpdf$class)),
var_1 = levels(as.factor(tmpdf$var_1))),
y_ = NA))
ggplot(data=tmpdf, aes(x = class, y = y_, fill=var_1, width=0.75 )) +
geom_bar(stat = "identity", position=position_dodge(width = 0.90), color="black", size=0.2)
This produces the below plot:
However, since not all class / var_1 combinations are present, some space on the x-axis is lost. I would now like to remove the empty space on the x-axis without making the bars wider(!).
Can someone point me to the right direction?
You can use na.omit to remove unused levels, and then use facet_grid with scales = "free_x" and space = "free_x" to remove space.
ggplot(data=na.omit(tmpdf), aes(x = var_1, y = y_, fill=var_1, width=0.75)) +
geom_col(position=position_dodge(width = 0.90), color="black", size=0.2) +
facet_grid(~ class, scales = "free_x", space = "free_x", switch = "x") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.background = element_blank())
Technically, you could tweak a column chart (geom_col) to the desired effect, like so:
mpdf %>%
mutate(xpos = c(1.6, 2 + .2 * 0:3, 3 + .2 * 0:3)) %>%
ggplot() +
geom_col(aes(x = xpos, y = y_, fill = var_1)) +
scale_x_continuous(breaks = c(1.6, 2.3 + 0:1), labels = unique(mpdf$class))
However, the resulting barplot (condensed or not) might be difficult to interpret as long as you want to convey differences between classes. For example, the plot has to be studied carefully to detect that variable D runs against the pattern of increasing values from class 2 to 3.
I am trying to obtain a back-to-back bar plot (or pyramid plot) similar to the ones shown here:
Population pyramid with gender and comparing across two time periods with ggplot2
Basically, a pyramid plot of a quantitative variable whose values have to be displayed for combinations of three categorical variables.
library(ggplot2)
library(dplyr)
df <- data.frame(Gender = rep(c("M", "F"), each = 20),
Age = rep(c("0-10", "11-20", "21-30", "31-40", "41-50",
"51-60", "61-70", "71-80", "81-90", "91-100"), 4),
Year = factor(rep(c(2009, 2010, 2009, 2010), each= 10)),
Value = sample(seq(50, 100, 5), 40, replace = TRUE)) %>%
mutate(Value = ifelse(Gender == "F", Value *-1 , Value))
ggplot(df) +
geom_col(aes(fill = interaction(Gender, Year, sep = "-"),
y = Value,
x = Age),
position = "dodge") +
scale_y_continuous(labels = abs,
expand = c(0, 0)) +
scale_fill_manual(values = hcl(h = c(15,195,15,195),
c = 100,
l = 65,
alpha=c(0.4,0.4,1,1)),
name = "") +
coord_flip() +
facet_wrap(.~ Gender,
scale = "free_x",
strip.position = "bottom") +
theme_minimal() +
theme(legend.position = "bottom",
panel.spacing.x = unit(0, "pt"),
strip.background = element_rect(colour = "black"))
example of back-to-back barplot I want to mimick
Trying to mimick this example on my data, things go wrong from the first ggplot function call as the bars are not dodged on both sides of the axis:
mydf = read.table("https://raw.githubusercontent.com/gilles-guillot/IPUMS_R/main/tmp/df.csv",
header=TRUE,sep=";")
ggplot(mydf) +
geom_col(aes(fill = interaction(mig,ISCO08WHO_yrstud, sep = "-"),
x = country,
y = f),
position = "dodge")
failed attempt to get a back-to-back bar plot
as I was expected from:
ggplot(df) +
geom_col(aes(fill = interaction(Gender, Year, sep = "-"),
y = Value,
x = Age),
position = "dodge")
geol_col plot with bar dodged symmetrically around axis
In the example you are following, df$Value is made negative if Gender == 'F'. You need to do similar to achieve "bar dodged symmetrically around axis".
How do I color outliers that are above a specific value using ggplot2 in R?.
(Sorry for the seemingly easy question, I am a beginner. the reason why is that these are frequencies of a value of 0, I am then transforming this column of data by taking the -log10(). So anything that has a frequency of 0 would then be transformed into Inf. Attached is a screenshot of my plot, essentially I want to make all the outlier points above 10 on the y axis to be a different color.
boxplots <- function(df){
df$'frequency'[is.na(df$'frequency')] <- 0.00
df$'-log10(frequency)' <- -log10(df$'frequency')
x <- data.frame(group = 'x', value = df$'-log10(frequency)'[df$'Type'=='x'])
y <- data.frame(group = 'y', value = df$'-log10(frequency)'[df$'Type'=='y'])
z <- data.frame(group = 'z', value = df$'-log10(frequency)'[df$'Type'=='c=z'])
plot.data <<- rbind(x, y, z)
labels <- c("z", "y", "z")
t<-plot.data %>%
ggplot(aes(x = group, y = value, fill = group))+
geom_boxplot()+
scale_fill_viridis(discrete = TRUE, alpha = 0.6)+
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("Distribution of -log10(frequency) by Type") +
xlab("Type")+
ylab("-log10(frequency)")+
scale_x_discrete(labels=labels)+
scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2))
print(t)
s<<-t
ggsave("frequency_by_type.png", plot = t)
}
you could just create a new column indicating wheather it is an outlier or not and map this to the geom_jitter color. I resumed the answer in a smaller example but you should be able to fit this accordingly:
library(ggplot2)
library(viridis)
plot.data <- data.frame(group = c("1","1","1","1","1","2","2","2","2","2"),
value = c(1,5,10,6,3,1,5,10,6,3))
t<-plot.data %>%
mutate(outlier = ifelse(value >9, "YES", "NO")) %>%
ggplot(aes(x = group, y = value, fill = group))+
geom_boxplot()+
geom_jitter(aes(group, value, color = outlier) , size=2, alpha=0.9)+
scale_fill_viridis(discrete = TRUE, alpha = 0.6)
t
library(ggplot2)
# Basic box plot
p <- ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot()
p
# Rotate the box plot
p + coord_flip()
# Notched box plot
ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot(notch=TRUE)
# Change outlier, color, shape and size
ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot(outlier.colour="red", outlier.shape=8,
outlier.size=4)
I am trying to plot bar graphs on a map following this example: How to plot barchart onto ggplot2 map. This works fine but in contrast to the example I would like to add x and y axis. The problem is that the y range of the data differs considerably across the regions for which I would like to create the bar graphs.
In order to produce comparable graphs (i.e. where the y axis has an identical dimension), I apply the same y limits to all graphs and adjust the breaks for each graph. This is, however, not a good solution as some of the graphs (p2 in this case) have a large empty area above and below the bars. In order to make it work, I am looking for a method top crop the graphs to remove the empty areas, while at the same time maintaining the dimensions of the graph so that y axis can be compared.
library(dplyr)
library(ggplot2)
df <- data.frame(type = c("A", "B", "A", "B"), country = c("NLD", "NLD", "BEL", "BEL"), value = c(10, -10, 5, 2))
df1 <- filter(df, country == "NLD")
p1 <- ggplot(data = df1) +
geom_col(aes(x = type, y = value)) +
scale_y_continuous(limits = c(min(df$value), max(df$value)), breaks = seq(min(df1$value), max(df1$value), 2)) +
theme_bw()
p1
df2 <- filter(df, country == "BEL")
p2 <- ggplot(data = df2) +
geom_col(aes(x = type, y = value)) +
scale_y_continuous(limits = c(min(df$value), max(df$value)), breaks = seq(min(df2$value), max(df2$value), 2)) +
theme_bw()
p2
Have you tried coord_fixed for each of the plot? If you can control the width of each plot the same, the height of bars will be comparable.
library(dplyr)
library(ggplot2)
library(gridExtra)
df <- data.frame(type = c("A", "B", "A", "B"), country = c("NLD", "NLD", "BEL", "BEL"), value = c(10, -10, 5, 2))
df1 <- filter(df, country == "NLD")
p1 <- ggplot(data = df1) +
geom_col(aes(x = type, y = value)) +
# scale_y_continuous(limits = c(min(df$value), max(df$value)), breaks = seq(min(df1$value), max(df1$value), 2)) +
theme_bw()
df2 <- filter(df, country == "BEL")
p2 <- ggplot(data = df2) +
geom_col(aes(x = type, y = value)) +
# scale_y_continuous(limits = c(min(df$value), max(df$value)), breaks = seq(min(df2$value), max(df2$value), 2)) +
theme_bw()
x_range <- length(unique(df$type))
y_range1 <- max(df1$value) - min(df1$value)
y_range2 <- max(df2$value) - min(df2$value)
g1 <- p1 + coord_fixed(ratio = x_range / y_range1)
g2 <- p2 + coord_fixed(ratio = x_range / y_range1)
# example output
grid.arrange(g1, g2, nrow = 1)
I have a plot like the one below, with a factor variable on the y-axis.
library(reshape2)
library(ggplot2)
MA <- c("A", "B", "C")
dfr <- data.frame(
name = factor(MA, levels = MA),
start = 1:3,
end = 3:5,
prozent = c(1,0.5,0.75)
)
mdfr <- melt(dfr, measure.vars = c("start", "end"))
ggplot(mdfr, aes(value, name)) + geom_line(aes(size = prozent)) + scale_size_area()
I want to remove the whitespace (or, in default ggplot2, "greyspace") between the horizontal lines so that they touch each other.
You could do as follows:
ggplot(mdfr, aes(value, name)) +
geom_line(aes(size = prozent)) +
scale_size_area() +
scale_y_discrete(expand = c(7, 0))