How to find Percent Frequency gg plot - r

There have been a few questions on here asking how to plot percent frequency. I have tried implementing the suggestions but am still having trouble.
I have the following vector:
var <- c(2,2,1,0,1,1,1,1,1,3,2,3,3,5,1,4,4,0,3,4,1,0,3,3,0,0,
1,3,2,6,2,2,2,1,0,2,3,2,0,0,0,0,3,2,2,4,3,2,2,0,4,1,0,1,3,1,4,3,1,2,
6,7,6,1,2,2,4,5,3,0,6,5,2,0,7,1,7,3,1,4,1,1,2,1,1,2,1,1,4,2,0,3,3,2,2,2,5,3,2,5,2,5)
I plotted a histogram using the following code:
df <- data.table(x = var)
df <- df[, .N, by=x]
df$x <- factor(df$x, levels=c(0:25))
p <- ggplot(df, aes(x=x, y= N)) +
geom_bar(
stat="identity", width=1.0,
colour = "darkgreen",
fill = 'paleturquoise4'
)
p <- p + labs(scale_x_discrete(drop=FALSE) )
p = p + coord_cartesian(ylim=c(0, 50)) +
scale_y_continuous(breaks=seq(0, 50, 2))
print(p)
I tried using the following but it does not work.
p <- ggplot(df, aes(x=x, y= N)) +
geom_bar(
aes(y = (..count..)/sum(..count..)),
stat="identity", width=1.0,
colour = "darkgreen",
fill = 'paleturquoise4'
)

One thing you can do is that you can do the calculation before you draw the graphic. But, if I follow your approach, you would want something like this.
ggplot(df, aes(x=x)) +
geom_bar(aes(y = N/sum(N)), stat="identity", width=1.0,
colour = "dark green", fill = 'paleturquoise4') +
ylab("y")

Related

How to fit a lot of x labels or space out x labels in ggplot2

Is there a way to space out so that each x label is more distinguishable?
I feel like this question has been asked before but I can't seem to find an answer. I believe the graph needs to be bigger for this to work, is their a way to make the graph bigger within Rstudio? or make the text smaller
My code so far :
bar_plt = ggplot(data, aes(fct_infreq(Event))) + geom_bar(fill = "dodgerblue", width = .4) +
xlab("Event Names") + ylab("Number of Observations") + coord_flip()
TIA
what about working with labels in this way (sorry for the fake data, but I have not got a sample of your):
library(ggplot2)
# numbers
set.seed(1)
y<-sample(1:30, 500, TRUE)
# very long and numerous labels
x <- paste(sample(letters[1:22], 500, TRUE),sample(letters[1:2], 500, TRUE),'abcdefghijklmnopqrstuvwxyz')
data <- data.frame(x,y)
# simple ggplot barplot
p <- ggplot(data, aes(x = x, y = y)) + geom_bar(stat = "identity") + coord_flip()
# play with the size to have a fitting dimension
p <- p + theme(axis.text.y = element_text(face="bold", color="black", size=8))
# you can also abbreviate the labels if necessary
p <- p + scale_x_discrete(labels = abbreviate)
p
Your plot could be something like:
library(forcats)
library(ggplot2)
# data
set.seed(1)
Events <- paste(sample(letters[1:22], 500, TRUE),sample(letters[1:2], 500, TRUE),'abcdefghijklmnopqrstuvwxyz')
data <- data.frame(Events)
bar_plt <- ggplot(data, aes(fct_infreq(Events))) + geom_bar(fill = "dodgerblue", width = .4) + coord_flip()
bar_plt <- bar_plt + xlab("Event Names") + ylab("Number of Observations")
bar_plt <- bar_plt + theme(axis.text.y = element_text(face="bold", color="black", size=8))
bar_plt <- bar_plt + scale_x_discrete(labels = abbreviate)
bar_plt

ggplot2: doughnuts, how to conditional color fill with if_else

Following guides like ggplot Donut chart I am trying to draw small gauges, doughnuts with a label in the middle, with the intention to put them later on on a map.
If the value reaches a certain threshold I would like the fill of the doughnut to change to red. Is it possible to achieve with if_else (it would be most natural but it does not work).
library(tidyverse)
df <- tibble(ID=c("A","B"),value=c(0.7,0.5)) %>% gather(key = cat,value = val,-ID)
ggplot(df, aes(x = val, fill = cat)) + scale_fill_manual(aes,values = c("red", "yellow"))+
geom_bar(position="fill") + coord_polar(start = 0, theta="y")
ymax <- max(df$val)
ymin <- min(df$val)
p2 = ggplot(df, aes(fill=cat, y=0, ymax=1, ymin=val, xmax=4, xmin=3)) +
geom_rect(colour="black",stat = "identity") +
scale_fill_manual(values = if_else (val > 0.5, "red", "black")) +
geom_text( aes(x=0, y=0, label= scales::percent (1-val)), position = position_dodge(0.9))+
coord_polar(theta="y") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position="none") +
scale_y_reverse() + facet_wrap(facets = "ID")
Scale fill manual values= if else.... this part does not work, the error says: Error in if_else(val > 0.5, "red", "black") : object 'val' not found. Is it my error, or some other solution exists?
I also realize my code is not optimal, initially gather waited for more variables to be included in the plot, but I failed to stack one variable on top of the other. Now one variable should be enough to indicate the percentage of completion. I realise my code is redundant for the purpose. Can you help me out?
A solution for the color problem is to first create a variable in the data and then use that to map the color in the plot:
df <- tibble(ID=c("A","B"),value=c(0.7,0.5)) %>% gather(key = cat,value = val,-ID) %>%
mutate(color = if_else(val > 0.5, "red", "black"))
p2 = ggplot(df, aes(fill=color, y=0, ymax=1, ymin=val, xmax=4, xmin=3)) +
geom_rect(colour="black",stat = "identity") +
scale_fill_manual(values = c(`red` = "red", `black` = "black")) +
geom_text( aes(x=0, y=0, label= scales::percent (1-val)), position = position_dodge(0.9))+
coord_polar(theta="y") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position="none") +
scale_y_reverse() + facet_wrap(facets = "ID")
The result would be:

Deleting an entire row of facets of unused factor level combination

I want to remove the 2nd row of facets from my plot below because there is no data for that factor combination.
library(ggplot2)
library(grid)
set.seed(5000)
# generate first df
df1 = data.frame(x=rep(rep(seq(2,8,2),4),6),
y=rep(rep(seq(2,8,2),each=4),6),
v1=c(rep("x1",32),rep("x2",64)),
v2=c(rep("y1",64),rep("y2",32)),
v3=rep(rep(c("t1","t2"),each=16),3),
v4=rbinom(96,1,0.5))
# generate second df
df2 = data.frame(x=runif(20)*10, y=runif(20)*10,
v1=sample(c("x1","x2"),20,T))
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)
I tried to use the idea from this post..
g=ggplotGrob(y)
pos=which(g$layout$t==5 | g$layout$t==6)
g$layout=g$layout[-c(pos),]
g$grobs=g$grobs[-c(pos)]
grid.newpage()
grid.draw(g)
..but got this.
How do I eliminate the white space? Also, is there a straightforward solution to this, without having to manipulate the grobs, etc?
Just modify the data:
df2 <- rbind(cbind(df2, v2 = "y1"),
cbind(df2, v2 = "y2"))
df2 <- df2[!(df2$v1 == "x1" & df2$v2 == "y2"),]
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)

full text label on Boxplot, with added mean point

Am trying to get text label similar to what this https://stats.stackexchange.com/questions/8206/labeling-boxplots-in-r, but I cant get it to work. MWE similar to what I have is this:
data <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
meanFunction <- function(x){
return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}
ggplot(melt(data), aes(x=variable, y=value)) +
geom_boxplot(aes(fill=variable), width = 0.7) +
stat_summary(fun.y = mean, geom="point",colour="darkred", size=4) +
stat_summary(fun.data = meanFunction, geom="text", size = 4, vjust=1.3)
That produces something like "A" in the attached image, and I am trying to get something like "B" for each of the boxes. Thanks.
Here is my attempt. First, I reshaped your data. Then, I produced your boxplot. I changed the size and colour of text for mean. Then, I looked into the data that ggplot used, which you can access using ggplot_build(objectname)$data[[1]]. You can see the numbers you need. I selected necessary variables and reshaped the data, which is df. Using df, you can annotate the numbers you want.
library(dplyr)
library(tidyr)
library(ggplot2)
set.seed(123)
mydf <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
mydf <- gather(mydf, variable, value)
meanFunction <- function(x){
return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}
g <- ggplot(data = mydf, aes(x = variable, y = value, fill = variable)) +
geom_boxplot(width = 0.5) +
stat_summary(fun.y = mean, geom = "point",colour = "darkred", size=4) +
stat_summary(fun.data = meanFunction, geom ="text", color = "white", size = 3, vjust = 1.3)
df <- ggplot_build(g)$data[[1]] %>%
select(ymin:ymax, x) %>%
gather(type, value, - x) %>%
arrange(x)
g + annotate("text", x = df$x + 0.4, y = df$value, label = df$value, size = 3)
First, I would take your data and then calculate all the boxplot features yourself. Here's one way to do that
dd <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
tt <- data.frame(t(sapply(dd, function(x) c(boxplot.stats(x)$stats, mean(x)))))
names(tt) <- c("ymin","lower","middle","upper","ymax", "mean")
tt$var <- factor(rownames(tt))
I'm sure there are prettier ways to do that with dplyr but this point is you'll need to calculate those values yourself so you know where to draw the labels. Then you can do
ggplot(tt) +
geom_boxplot(aes(x=var, ymin=ymin, lower=lower, middle=middle, upper=upper, ymax=ymax), stat="identity", width=.5) +
geom_text(aes(x=as.numeric(var)+.3, y=middle, label=formatC(middle,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= lower, label=formatC(lower,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= upper, label=formatC(upper,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= ymax, label=formatC(ymax,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= ymin, label=formatC(ymin,1, format="f")), hjust=0) +
geom_point(aes(x=var, y=mean)) +
geom_text(aes(x=as.numeric(var), y= mean, label=formatC(mean,1, format="f")), hjust=.5, vjust=1.5)
to draw each of the labels

ggplot: relative frequencies of two groups

I want a plot like this except that each facet sums to 100%. Right now group M is 0.05+0.25=0.30 instead of 0.20+0.80=1.00.
df <- rbind(
data.frame(gender=c(rep('M',5)), outcome=c(rep('1',4),'0')),
data.frame(gender=c(rep('F',10)), outcome=c(rep('1',7),rep('0',3)))
)
df
ggplot(df, aes(outcome)) +
geom_bar(aes(y = (..count..)/sum(..count..))) +
facet_wrap(~gender, nrow=2, ncol=1)
(Using y = ..density.. gives worse results.)
here's another way
ggplot(df, aes(outcome)) +
geom_bar(aes(y = ..count.. / sapply(PANEL, FUN=function(x) sum(count[PANEL == x])))) +
facet_wrap(~gender, nrow=2, ncol=1)
I usually do this by simply precalculating the values outside of ggplot2 and using stat = "identity":
df1 <- melt(ddply(df,.(gender),function(x){prop.table(table(x$outcome))}),id.vars = 1)
ggplot(df1, aes(x = variable,y = value)) +
facet_wrap(~gender, nrow=2, ncol=1) +
geom_bar(stat = "identity")

Resources