R: Boxplot filled with its "label" color - r

I want to fill each box with the color named by its label (e.g. the "blue" box filled in blue).
# Boxplot of scored probabilities per Color level; each group's mean is
# overlaid as a dark-red point and printed as a text label just above it.
# NOTE(review): `fun_mean` is not defined in this snippet; presumably it
# returns a data frame with `y` and `label` columns for the text layer.
goo1 <- ggplot(dataset1, aes(x = Color, y = Scored.Probabilities)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", colour = "darkred", size = 3) +
  stat_summary(fun.data = fun_mean, geom = "text", vjust = -0.7)
print(goo1)
Data can be found here: link

Add a scale (although you first need to make sure that every level of dataset1$Color is indeed the name of a color recognized by R; currently, "dark" and "game value" are not):
# Build the fill palette from the factor levels of Color: each level is used
# as its own colour name, except the two levels that are replaced below with
# the colour names "black" and "cyan".
colors <- levels(dataset1$Color)
colors[colors == "dark"] <- "black"
colors[colors == "game value"] <- "cyan"
goo2 <- ggplot(dataset1, aes(x = Color, y = Scored.Probabilities)) +
  geom_boxplot(aes(fill = Color)) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", colour = "darkred", size = 3) +
  # Unnamed values are assigned to the levels in order, which lines up here
  # because `colors` was derived from those same levels.
  scale_fill_manual(values = colors)

Related

why is my legend in ggplot2 in power bi backwards?

I am running the R visual in Power BI Desktop. I loaded my data set and wrote the code, but the legend comes out mixed up. Can someone point out what I am doing wrong?
below is the code
# Line chart of two yearly series (oddmean_interv and oddmean_base) with one
# set of error bars per series, drawn from the Power BI `dataset` data frame.
# Columns are first copied into short-named vectors that the aes() calls use.
x = dataset$yearnum
y = dataset$oddmean_interv
# y is red color
a = dataset$odd_ul95
b = dataset$odd_ll95
z = dataset$oddmean_base
# z is blue color
# NOTE(review): this vector is named `c`, like the base function; c(...) calls
# below still resolve to the function, but the name is easy to misread.
c = dataset$odd_ul95_base
d = dataset$odd_ll95_base
#converts column to vector
library(ggplot2)
# NOTE(review): color="blue" / color="red" inside aes() are constant strings
# mapped through the colour scale (they become the legend labels), not literal
# colours. scale_colour_manual() then hands its unnamed values to those labels
# in order, which would explain the swapped legend - confirm.
p = ggplot(dataset, aes(x=x)) + geom_line(aes(y=y, color="blue")) + geom_line(aes(y=z, color="red")) +
ylim(0, 35) +
geom_errorbar(aes(ymin=b, ymax=a), width=0.2, position=position_dodge(0.05)) +
geom_errorbar(aes(ymin=d, ymax=c), width=0.2, position=position_dodge(0.05)) +
scale_colour_manual(values=c("red", "blue")) +
labs(x = "Year", y="Mean Overdose Per 100,000")
print(p)
here is what it shows
i am trying to get the red line to say oddmean_interv and the blue line to say oddmean_base.
any help is appreciated

How to highlight a column in ggplot2

I have the following graph and I want to highlight the columns (both) for watermelons, as it has the highest juice_content and weight. I know how to change the color of the columns, but I would like the WHOLE columns to be highlighted. Any idea on how to achieve this? There doesn't seem to be anything similar online.
# Toy data: one row per fruit with two measurements.
fruits <- c("apple", "orange", "watermelons")
juice_content <- c(10, 1, 1000)
weight <- c(5, 2, 2000)
df <- data.frame(fruits, juice_content, weight)

# Reshape to long form: `compare` names the measurement (stored as a factor
# because of factor_key) and `measure` holds its value.
df <- gather(
  df,
  key = compare,
  value = measure,
  juice_content:weight,
  factor_key = TRUE
)

# Dodged bars per fruit, one bar per measurement, on a log10 y axis.
plot <- ggplot(df, aes(x = fruits, y = measure, fill = compare)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_y_log10()
An option is to use gghighlight
library(gghighlight)

# Dodged bar chart on a log10 axis; gghighlight() keeps the colours only for
# bars matching the predicate and de-emphasises all the others.
ggplot(df, aes(x = fruits, y = measure, fill = compare)) +
  geom_col(position = position_dodge()) +
  scale_y_log10() +
  gghighlight(fruits == "watermelons")
In response to your comment, how about working with different alpha values
# Draw the bars in two layers so that only the "watermelons" bars keep full
# opacity; the bars of every other fruit are drawn with alpha 0.2.
# `data = . %>% filter(...)` is a function built with the magrittr pipe;
# ggplot2 applies it to the plot data (`df`) to get each layer's subset.
ggplot(df, aes(x = fruits, y = measure)) +
  geom_col(
    data = . %>% filter(fruits == "watermelons"),
    mapping = aes(fill = compare),
    position = position_dodge()
  ) +
  geom_col(
    data = . %>% filter(fruits != "watermelons"),
    mapping = aes(fill = compare),
    alpha = 0.2,
    position = position_dodge()
  ) +
  scale_y_log10()
Or you can achieve the same with one geom_col and a conditional alpha (thanks @Tjebo)
# Single-layer variant: alpha is mapped to the logical "is this watermelons?"
# and the manual scale turns that logical into two opacity levels.
ggplot(df, aes(x = fruits, y = measure)) +
  geom_col(
    mapping = aes(fill = compare, alpha = fruits == "watermelons"),
    position = position_dodge()
  ) +
  # FALSE -> 0.2 (faded), TRUE -> 1 (opaque)
  scale_alpha_manual(values = c(0.2, 1)) +
  scale_y_log10()
You could use geom_area to highlight behind the bars. You have to force the x scale to discrete first which is why I've used geom_blank (see this answer geom_ribbon overlay when x-axis is discrete) noting that geom_ribbon and geom_area are effectively the same except geom_area always has 0 as ymin
# Highlight the whole "watermelons" column with a yellow area drawn behind
# its bars.
#minor edit so that the level isn't hard coded
# factor() is needed when `fruits` is a character column (data.frame() no
# longer converts strings to factors as of R 4.0.0): levels() of a character
# vector is NULL, so the position would come back empty. The sorted levels
# from factor() match the order ggplot2 uses on a discrete axis.
watermelon_level <- which(levels(factor(df$fruits)) == "watermelons")
# Half a unit either side of the category's integer position on the x axis.
AreaDF <- data.frame(fruits = c(watermelon_level - 0.5, watermelon_level + 0.5))
plot <- ggplot(df, aes(fruits)) +
  # geom_blank comes first so the x scale is set up as discrete before the
  # area layer adds its numeric positions.
  geom_blank(aes(y = measure, fill = compare)) +
  geom_area(data = AreaDF, aes(y = max(df$measure)), fill = "yellow") +
  geom_bar(
    aes(y = measure, fill = compare),
    stat = "identity",
    position = position_dodge()
  ) +
  scale_y_log10()
Edit to address comment
If you want to highlight multiple fruits then you could do something like this. You need a data.frame specifying the x and y positions for the geom_area, including where it drops to 0 between highlighted columns. I'm sure there are slightly tidier methods of getting the data.frame, but this one works.
# Highlight several categories at once with a yellow area behind their bars.
# factor() is needed when `fruits` is a character column (data.frame() no
# longer converts strings to factors as of R 4.0.0): levels() of a character
# vector is NULL, so no positions would be found.
highlight_level <- which(levels(factor(df$fruits)) %in% c("apple", "watermelons"))

# Four x positions per highlighted category: just outside each edge (area
# down at 1, i.e. 0 on the log10 axis) and on each edge (area at the maximum).
AreaDF <- data.frame(
  fruits = unlist(lapply(
    highlight_level,
    function(x) c(x - 0.51, x - 0.5, x + 0.5, x + 0.51)
  )),
  yval = rep(
    c(1, max(df$measure), max(df$measure), 1),
    length(highlight_level)
  )
)

# Where two highlighted categories are adjacent, keep the area up between
# them instead of dropping it, then sort by x and remove duplicate rows.
AreaDF <- AreaDF %>%
  mutate(
    yval = ifelse(
      floor(fruits) %in% highlight_level & ceiling(fruits) %in% highlight_level,
      max(df$measure),
      yval
    )
  ) %>%
  arrange(fruits) %>%
  distinct()

plot <- ggplot(df, aes(fruits)) +
  # geom_blank comes first so the x scale is set up as discrete before the
  # area layer adds its numeric positions.
  geom_blank(aes(y = measure, fill = compare)) +
  geom_area(data = AreaDF, aes(y = yval), fill = "yellow") +
  geom_bar(
    aes(y = measure, fill = compare),
    stat = "identity",
    position = position_dodge()
  ) +
  scale_y_log10()
plot

ggplot bubble graph custom colors

I have this dataframe that I'm trying to use custom colors
# Example data: 15 rows, five per condition. Every column, including the
# numeric-looking ones, is entered as character strings.
data <- data.frame(
  condition = rep(c("1", "2", "3"), each = 5),
  AssessmentGrade = c(
    "400", "410", "420", "430", "440",
    "500", "510", "520", "530", "540",
    "300", "310", "320", "330", "340"
  ),
  Freq = c(
    "1", "2", "1", "5", "7",
    "9", "1", "5", "3", "4",
    "5", "8", "1", "3", "5"
  ),
  MathGrade = c(
    "A+", "B-", "C-", "D", "F",
    "A-", "B", "C+", "D-", "F",
    "A+", "D", "D", "F", "C"
  ),
  Condition = rep(
    c("Condition 1", "Condition 2", "Condition 3"),
    each = 5
  )
)
I used ggplot to get a bubble graph, but I was wondering how I would edit it to use my company's standard colors.
# Bubble chart: math grade against assessment score, with bubble size taken
# from Freq. Condition is mapped to fill for the whole plot and to colour
# for the point layer.
p <- ggplot(
  data,
  aes(x = MathGrade, y = AssessmentGrade, size = Freq, fill = Condition)
) +
  geom_point(mapping = aes(colour = Condition)) +
  ggtitle("Main Title") +
  labs(x = "First Math Grade", y = "Math Assessment Score")
I have a vector called colors:
colors
[1] "#101820" "#AF272F" "#EAAA00"
and I tried to graph it with this:
# Apply the custom palette to the fill scale. The palette vector is named
# `colors`; `color` is not defined anywhere in the snippet.
p <- p + scale_fill_manual(values = colors)
nothing changed. I tried following directions here but nothing changed. Can someone assist?
You create a palette:
# Company palette as hex colour codes.
my_colors <- c(
  "#101820",
  "#AF272F",
  "#EAAA00"
)
Then when it comes time you use that in your plot:
# Bubble chart using the custom palette for the fill of each point.
p <- ggplot(
  data,
  aes(x = MathGrade, y = AssessmentGrade, size = Freq, fill = Condition)
) +
  # shape 21 is a circle with a separate fill, so the fill scale shows up
  geom_point(shape = 21) +
  ggtitle("Main Title") +
  labs(x = "First Math Grade", y = "Math Assessment Score") +
  # or you could enter the color numbers directly here
  scale_fill_manual(values = my_colors)
This worked during testing but ggplot does not like how you used size in the main aesthetic.

How to reverse legend (labels and color) so high value starts at bottom?

What's the best way to invert the legend label order so that the 7 is at the bottom and the 1 is at the top?
# Histogram of `correct`, faceted into one row per day, for the rows of `df`
# with feedback == 1 and stp > 20.
# NOTE(review): `day` is read from `df3` but written into `df` - confirm that
# the two data frames are meant to differ.
df$day <- as.numeric(df3$day)
# Palette generator between two blues; not used again within this snippet.
blues <- colorRampPalette(c('#132B43', '#56B1F7'))
p4 <-
ggplot(subset(df,feedback==1&stp>20), aes(x=correct, fill=day, colour=day)) +
geom_histogram(colour="black", binwidth=10) +
facet_grid(day ~ .) +
ggtitle("Over-pronation histogram") +
ylab("Count (150s period)") +
# NOTE(review): `false` is not an R constant (the logical is FALSE), and
# guide_legend() is added straight onto the plot here rather than being
# passed through guides().
xlab("% Steps in over-pronation") +guide_legend(reverse = false)
Your code is quite strange, with false instead of FALSE and an incorrectly placed guide_legend. The correct usage is (@Harpal gives a hint on that):
# Baseline: four large points with the colour legend in its default order.
ggplot(
  data.frame(x = 1:4, y = 4:1, col = factor(1:4)),
  aes(x = x, y = y, col = col)
) +
  geom_point(size = 10)

# Same plot with the colour legend reversed: guide_legend() is passed to
# guides() for the aesthetic whose legend should change. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
ggplot(
  data.frame(x = 1:4, y = 4:1, col = factor(1:4)),
  aes(x = x, y = y, col = col)
) +
  geom_point(size = 10) +
  guides(colour = guide_legend(reverse = TRUE))
If you're putting it in numeric and it's a continuous scale, you're better off with scale_fill_continuous(trans = 'reverse') or scale_colour_continuous. Using your code, this would give:
# Faceted histogram with the continuous fill scale reversed.
ggplot(
  subset(df, feedback == 1 & stp > 20),
  aes(x = correct, fill = day, colour = day)
) +
  geom_histogram(colour = "black", binwidth = 10) +
  facet_grid(day ~ .) +
  ggtitle("Over-pronation histogram") +
  ylab("Count (150s period)") +
  xlab("% Steps in over-pronation") +
  scale_fill_continuous(trans = "reverse")
For continuous scales, guide_colorbar is required.
Here I reverse color direction. Then I reverse color and size order with different functions
library(tidyverse)
library(janitor)

# Scatter plot of iris, one facet per species, with colour and size mapped
# to continuous columns; clean_names() provides the snake_case column names
# used in aes().
iris %>%
  as_tibble() %>%
  clean_names() %>%
  ggplot(
    aes(
      x = sepal_length,
      y = petal_width,
      size = sepal_width,
      color = petal_length
    )
  ) +
  geom_point() +
  facet_wrap(~species, scales = "free") +
  # reverse color direction (the higher in value, the darker in color)
  scale_color_continuous(trans = "reverse") +
  # edit legends
  guides(
    # reverse color order (higher value on top)
    color = guide_colorbar(reverse = TRUE),
    # reverse size order (higher diameter on top)
    size = guide_legend(reverse = TRUE)
  )

Boxplot show the value of mean

In this boxplot we can see the mean, but how can we also show the numeric value of the mean on the plot for every box?
# Boxplot of PlantGrowth weight per group, with each group's mean overlaid
# as a dark-red diamond (shape 18) that is kept out of the legend.
ggplot(data = PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot() +
  # `fun` and `show.legend` replace the deprecated `fun.y` and `show_guide`.
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )
First, you can calculate the group means with aggregate:
# One row per group, with the mean weight of that group.
means <- aggregate(weight ~ group, data = PlantGrowth, FUN = mean)
This dataset can be used with geom_text:
library(ggplot2)

# Boxplot with the group means as dark-red diamonds, plus a text layer that
# prints each mean from the precomputed `means` data frame.
ggplot(data = PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot() +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  ) +
  # y is nudged up by 0.08 so the label is positioned higher than the mean point
  geom_text(data = means, aes(label = weight, y = weight + 0.08))
Here, + 0.08 is used to place the label above the point representing the mean.
An alternative version without ggplot2:
# Base-graphics version: boxplot per group, group means as red points, and
# the mean values written 0.08 above each point.
means <- aggregate(weight ~ group, data = PlantGrowth, FUN = mean)
boxplot(weight ~ group, data = PlantGrowth)
points(x = 1:3, y = means$weight, col = "red")
text(x = 1:3, y = means$weight + 0.08, labels = means$weight)
You can use the output value from stat_summary()
# Boxplot with the group means drawn as dark-red diamonds and printed as red
# text, both computed by stat_summary() so no extra data frame is needed.
# Each `+` must end a line: a line that starts with `+` is parsed as a new
# expression, so the layers would never be added to the plot.
ggplot(data = PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot() +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  ) +
  stat_summary(
    fun = mean, colour = "red", geom = "text", show.legend = FALSE,
    vjust = -0.7,
    # after_stat(y) is the summary value computed by `fun`; it replaces the
    # older `..y..` notation.
    aes(label = round(after_stat(y), digits = 1))
  )
You can also use a function within stat_summary to calculate the mean and the vjust argument to place the text; you need an additional function but no additional data frame:
# Summary helper for stat_summary(fun.data = ...).
#
# x: numeric vector of the values in one group.
# Returns a one-row data frame with `y` (where to put the text) and `label`
# (the text itself), both set to the mean of `x` with NAs ignored. Computing
# the mean once keeps the two columns in agreement when `x` contains NA.
fun_mean <- function(x) {
  group_mean <- mean(x, na.rm = TRUE)
  data.frame(y = group_mean, label = group_mean)
}
# Boxplot with the group means as dark-red points; the text layer gets its
# position and label from fun_mean().
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot(aes(fill = group)) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", colour = "darkred", size = 3) +
  stat_summary(fun.data = fun_mean, geom = "text", vjust = -0.7)
The Magrittr way
I know there is an accepted answer already, but I wanted to show one cool way to do it in single command with the help of magrittr package.
# Single pipeline that draws a base-graphics boxplot of weight by group, adds
# the group means as points, and prints their values as text.
# Uses the magrittr operators %$%, %T>% and %>%. The side-pipe %T>% runs its
# right-hand side for the drawing side effect and passes its left-hand value
# on unchanged, so the steps must stay in this order.
PlantGrowth %$% # open dataset and make colnames accessible with '$'
split(weight,group) %T>% # split by group and side-pipe it into boxplot
boxplot %>% # plot
lapply(mean) %>% # data from split can still be used thanks to side-pipe '%T>%'
unlist %T>% # convert to atomic and side-pipe it to points
points(pch=18) %>% # add points for means to the boxplot
# NOTE(review): only `x` and `labels` are given, so text() presumably plots
# the shifted means against their index, putting each label just above its
# point - confirm.
text(x=.+0.06,labels=.) # use the values to print text
This code will produce a boxplot with means printed as points and values:
I split the command on multiple lines so I can comment on what each part does, but it can also be entered as a oneliner. You can learn more about this in my gist.

Resources