Barplot subgroup factor reordering doesn't work - r

I want to order each bar within year like this: A, B, C.
I have looked at this post: https://ilari.scheinin.fi/ggplot-2-0-and-the-missing-order-aesthetic/
but it doesn't work at all.
Here's my code:
# data --------------------------------------------------------------------
# Toy data set: 12 rows, four years for each of the three TypeCourtier values.
ID <- c('1','2','3','4','5','6','7','7','8','9','10','11')
TypeCourtier <- c('A','A','A','A','B','B','B','B','C','C','C','C')
année_survenance <- c('2009','2010','2011','2012','2009','2010','2011','2012','2009','2010','2011','2012')
# NOTE(review): the three measure columns below are built from quoted strings,
# so they are not numeric when they reach ggplot (character, or factor under
# the pre-4.0 data.frame default).
moyenne_charge <- c('1515','1551','89754','9148','787','9848','8474','3465','7488','884','8948','8484')
mediane_charge <- c('8185','5919','20409','8979','7777','9294','87484','8488','1881','18819','8484','84444')
totalComptage <- c('9989','849444','848','684','9845','1448','9844','2151','7171','5051','3959','9896')
data <- data.frame(ID, TypeCourtier, année_survenance, moyenne_charge, mediane_charge,totalComptage)
# main --------------------------------------------------------------------
# NOTE(review): install.packages() runs every time the script is executed;
# presumably it is only needed once per machine.
install.packages("ggplot2")
library(ggplot2)
library(magrittr)
library(dplyr)
# Turn TypeCourtier into an ordered factor with levels A < B < C.
data$TypeCourtier <- factor(data$TypeCourtier, levels = c("A","B","C"), ordered= TRUE)
# Sort the rows by factor level, then draw dodged bars per year, filled and
# labelled by TypeCourtier, with the legend hidden.
data %>%
ungroup() %>%
arrange(as.integer(TypeCourtier)) %>%
ggplot( aes(x=année_survenance, y = moyenne_charge, fill=TypeCourtier)) +
geom_bar(stat="identity", position=position_dodge())+
geom_text(aes(label=TypeCourtier),position = position_dodge(width = 0.9),vjust=-0.25) + theme(legend.position = "none")

The variable for the y-axis has to be numeric:
data$moyenne_charge <- as.numeric(as.character(data$moyenne_charge))

Related

Order facet by semester and year

I am trying to order a ggplot graph using facet_grid, example:
Sorry, I know my example data does not make much sense, but it is enough to show my problem with the facets...
My real data is not generated like this; I only use the block below to create data for this example:
################ only to get data in my example ######################
set.seed(12345)
Date <- seq(as.Date("2010/1/1"), as.Date("2013/1/1"), "6 months" )
Y <- rnorm(n=length(Date), mean=100, sd=1)
df <- data.frame(Date, Y)
df$Year <- format(df$Date, "%Y")
df$Sem <- format(df$Date, "%b")
df$Sem <- gsub("ene.", "1S",df$Sem )
df$Sem <- gsub("jul.", "2S",df$Sem )
df$MonthYear <- format(df$Date, "%b-%Y")
############## o #################
Variables that are in my real data are:
Sem, Semester and Y
I extract the year to sort in my second try:
# Pull the four-digit year out of MonthYear into its own column.
df=df %>%
mutate(extract_year= sub('.*(\\d{4}).*', '\\1', MonthYear))
# Semester label = semester code followed by the year.
df$Semester=paste0(df$Sem,df$extract_year)
#Here I tried to sort the facet but I failed:
# The levels are the unique labels sorted as strings and then reversed, so the
# facet order follows the text of the label rather than the calendar.
df2 <- within(df,Semester <- ordered(Semester, levels = rev(sort(unique(Semester)))))
df2 %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I tried also with:
df2 %>%
arrange(extract_year) %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I would like get this order:
1S2010/2S2010/1S2011/2S2011/1S2012/2S2012
Basically you have to set the order of the levels in your desired order. One option to achieve that would be to arrange the data in your desired order and make use of forcats::fct_inorder to set the order of the levels of your Semester variable.
library(ggplot2)
library(dplyr)

# Build the semester label: map month abbreviations to semester codes, then
# append the year.
df <- df %>%
  mutate(
    Sem = recode(Sem, Jan = "1S", Jul = "2S"),
    Semester = paste0(Sem, Year)
  )

# Sort the rows on a year-first key (the regex swaps "<n>S<yyyy>" into
# "<yyyy><n>S"), then freeze that row order as the factor level order.
df <- arrange(df, gsub("^(\\dS)(\\d{4})$", "\\2\\1", Semester))
df <- mutate(df, Semester = forcats::fct_inorder(Semester))

# One facet row per semester, following the level order set above.
ggplot(df, aes(y = Y)) +
  geom_bar() +
  facet_grid(Semester ~ .)

How to pass an expression to a geom_text label in ggplot? (Continued)

This is a follow-up to my original question on how to pass an expression with a subscript to a geom_text label in ggplot.
Duck provided a great solution using parse = T within the geom_text() command. However, I am now running into a problem because the variable I wish to pass an expression to contains other content that appears unreadable with parse = T
Here is my current code (again, thank you to Duck for this solution):
library(ggplot2)
library(tidyverse)
#Data
# Convert the plotmath expression to a string so it can be stored in a
# character column.
my_exp <- as.character(expression('my_exp'[s][u][b]))
my_data <-
data.frame(
var_1 = c("9R", "14M", "17C"),
var_2 = c(1, 2, 3),stringsAsFactors = F
)
#Mutate
# Only the "9R" row receives the expression text; the other rows keep var_1
# as their label.
my_data$label <- ifelse(my_data$var_1=='9R',my_exp,my_data$var_1)
#Plot
# parse = T is applied to every label, so plain labels such as "14M" are
# parsed as R code too (see the error output that follows).
my_data %>%
ggplot(aes(x = var_1, y = var_2))+
geom_text(aes(label = label),parse = T)
And here is the error output that appears when I try to render the ggplot:
> library(ggplot2)
> library(tidyverse)
> #Data
> my_exp <- as.character(expression('my_exp'[s][u][b]))
> my_data <-
+ data.frame(
+ var_1 = c("9R", "14M", "17C"),
+ var_2 = c(1, 2, 3),stringsAsFactors = F
+ )
> #Mutate
> my_data$label <- ifelse(my_data$var_1=='9R',my_exp,my_data$var_1)
> #Plot
> my_data %>%
+ ggplot(aes(x = var_1, y = var_2))+
+ geom_text(aes(label = label),parse = T)
Error in parse(text = text[[i]]) : <text>:1:3: unexpected symbol
1: 14M
^
>
It appears R is having a hard time reading the cells where I have not passed the expression. Is there a way to have R only parse the relevant cell(s)?
Thanks!
As an alternative, you can use geom_richtext() from the ggtext package and create super- or subscripts with <sup>...</sup> or <sub>...</sub>.
library(ggplot2)
library(ggtext)

# Data
# HTML-style markup: geom_richtext() renders <sub>...</sub> as a subscript,
# so no plotmath parsing is needed.
my_exp <- "my_exp<sub>sub</sub>"
my_data <- data.frame(
  var_1 = c("9R", "14M", "17C"),
  var_2 = c(1, 2, 3),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

# Mutate
# The "9R" row gets the marked-up label; every other row keeps var_1 as is.
my_data$label <- ifelse(my_data$var_1 == "9R", my_exp, my_data$var_1)

# Plot
ggplot(my_data, aes(x = var_1, y = var_2)) +
  geom_richtext(
    aes(label = label),
    # customization to remove background and border around labels
    fill = NA,
    label.colour = NA
  )
Created on 2020-09-09 by the reprex package (v0.3.0)
Maybe this might not be optimal but you can create a label for your expressions and another for your classic text. Here the code:
library(ggplot2)
library(tidyverse)

# Data
# Convert the plotmath expression to a string so it can be stored in a
# character column.
my_exp <- as.character(expression('my_exp'[s][u][b]))
my_data <- data.frame(
  var_1 = c("9R", "14M", "17C"),
  var_2 = c(1, 2, 3),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

# Mutate
# Split the labels into two complementary columns:
#   label  - expression text for the "9R" row, NA elsewhere (parsed layer)
#   label2 - plain text for every other row, NA for "9R" (unparsed layer)
my_data$label <- ifelse(my_data$var_1 == "9R", my_exp, NA)
my_data$label2 <- ifelse(my_data$var_1 == "9R", NA, my_data$var_1)

# Plot
# Each layer draws only its own non-NA labels. The NA rows are intentional,
# so na.rm = TRUE drops them silently instead of warning about removed rows.
my_data %>%
  ggplot(aes(x = var_1, y = var_2)) +
  geom_text(aes(label = label), parse = TRUE, na.rm = TRUE) +
  geom_text(aes(label = label2), na.rm = TRUE)
Output:
Using geom_text() twice you can hack the plot.

Plot pvalue information for mean comparisons by grouping variable

I've put together a plot to view groups separately but now want to include significance levels for mean pairwise comparison in the plot. While I can do the comparison outside of the plot I'm wondering what the most efficient way of including the comparison in the plot would be?
Current Plot
library(tidyverse)
dsub <- diamonds[ sample(nrow(diamonds), 10000), ]
dsub <- dsub %>%
filter(clarity %in% c('VS2', 'VS1', 'VVS2'))
ggplot(dsub, aes(x = cut, y = carat, fill = clarity)) +
geom_boxplot(outlier.size = 0) +
geom_point(pch = 21, position = position_jitterdodge())
Now I want to add the comparisons within each level of the cut variable between all levels of the clarity variable. I prefer using ggpubr but couldn't see where this could be achieved.
EDITED to take OP preference for output into account
Ahhhh... okay well let me at least save you a bunch of vertical space and neaten things up by overcoming the fact that rstatix doesn't honor the order of your factors and ggpubr wants its groups as character not factor.
library(ggplot2)
library(dplyr)
# Random 10,000-row sample of diamonds; no seed is set, so the sample differs
# between runs.
dsub <- diamonds[ sample(nrow(diamonds), 10000), ]
# Keep three clarity grades, then drop the factor levels that are no longer used.
dsub <- dsub %>%
filter(clarity %in% c('VS2', 'VS1', 'VVS2'))
dsub <- droplevels(dsub)
# Pairwise Wilcoxon tests of carat between clarity grades, run within each cut.
# group1 is temporarily made an ordered factor so the rows can be sorted
# VS2, VS1, VVS2 within each cut, then turned back into character before the
# bracket positions are computed (per the accompanying text, ggpubr wants its
# groups as character).
dsub_stats <-
dsub %>%
group_by(cut) %>%
rstatix::wilcox_test(carat~clarity) %>%
mutate(group1 = factor(group1,
ordered = TRUE,
levels = c("VS2", "VS1", "VVS2"))) %>%
arrange(cut, group1) %>%
mutate(group1 = as.character(group1)) %>%
rstatix::add_xy_position(x='cut')
# Box plot coloured by clarity with jittered points, plus the significance
# labels taken from the p.adj.signif column of dsub_stats.
ggpubr::ggboxplot(dsub, x = "cut", y = "carat",
color = "clarity",
add='jitter') +
ggpubr::stat_pvalue_manual(dsub_stats,
label = "p.adj.signif",
tip.length = 0.01)
Created on 2020-09-24 by the reprex package (v0.3.0)
library(tidyverse)
library(rstatix)
library(ggpubr)

# Random 10,000-row sample of diamonds, restricted to three clarity grades.
dsub <- diamonds[sample(nrow(diamonds), 10000), ] %>%
  filter(clarity %in% c("VS2", "VS1", "VVS2"))

# Pairwise Wilcoxon tests of carat between clarity grades within each cut,
# with bracket coordinates added for plotting along the cut axis.
dsub_stats <- dsub %>%
  group_by(cut) %>%
  wilcox_test(carat ~ clarity) %>%
  add_xy_position(x = "cut")

# Box plot with jittered points, annotated with the significance labels.
ggboxplot(
  dsub,
  x = "cut",
  y = "carat",
  color = "clarity",
  add = "jitter"
) +
  stat_pvalue_manual(
    dsub_stats,
    label = "p.adj.signif",
    tip.length = 0.01
  )

How to use direct.label() with simple (two variables) ggplot2 chart

Here is a simple ggplot chart for two variables:
library("ggplot2")
library("directlabels")
library("tibble")
df <- tibble(
number = 1:10,
var1 = runif(10)*10,
var2 = runif(10)*10
)
ggplot(df, aes(number))+
geom_line(aes(y=var1), color='red')+
geom_line(aes(y=var2), color='blue')
Is it possible to label the last value of var1 and var2 using the expression like that:
direct.label(df, 'last.points')
In my case I get an error:
Error in UseMethod("direct.label") :
no applicable method for 'direct.label' applied to an object of
class
Maria, you initially need to structure your data frame by "stacking data". I like to use the melt function of the reshape2 package. This will allow you to use only one geom_line.
Later you need to generate an object from ggplot2. And this object you must apply the directlabels package.
library(ggplot2)
library(directlabels)
library(tibble)
library(dplyr)
library(reshape2)

set.seed(1)
df <- tibble::tibble(
  number = 1:10,
  var1 = runif(10) * 10,
  var2 = runif(10) * 10
)

# Stack var1 and var2 into long format, keeping `number` as the identifier.
# The plot below reads the stacked data through the `variable` and `value`
# columns, so a single geom_line() can draw both series.
df <- df %>%
  reshape2::melt(id.vars = "number")

# direct.label() works on a ggplot object, not on the data frame, so the plot
# is stored first. The legend is hidden because the direct labels replace it;
# FALSE is used rather than F, which is an ordinary reassignable variable.
p <- ggplot2::ggplot(df) +
  geom_line(aes(x = number, y = value, col = variable), show.legend = FALSE) +
  scale_color_manual(values = c("red", "blue"))
p
directlabels::direct.label(p, "last.points")

Facet skip value x-axis

I'm working on this df:
library("ggplot2")
library("reshape2")
library("tidyr")
library("scales")
library("dplyr")
Col0 <- c("AA", "BB", "CC", "DD","EE","FF")
D01012015 <- c(2,2,2,6,1,NA)
D02012015 <- c(2,2,2,1,3,1)
D03012015 <- c(2,2,3,4,6,4)
D04012015 <- c(2,2,3,1,2,4)
D05012015 <- c(2,1,1,1,1,0)
D06012015 <- c(2,4,2,5,4,9)
D07012015 <- c(2,4,2,5,4,1)
D08012015 <- c(2,2,3,4,5,3)
D09012015 <- c(1,3,3,2,2,1)
D10012015 <- c(1,3,3,2,2,1)
D11012015 <- c(1,3,3,2,4,1)
D12012015 <- c(1,3,3,4,2,1)
D13012015 <- c(1,3,5,2,2,1)
D14012015 <- c(1,3,3,7,2,1)
D15012015 <- c(1,3,3,7,2,7)
df<-data.frame(Col0,D01012015,D02012015,D03012015,D04012015,D05012015,D06012015,D07012015,D08012015,D09012015,D10012015,D11012015,
D12012015,D13012015,D14012015,D15012015)
I know that normally, if I'd like to show one value per week on the x axis, I should build the plot like this:
f<-melt(df,id =c("Col0"))
f$date<-as.Date(f$variable, format="D%d%m%Y")
pl<- ggplot(f, aes(date, value, fill=Col0))+ geom_line(aes(color=Col0,group=Col0))+ scale_x_date(breaks = date_breaks("1 week"))
My problem is that i have to create the same x axis values, using this function:
# Plot every column of `data` except the first as a line, with one facet row
# per value of the first column.
#
# data: data frame whose first column identifies the series; all remaining
#   columns are gathered into a key column ("dates") and a value column ("Val").
plotfun = function(data) {
# Name of the key column that will hold the original column names.
xval<-"dates"
# The first column drives grouping, colour and faceting.
column<- names(data)[1]
# NOTE(review): gather_() and select_vars_() are deprecated underscore
# variants - confirm they are still available in the installed tidyr/dplyr.
# NOTE(review): the key column presumably holds the original column names as
# text (e.g. "D01012015"), not Date values - confirm.
data %>%
gather_(xval, "Val", select_vars_(names(.),
names(.),
exclude = column)) %>%
ggplot(aes_string(xval, "Val", group = column, col = column)) +
facet_grid(as.formula(paste(column, "~."))) +
geom_line()
}
plotfun(df)
I don't know how to convert the x values to dates after the gather step, or how to skip values (one axis break per week) as in the previous ggplot code.
Can you not just put in a mutate statement?
# Plot each series in `data` over time, with one facet row per series.
#
# data: data frame whose first column identifies the series and whose
#   remaining columns are named "D<ddmmyyyy>" (e.g. D01012015), one per day.
# Returns a ggplot object with a Date x axis and one break per week.
plotfun <- function(data) {
  xval <- "dates"
  column <- names(data)[1]
  data %>%
    gather_(xval, "Val", select_vars_(names(.),
                                      names(.),
                                      exclude = column)) %>%
    # Convert the gathered column names of the data passed in (the `dates`
    # column named by xval) into Dates. The function must not read a global
    # object here, or it only works for one specific data frame.
    mutate(dates = as.Date(as.character(dates), format = "D%d%m%Y")) %>%
    ggplot(aes_string(xval, "Val", group = column, col = column)) +
    facet_grid(as.formula(paste(column, "~."))) +
    geom_line() +
    # Weekly breaks, matching the stand-alone plot built with scale_x_date().
    scale_x_date(breaks = scales::date_breaks("1 week"))
}
plotfun(df)

Resources