Related
I have three variables in my dataset:
school (School)
actual score (actual_score)
expected score (expected_score)
and I need to produce this graph:
So far I have
# Order the schools by their actual score, then draw the actual (red) and
# expected (blue) scores as two separate point layers.
schools_by_score <- mutate(data, School = fct_reorder(School, actual_score))

ggplot(schools_by_score, aes(x = School)) +
  geom_point(aes(y = actual_score), colour = "red") +
  geom_point(aes(y = expected_score), colour = "blue")
But they are just points... how to connect them?
structure(list(School = structure(c(9L,
6L, 8L, 2L, 1L), levels = c("11278", "11274", "11285", "11289",
"11280", "01424", "11290", "11272", "01206", "11286"), class = "factor"),
actual_score = c(453.4875, 423.375757575758, 441.481481481482,
375.103846153846, 363.621428571429), expected_score = c(452.489150512886,
428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .rows = structure(list(
1:5), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
Your dput result is slightly corrupt, so I slightly modified it.
You can use geom_linerange to connect the points.
I also included the rest of the graph as placing the labels is a bit tricky.
library(tidyverse)
# Example data: five schools with their actual and expected average scores.
# The factor keeps all ten original levels although only five are used.
data <- tibble(
  School = structure(
    c(9L, 6L, 8L, 2L, 1L),
    levels = c(
      "11278", "11274", "11285", "11289", "11280",
      "01424", "11290", "11272", "01206", "11286"
    ),
    class = "factor"
  ),
  actual_score = c(
    453.4875, 423.375757575758, 441.481481481482,
    375.103846153846, 363.621428571429
  ),
  expected_score = c(
    452.489150512886, 428.002515274828, 439.209772701724,
    384.917346549729, 382.216349569884
  )
)

data %>%
  mutate(
    # Relabel the levels as "School A", "School B", ... and order them by
    # actual score so the x axis is sorted.
    School = fct_reorder(
      fct_relabel(School, ~ paste("School", LETTERS[seq_along(.)])),
      actual_score
    )
  ) %>%
  ggplot(aes(x = School)) +
  # Vertical line connecting the two scores of each school.
  geom_linerange(aes(ymin = actual_score, ymax = expected_score)) +
  # Colour and shape use the same keys ("Actual" / "Expected") so both
  # aesthetics share one legend.
  geom_point(aes(y = actual_score, color = "Actual", shape = "Actual"), size = 3) +
  # Offset each label by 5 units away from the other point: above when this
  # score is the larger one, below otherwise.
  geom_text(aes(
    y = actual_score - 5 + 10 * (actual_score > expected_score),
    label = round(actual_score)
  )) +
  geom_point(aes(y = expected_score, color = "Expected", shape = "Expected"), size = 3) +
  geom_text(aes(
    y = expected_score - 5 + 10 * (actual_score < expected_score),
    label = round(expected_score)
  )) +
  scale_color_manual(
    name = NULL,
    labels = c("Actual", "Expected"),
    values = c("blue", "red")
  ) +
  scale_shape_manual(
    name = NULL,
    labels = c("Actual", "Expected"),
    values = c(16, 17)
  ) +
  labs(y = "Average NAPLAN score", x = NULL) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    panel.grid.major.x = element_blank()
  )
Created on 2022-12-19 with reprex v2.0.2
To connect your points you could use a geom_segment. And to get the different shapes map on the shape aesthetic. Also do the same for color to get a legend reflecting both shape and color. The rest is some styling plus some additional geom_text layers for the labels.
library(dplyr)
library(ggplot2)
library(forcats)
# Order the schools by actual score before plotting.
schools_by_score <- mutate(data, School = fct_reorder(School, actual_score))

ggplot(schools_by_score, aes(x = School)) +
  # Grey connector between the two scores of each school.
  geom_segment(
    aes(xend = School, y = actual_score, yend = expected_score),
    colour = "grey80",
    linewidth = 1
  ) +
  # Colour and shape are mapped to the same keys so they share one legend.
  geom_point(
    aes(y = actual_score, colour = "Actual", shape = "Actual"),
    size = 3
  ) +
  geom_point(
    aes(y = expected_score, colour = "Expected", shape = "Expected"),
    size = 3
  ) +
  # Borderless, unfilled labels; vjust puts the label of the larger score
  # above its point and the label of the smaller score below it.
  geom_label(
    aes(
      y = actual_score,
      label = round(actual_score),
      vjust = ifelse(actual_score > expected_score, 0, 1)
    ),
    label.size = NA,
    label.padding = unit(10, "pt"),
    fill = NA
  ) +
  geom_label(
    aes(
      y = expected_score,
      label = round(expected_score),
      vjust = ifelse(expected_score > actual_score, 0, 1)
    ),
    label.size = NA,
    label.padding = unit(10, "pt"),
    fill = NA
  ) +
  scale_color_manual(values = c("red", "blue")) +
  scale_shape_manual(values = c(16, 17)) +
  scale_y_continuous(breaks = seq(320, 480, 40), limits = c(320, 480)) +
  labs(color = NULL, shape = NULL, x = NULL, y = "Average NAPLAN Score") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.title.y = element_text(face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )
DATA
# Example data: five schools with their actual and expected average scores,
# stored as a tibble-classed data frame. The stray `.rows` attribute from the
# corrupted dput() output has been dropped; it belonged to a grouping
# structure that is not part of this data frame.
data <- structure(
  list(
    # Only five of the ten factor levels are used by the rows below.
    School = structure(
      c(9L, 6L, 8L, 2L, 1L),
      levels = c(
        "11278", "11274", "11285", "11289",
        "11280", "01424", "11290", "11272", "01206", "11286"
      ),
      class = "factor"
    ),
    actual_score = c(
      453.4875, 423.375757575758, 441.481481481482,
      375.103846153846, 363.621428571429
    ),
    expected_score = c(
      452.489150512886,
      428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -5L)
)
This is my data which I'm trying to plot
dput(results)
structure(list(ontology = c("CC", "BP", "MF", "CC", "BP", "MF",
"CC", "BP", "MF"), breadth = structure(c(3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 1L), .Label = c("10", "30", "100"), class = "factor"),
enrichment = c(4.09685904270847, 8.04193317540539, 5.5801230522415,
4.52127958016442, 8.9221766387218, 5.68189764335457, 4.25046722366786,
9.49038239297713, 6.75423163834793), p = c(0, 0, 0, 0, 0,
0, 2.09057402562873e-221, 0, 0)), class = "data.frame", row.names = c(NA,
-9L))
My code
# Read the enrichment table and convert breadth to a factor.
results <- read.delim("data/GO/LC-GO-enrichment_new.txt")
results <- mutate(results, breadth = factor(breadth))

# Three colours picked from the ryb8 palette; the outline scale uses a
# darkened copy of the same colours.
ontology_fill <- ryb8[c(1, 5, 8)]

# Dodged bars of enrichment per breadth, one bar per ontology, on a log10 y axis.
p <- ggplot(
  results,
  aes(x = breadth, y = enrichment, fill = ontology, color = ontology)
) +
  geom_col(position = "dodge", width = 0.8) +
  labs(x = "Breadth", y = "Odds ratio") +
  scale_fill_manual(values = ontology_fill, name = "Ontology") +
  scale_color_manual(values = darken(ontology_fill, 1.3), name = "Ontology") +
  scale_y_log10(expand = c(0.01, 0)) +
  sci_theme
p
I get something like this
Is there a way the p-value can be added, similar to this,
or is it done manually after making the figure?
Any help or suggestion would be really helpful.
You could simply add the p values as a text layer. Note though, that in your data, each bar has a p value, so it's not clear where the groupwise p values are coming from.
library(ggplot2)
# Dodged bars per ontology with each bar's formatted p value printed above it.
ggplot(results, aes(x = breadth, y = enrichment, fill = ontology)) +
  # The outline colour is derived from the final fill colour via after_scale().
  geom_col(
    aes(color = after_scale(colorspace::darken(fill, 1.3))),
    position = "dodge",
    width = 0.8
  ) +
  # Grouping by ontology and dodging by the bar width (0.8) places each label
  # over its own bar.
  geom_text(
    aes(label = paste("p", scales::pvalue(p)), group = ontology),
    position = position_dodge(width = 0.8),
    vjust = -1
  ) +
  labs(x = "Breadth", y = "Odds ratio", fill = "Ontology") +
  scale_fill_manual(values = c("#d63228", "#dff2f8", "#4575b5")) +
  scale_y_log10(expand = c(0.05, 0)) +
  theme_classic(base_size = 16) +
  theme(legend.position = "top")
Hi, I'm trying to make a plot of a survey question with a Likert scale using ggplot2. I need help placing the middle "neutral" values correctly. I have used two data frames, one for the left side, "low_col", and one for the right side, "high_col"; both contain the neutral value divided by 2. This is the script I used for the plot, and how the graph looks. I would highly appreciate any advice on correcting the order, and also any help adding the percentages that I have in both data frames in a column named per. I hope someone can help me. Thanks
# Diverging stacked bars: high_col is stacked on the positive side of zero and
# low_col (with per negated) on the negative side. Fill colours are taken
# directly from the `col` column through the identity scale.
ggplot() +
  geom_bar(
    data = high_col,
    mapping = aes(x = Q6, y = per, fill = col),
    position = "stack",
    stat = "identity"
  ) +
  geom_bar(
    data = low_col,
    mapping = aes(x = Q6, y = -per, fill = col),
    position = "stack",
    stat = "identity"
  ) +
  # White line marking the zero point between the two halves.
  geom_hline(yintercept = 0, color = "white") +
  coord_flip() +
  scale_fill_identity("", labels = mylevels, breaks = legend.pal, guide = "legend") +
  theme_fivethirtyeight() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.text.y = element_text(hjust = 0),
    legend.position = "bottom"
  )
The structure of the low_col and high_col is as follows:
For high_col:
dput(high_col)
structure(list(Q6 = c("General", "0", "1", "2", "General", "0", "1", "2", "3", "General", "0", "1", "2", "3"), Q75 = c("Ni satisfecho, ni insatisfecho", "Ni satisfecho, ni insatisfecho", "Ni satisfecho, ni insatisfecho", "Ni satisfecho, ni insatisfecho", "Satisfecho", "Satisfecho", "Satisfecho", "Satisfecho", "Satisfecho", "Totalmente satisfecho", "Totalmente satisfecho", "Totalmente satisfecho", "Totalmente satisfecho", "Totalmente satisfecho"), n = c(5, 1, 3, 1, 53, 25, 19, 7, 2, 104, 52, 35, 14, 3), per = c(1.48809523809524, 0.609756097560975, 2.58620689655172, 2.17391304347826, 31.547619047619, 30.4878048780487, 32.7586206896551, 30.4347826086956, 40, 61.9047619047619, 63.4146341463414, 60.3448275862069, 60.8695652173913, 60), col = c("#DFDFDF", "#DFDFDF", "#DFDFDF", "#DFDFDF", "#92C5DE", "#92C5DE", "#92C5DE", "#92C5DE", "#92C5DE", "#0571B0", "#0571B0", "#0571B0", "#0571B0", "#0571B0")), row.names = c(NA, -14L), class = c("tbl_df", "tbl", "data.frame"))
For low_col:
dput(low_col)
structure(list(Q6 = c("General", "0", "General", "0", "1", "2", "General", "0", "1", "2"), Q75 = structure(c(2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("Totalmente insatisfecho", "Insatisfecho", "Ni satisfecho, ni insatisfecho"), class = "factor"), n = c(2, 2, 5, 1, 3, 1, 4, 2, 1, 1), per = c(1.19047619047619, 2.4390243902439, 1.48809523809524, 0.609756097560975, 2.58620689655172, 2.17391304347826, 2.38095238095238, 2.4390243902439, 1.72413793103448, 4.34782608695652), col = c("#F4A582", "#F4A582", "#DFDFDF", "#DFDFDF", "#DFDFDF","#DFDFDF", "#CA0020", "#CA0020", "#CA0020", "#CA0020")), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame")).
As a reference, I'm trying to follow two blogs where some individuals already did this kind of likert plots. In the first, it uses the color "col" as fill in the geom_bar, however, in my case the order is not working properly. And the result is the graph I've attached.
In the second blog, I used the following script, but I cannot change the colors, nor the order of the legend, as the labels appear in alphabetical order, and the colors are selected by default. Thanks in advance for all the insights, and info.
# Second attempt: diverging stacked bars filled by the answer category (Q75),
# with percentage labels taken from pro_labels (columns left, center, right).
# NOTE(review): both bar layers map `fill = Q75`, but the two manual scales in
# this chain (scale_color_manual and scale_colour_manual) target the colour
# aesthetic, which no layer maps. Presumably this is why the palette and the
# legend order are not applied; confirm by switching to a fill scale.
# NOTE(review): the colour scale is specified twice; the trailing
# scale_colour_manual presumably replaces the earlier scale_color_manual.
ggplot()+ geom_bar(data = high_col, aes(x=Q6, y=per, fill=Q75), position = position_stack(reverse = TRUE), stat = "identity") + geom_bar(data= low_col, aes(x=Q6, y=-per, fill=Q75), position = "stack", stat = "identity") +coord_flip() + theme_fivethirtyeight() + theme(plot.title = element_text(size=14, hjust=0.5)) + theme(axis.text.y = element_text(hjust=0)) + theme(legend.position = "bottom") + scale_y_continuous(breaks = seq(-100,100, 5), limits = c(-25, 100)) + scale_color_manual(labels=c("Totalmente insatisfecho", "Insatisfecho", "Ni satisfecho, ni insatisfecho", "Satisfecho", "Totalmente satisfecho"), values=legend.pal, guide="legend")+geom_text(data = pro_labels, mapping = aes(x=Q6, y=left, label=paste(round(left), "%", sep = "")), hjust=2, color="white", size=3, position = "stack")+geom_text(data = pro_labels, mapping = aes(x=Q6, y=center, label=paste(round(center), "%")), hjust=1, color="white", size=3, position = "stack")+geom_text(data = pro_labels, mapping = aes(x=Q6, y=right, label=paste(round(right), "%")), hjust=2, color="white", size=3, position = "stack")+geom_hline(yintercept = 0, color=c("grey")) + scale_colour_manual("", values = legend.pal, guide="legend")
Updated version:
Hi, I have been trying to work this out and with the following code I got the neutral value in the middle, yet the colors do not match.
# Updated attempt: same diverging stacked bars filled by Q75, now with
# scale_fill_discrete() supplying the five legend labels.
# NOTE(review): scale_fill_discrete(labels = ...) only supplies label text and
# keeps the default colours. The labels are presumably applied to the scale's
# breaks in their existing order, so they may not line up with the Q75
# categories; verify against the rendered legend.
ggplot()+ geom_bar(data = high_col, aes(x=Q6, y=per, fill=Q75), position = position_stack(reverse = TRUE), stat = "identity") + geom_bar(data= low_col, aes(x=Q6, y=-per, fill=Q75), position = "stack", stat = "identity") +coord_flip() + theme_fivethirtyeight() + theme(plot.title = element_text(size=14, hjust=0.5)) + theme(axis.text.y = element_text(hjust=0)) + theme(legend.position = "bottom") + scale_y_continuous(breaks = seq(-100,100, 5), limits = c(-25, 100))+geom_text(data = pro_labels, mapping = aes(x=Q6, y=left, label=paste(round(left), "%", sep = "")), hjust=2, color="white", size=3, position = "stack")+geom_text(data = pro_labels, mapping = aes(x=Q6, y=center, label=paste(round(center), "%")), hjust=1, color="white", size=3, position = "stack")+geom_text(data = pro_labels, mapping = aes(x=Q6, y=right, label=paste(round(right), "%")), hjust=2, color="white", size=3, position = "stack")+ geom_hline(yintercept = 0, color=c("grey"))+ scale_fill_discrete(labels= c("Totalmente insatisfecho", "Insatisfecho", "Ni satisfecho, ni insatisfecho", "Satisfecho", "Totalmente satisfecho"))
The result is the second graph attached here.
This is what the output is. I have a data set which contains the unit, the weight of each unit and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to write some code with help from an online blog, and the output I get is similar to what I want, but I am having difficulty positioning the labels and colour-coding each ring.
# using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting

# Read the file. header = FALSE means every line, including any header line,
# is read as data.
weight.eg <- read.csv(
  "Dummy Data.csv",
  header = FALSE, sep = ";", encoding = "UTF-8"
)

# Change column names
colnames(weight.eg) <- c("unit", "weight", "year")

# Convert weight and year to numeric. Going through as.character() works
# whether the columns were read as factor or as character; entries that are
# not numbers become NA.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))

# NAs are introduced by the conversion; remove those rows
weight.eg <- na.omit(weight.eg)

# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)

# First layer: a single bar holding the total weight
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))

sunburst_0 <- ggplot(firstLevel) # Just a foundation

# This will generate a bar chart
sunburst_1 <- sunburst_0 +
  geom_bar(
    data = firstLevel, aes(x = 1, y = total_weight),
    fill = "darkgrey", stat = "identity"
  ) +
  geom_text(
    aes(
      x = 1, y = sum_total_weight / 2,
      label = paste("Total Weight", comma(total_weight))
    ),
    color = "black"
  )

# View
sunburst_1

# Rotate the plot around the y-axis, which is the total weight
sunburst_1 + coord_polar(theta = "y")

# Second ring: one stacked segment per unit, filled and sized by weight
sunburst_2 <- sunburst_1 +
  geom_bar(
    data = weight.eg,
    aes(x = 2, y = weight, fill = weight),
    color = "white", position = "stack", stat = "identity", size = 0.6
  ) +
  geom_text(
    data = weight.eg,
    aes(label = paste(unit, weight), x = 2, y = weight),
    position = "stack"
  )

sunburst_2 + coord_polar(theta = "y")

# Third ring: same segments, labelled with the year (compliance score) column
sunburst_3 <- sunburst_2 +
  geom_bar(
    data = weight.eg,
    aes(x = 3, y = weight, fill = weight),
    color = "white", position = "stack", stat = "identity", size = 0.6
  ) +
  geom_text(
    data = weight.eg,
    aes(label = paste(year), x = 3, y = weight),
    position = "stack"
  )

sunburst_3 + coord_polar(theta = "y")

sunburst_3 +
  scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") +
  theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each segment when the values of `x` are stacked end to end:
# the average of the running total before and after each element.
cs_fun <- function(x) {
  upper <- cumsum(x)
  lower <- c(0, head(upper, -1))
  (upper + lower) / 2
}
Now the plotting code could look something like this:
ggplot(weight.eg) +
  # Ring 1: a single bar with the total weight and its label in the middle.
  # Note: geom_col is equivalent to geom_bar(stat = "identity")
  geom_col(data = firstLevel, aes(x = 1, y = total_weight)) +
  geom_text(
    data = firstLevel,
    aes(
      x = 1,
      y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  # Ring 2: one segment per unit, filled by weight on a white-to-red scale.
  geom_col(
    aes(x = 2, y = weight, fill = weight),
    colour = "white",
    size = 0.6
  ) +
  scale_fill_gradient(name = "Weight", low = "white", high = "darkred") +
  # Open up new fill scale for next ring
  new_scale_fill() +
  # cs_fun() gives the y position for each ring-2 label.
  geom_text(
    aes(x = 2, y = cs_fun(weight), label = paste(unit, weight))
  ) +
  # Ring 3: same segments with a separate green-to-white fill scale.
  geom_col(
    aes(x = 3, y = weight, fill = weight),
    size = 0.6,
    colour = "white"
  ) +
  scale_fill_gradient(
    name = "Another Weight?",
    low = "forestgreen",
    high = "white"
  ) +
  geom_text(
    aes(label = paste0(year), x = 3, y = cs_fun(weight))
  ) +
  coord_polar(theta = "y")
Which looks like this:
I have got this data set and want to generate a sunburst plot. The data is of 4 columns which are unit, weight, year16 and year17. The sunburst is based on the values in the weight column. The code is there and when adding the coding for the third layer it is giving me an error. I think the error is coming when I am adding the third layer.
library("ggnewscale")
library(ggplot2)
# Read the file. header = FALSE means every line, including any header line,
# is read as data.
weight.eg <- read.csv(
  "Dummy Data.csv",
  header = FALSE, sep = ";", encoding = "UTF-8"
)

# Change column names
colnames(weight.eg) <- c("unit", "weight", "year16", "year17")

# Check the class of each column (interactive inspection only)
sapply(weight.eg, class)
# View(weight.eg)

# Convert the value columns to numeric. Going through as.character() works
# whether the columns were read as factor or as character; entries that are
# not numbers become NA.
value_cols <- c("weight", "year16", "year17")
weight.eg[value_cols] <- lapply(
  weight.eg[value_cols],
  function(col) as.numeric(as.character(col))
)

# NAs are introduced by the conversion; remove those rows
weight.eg <- na.omit(weight.eg)

# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)

# First layer: one-row data frame with the total weight. Built with base R so
# the script does not need dplyr, which is not loaded here.
firstLevel <- data.frame(total_weight = sum_total_weight)

# Midpoint of each segment when the values of `x` are stacked end to end;
# used as the y position of the ring labels.
cs_fun <- function(x) {
  (cumsum(x) + c(0, cumsum(head(x, -1)))) / 2
}

# Add one outer ring to `plot`: segments sized by weight, filled by the column
# named `column` on its own red-to-green fill scale, and labelled with the
# unit followed by that column's value.
#   plot   - ggplot object built on weight.eg
#   ring   - x position of the ring (3 for the first year ring, 4 for the next)
#   column - name of the column to show, as a string
#   name   - legend title for the ring's fill scale
add_year_ring <- function(plot, ring, column, name = column) {
  # Force the arguments now: aes() evaluates them only when the plot is built,
  # by which time a calling loop would have moved on to other values.
  force(ring)
  force(column)
  plot +
    # Open up new fill scale for this ring
    new_scale_fill() +
    geom_col(
      aes(x = !!ring, y = weight, fill = .data[[column]]),
      size = 0.6, colour = "black"
    ) +
    scale_fill_gradient(name = name, low = "red", high = "green") +
    geom_text(
      aes(
        x = !!ring, y = cs_fun(weight),
        label = paste0(unit, .data[[column]])
      )
    )
}

# Rings 1 and 2: total weight in the centre, then one segment per unit.
sunburst <- ggplot(weight.eg) +
  geom_col(
    data = firstLevel,
    aes(x = 1, y = total_weight)
  ) +
  geom_text(
    data = firstLevel,
    aes(
      x = 1, y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  geom_col(
    aes(x = 2, y = weight, fill = weight),
    colour = "black", size = 0.6
  ) +
  scale_fill_gradient(name = "Weight", low = "white", high = "lightblue") +
  geom_text(
    aes(x = 2, y = cs_fun(weight), label = paste(unit, weight))
  )

# One ring per year column; names are the legend titles. Append further
# columns here (e.g. Year18 = "year18") to add more rings.
year_cols <- c(Year16 = "year16", Year17 = "year17")
for (i in seq_along(year_cols)) {
  sunburst <- add_year_ring(
    sunburst,
    ring = 2 + i,
    column = year_cols[[i]],
    name = names(year_cols)[[i]]
  )
}

sunburst + coord_polar(theta = "y")
The output for dput(weight.eg) is
structure(list(unit = structure(1:6, .Label = c("A", "B", "C",
"D", "E", "F", "Unit"), class = "factor"), weight = c(30, 25,
10, 17, 5, 13), year16 = c(70, 80, 50, 30, 60, 40), year17 = c(50,
100, 20, 30, 70, 60)), .Names = c("unit", "weight", "year16",
"year17"), row.names = 2:7, class = "data.frame", na.action =
structure(1L, .Names = "1", class = "omit"))
I want to include year17 as well, and in the future there will be
more columns, so those have to be added as well. Because of the error I
am not able to figure out what is wrong.