Related
I have a plot that looks at 2 quarters' worth of data. I also included a target value (dashed line) and a YTD section (which is the cumsum(count)).
I am having an issue trying to show the # in that section added for YTD but only for 1 of the quarters (since Q1 should already have a value inside the bar plot). Currently it is showing 0 and 2 in the plot below but I only want to show everything > Q1 values.
Current plot
I have tried with this current approach but does not seem to work:
**geom_text(aes(label = ifelse((quarter_2022= "Q1"), total_attainment, ifelse(quarter_2022="Q2",total_attainment+2)),
position = position_stack(vjust = 1))) +**
Plot Code
# Quarterly attainment chart: a YTD column drawn behind the per-quarter
# column, annotated with the YTD percentage, the counts and the target.
ggplot(df1, aes(x = quarter_2022, y = total_attainment)) +
  # Back layer: year-to-date total, one green shade per row of df1.
  geom_col(
    aes(y = YTD_TOTAL),
    fill = c("green1", "green2"),
    color = "black"
  ) +
  # YTD attainment as a percentage, nudged above the YTD column.
  geom_text(
    aes(y = YTD_TOTAL, label = scales::percent(YTD_PERCENT_ATTAINMENT)),
    vjust = -0.5
  ) +
  # Front layer: attainment for the quarter itself.
  geom_col(fill = "gray70", color = "gray20") +
  # Difference between the YTD total and the quarter's own attainment
  # (this is the label that evaluates to 0 when both values are equal).
  geom_text(
    aes(label = YTD_TOTAL - total_attainment),
    position = position_stack(vjust = 1.25)
  ) +
  # The quarter's attainment, centred inside the grey column.
  geom_text(
    aes(label = total_attainment),
    position = position_stack(vjust = 0.5)
  ) +
  # Dashed target line spanning the width of each column.
  geom_segment(
    aes(
      x = as.numeric(as.factor(quarter_2022)) - 0.4,
      xend = as.numeric(as.factor(quarter_2022)) + 0.4,
      y = attainment_target,
      yend = attainment_target
    ),
    linetype = "dashed"
  ) +
  # Target value label; vjust = 4 places it relative to the stacked
  # total_attainment height rather than at attainment_target itself.
  geom_text(
    aes(label = attainment_target),
    position = position_stack(vjust = 4)
  )
Here is the data:
structure(list(attainment_target = c(7.5, 15), quarter_2022 = c("Q1",
"Q2"), year = structure(c(1640995200, 1640995200), class = c("POSIXct",
"POSIXt"), tzone = ""), total_attainment = c(2, 4), percent_attainment_by_quarter = c(0.2666,
0.2666), ytd = c(2, 6), YTD_TOTAL = c(2, 6), YTD_PERCENT_ATTAINMENT = c(0.266666666666667,
0.4)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
))
Create a logical column in your dataset that indicates whether the label is 0. In the geom_text that creates the label, set the color aesthetic to the logical column. Use scale_color_manual(values = c(NA, "black"), na.value = NA) to assign no color to the labels that were 0s.
The dput(Q_Sheet) is below. How can I properly introduce a second y-axis that is different in scale from the primary axis?
structure(list(Amino_acids = c(4, 12, 20, 28, 32), Protein_length_Ang = c(7,
24, 40, 56, 64), length_no_ratio = c(1.75, 2, 2, 2, 2), Corrected_aa = c(1.24459201924769e-12,
3.71007650662474e-09, 1.10594599229843e-05, 0.0319159404863842,
0.642857142857143), aa_frequency = c(3.99735380592756, 6.96840672963299,
4.58228895300999, 3.12310921028256, 4.67560843680985), T_degC = c(50.3857804818545,
52.8464583426248, 60.0760389538482, 58.1895053328481, 67.628202708438
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`2` = 2L, `4` = 4L, `6` = 6L), class = "omit"))
`
# Both series are drawn against the primary y scale; the right-hand axis
# only relabels that same scale multiplied by 10.
ggplot(data = Q_Sheet, mapping = aes(x = T_degC)) +
  geom_line(mapping = aes(y = Amino_acids), color = "red") +
  geom_line(mapping = aes(y = Corrected_aa), color = "blue") +
  scale_y_continuous(
    name = "Amino_acids",
    sec.axis = sec_axis(~ . * 10, name = "Corrected_aa")
  )
The output is as follows:
<ScaleContinuousPosition>
Range:
Limits: 0 -- 1
You can use the below formula to keep the secondary Y-axis at the same level as Corrected_aa.
library(ggplot2)
# Plot Amino_acids against the left axis and Corrected_aa against the right.
#
# ggplot2 draws every layer on the primary y scale; a secondary axis is only
# a relabelling of that scale. For the blue line to agree with the right-hand
# axis, Corrected_aa must therefore be mapped onto the Amino_acids range
# before plotting, and sec_axis() must apply the exact inverse of that
# mapping.
aa_range <- range(Q_Sheet$Amino_acids)
corr_range <- range(Q_Sheet$Corrected_aa)
# Primary-axis units per one unit of Corrected_aa.
aa_per_corr <- diff(aa_range) / diff(corr_range)

ggplot(data = Q_Sheet, aes(x = T_degC)) +
  geom_line(aes(y = Amino_acids), color = "red") +
  # Forward mapping: Corrected_aa range -> Amino_acids range.
  geom_line(
    aes(y = (Corrected_aa - corr_range[1]) * aa_per_corr + aa_range[1]),
    color = "blue"
  ) +
  scale_y_continuous(
    name = "Amino_acids",
    # Inverse mapping: primary-axis position -> Corrected_aa value.
    sec.axis = sec_axis(
      ~ (. - aa_range[1]) / aa_per_corr + corr_range[1],
      name = "Corrected_aa"
    )
  )
There are two issues - 1) scale_y_continuous typo and 2) there is a missing + connecting the last expression
# Two line layers sharing the primary y scale, plus a secondary axis that
# shows the primary values multiplied by 10.
ggplot(data = Q_Sheet, mapping = aes(x = T_degC)) +
  geom_line(mapping = aes(y = Amino_acids), color = "red") +
  geom_line(mapping = aes(y = Corrected_aa), color = "blue") +
  scale_y_continuous(
    name = "Amino_acids",
    sec.axis = sec_axis(~ . * 10, name = "Corrected_aa")
  )
-output
We could define a coefficient and then color the lines to indicate which line belongs to which y-scale:
library(ggplot2)
# Value used to transform the data: Corrected_aa is divided by it before
# plotting, and the secondary axis multiplies by it to undo that scaling.
coeff <- 0.01
# Colors shared by each line and its matching axis title
Amino_acidsColor <- "red"
Corrected_aaColor <- "blue"
ggplot(data = Q_Sheet, aes(x = T_degC)) +
  geom_line(aes(y = Amino_acids), size = 2, color = Amino_acidsColor) +
  # Stretch Corrected_aa onto the primary scale.
  geom_line(aes(y = Corrected_aa / coeff), size = 2,
            color = Corrected_aaColor) +
  scale_y_continuous(
    name = "Amino_acids",
    # Inverse of the division above, so the right axis reads in
    # Corrected_aa units.
    sec.axis = sec_axis(~ . * coeff, name = "Corrected_aa")
  ) +
  theme_bw() +
  theme(
    axis.title.y = element_text(color = Amino_acidsColor, size = 13),
    axis.title.y.right = element_text(color = Corrected_aaColor, size = 13)
  )
This is the output. I have a data set which contains the unit, the weight of each unit and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
#using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting
# Read the semicolon-separated file; the first row is read as data, not as a
# header.
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")
# Change column names
colnames(weight.eg) <- c("unit", "weight", "year")
# Convert the weight and year columns to numeric. Going through
# as.character() gives the same result as indexing the factor levels and
# also works when read.csv() returns character columns instead of factors.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))
# NAs are introduced for cells that are not numbers; remove those rows
weight.eg <- na.omit(weight.eg)
# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)
# First layer: a single-row data frame holding the total weight
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation
# This will generate a bar chart: one bar for the total, with its label
# placed at half the total height
sunburst_1 <-
  sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = "darkgrey", stat = "identity") +
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total Weight", comma(total_weight))),
            color = "black")
# View
sunburst_1
# theta = "y" maps the y aesthetic (the total weight) to the angle, which
# wraps the bar around the centre of the plot
sunburst_1 + coord_polar(theta = "y")
# Second ring: one stacked segment per unit, sized and shaded by its weight,
# labelled with the unit name and weight. Columns are referenced by bare
# name inside aes() because the layer already receives weight.eg as data.
sunburst_2 <-
  sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(unit, weight), x = 2, y = weight),
            position = "stack")
sunburst_2 + coord_polar(theta = "y")
# Third ring: same segments as the second ring (sized and shaded by weight)
# but labelled with the value in the year column. Columns are referenced by
# bare name inside aes() because the layer already receives weight.eg as data.
sunburst_3 <-
  sunburst_2 +
  geom_bar(data = weight.eg,
           aes(x = 3, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(year), x = 3, y = weight),
            position = "stack")
sunburst_3 + coord_polar(theta = "y")
# Final version: comma-formatted y labels, white-to-dark-red fill gradient,
# polar coordinates and a minimal theme
sunburst_3 + scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") + theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each segment when the values in x are stacked end to end:
# the average of each segment's start and end cumulative position.
cs_fun <- function(x) {
  seg_end <- cumsum(x)
  seg_start <- c(0, head(seg_end, -1))
  (seg_end + seg_start) / 2
}
Now the plotting code could look something like this:
# Three concentric layers built from stacked columns in polar coordinates:
# the total weight at x = 1, unit weights at x = 2 and year labels at x = 3.
ggplot(weight.eg) +
  # Centre: a single column for the total (geom_col() is the shorthand for
  # geom_bar(stat = "identity")), labelled at half its height.
  geom_col(
    data = firstLevel,
    mapping = aes(x = 1, y = total_weight)
  ) +
  geom_text(
    data = firstLevel,
    mapping = aes(
      x = 1,
      y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  # Inner ring: one segment per row, filled by weight.
  geom_col(
    mapping = aes(x = 2, y = weight, fill = weight),
    colour = "white",
    size = 0.6
  ) +
  scale_fill_gradient(name = "Weight", low = "white", high = "darkred") +
  # Start a fresh fill scale so the next ring can use its own gradient.
  new_scale_fill() +
  # Inner-ring labels, centred in each segment via cs_fun().
  geom_text(
    mapping = aes(x = 2, y = cs_fun(weight), label = paste(unit, weight))
  ) +
  # Outer ring: same segments with a separate gradient.
  geom_col(
    mapping = aes(x = 3, y = weight, fill = weight),
    size = 0.6,
    colour = "white"
  ) +
  scale_fill_gradient(
    name = "Another Weight?",
    low = "forestgreen",
    high = "white"
  ) +
  # Outer-ring labels show the year column.
  geom_text(
    mapping = aes(label = paste0(year), x = 3, y = cs_fun(weight))
  ) +
  coord_polar(theta = "y")
Which looks like this:
I have got this data set and want to generate a sunburst plot. The data is of 4 columns which are unit, weight, year16 and year17. The sunburst is based on the values in the weight column. The code is there and when adding the coding for the third layer it is giving me an error. I think the error is coming when I am adding the third layer.
library("ggnewscale")
library(ggplot2)
# Read the semicolon-separated file; the first row is read as data, not as a
# header.
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")
# Change column names
colnames(weight.eg) <- c("unit", "weight", "year16", "year17")
# Check the class of every column
sapply(weight.eg, class)
#View(weight.eg)
# Convert the weight and year columns to numeric. Going through
# as.character() gives the same result as indexing the factor levels and
# also works when read.csv() returns character columns instead of factors.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year16 <- as.numeric(as.character(weight.eg$year16))
weight.eg$year17 <- as.numeric(as.character(weight.eg$year17))
# NAs are introduced for cells that are not numbers; remove those rows
weight.eg <- na.omit(weight.eg)
# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)
# First layer: one-row data frame with the total weight. Built with base R
# because this script only loads ggnewscale and ggplot2, so the pipe and
# summarize() are not available here.
firstLevel <- data.frame(total_weight = sum(weight.eg$weight))
# Midpoint of each segment when the values in x are stacked end to end:
# the average of each segment's start and end cumulative position.
cs_fun <- function(x) {
  seg_end <- cumsum(x)
  seg_start <- c(0, head(seg_end, -1))
  (seg_end + seg_start) / 2
}
# Sunburst with the total weight in the centre (x = 1), unit weights at
# x = 2, and the year16 / year17 values at x = 3 and x = 4. Every ring after
# the first coloured one opens its own fill scale with new_scale_fill();
# labels are centred in their segments with cs_fun().
ggplot(weight.eg) +
  # Centre: total weight
  geom_col(data = firstLevel,
           aes(x = 1, y = total_weight)) +
  geom_text(data = firstLevel,
            aes(x = 1, y = total_weight / 2,
                label = paste("Total Weight:", total_weight)),
            colour = "black") +
  # Ring at x = 2: segments sized and filled by weight
  geom_col(aes(x = 2, y = weight, fill = weight),
           colour = "black", size = 0.6) +
  scale_fill_gradient(name = "Weight",
                      low = "white", high = "lightblue") +
  # Open up new fill scale for next ring
  new_scale_fill() +
  geom_text(aes(x = 2, y = cs_fun(weight),
                label = paste(unit, weight))) +
  # Ring at x = 3: segments sized by weight, filled by year16
  geom_col(aes(x = 3, y = weight, fill = year16),
           size = 0.6, colour = "black") +
  scale_fill_gradient(name = "Year16",
                      low = "red", high = "green") +
  geom_text(aes(label = paste0(unit, year16), x = 3,
                y = cs_fun(weight))) +
  # Next ring (x = 4): segments sized by weight, filled by year17.
  # The x = 2 labels are added once above; they are not repeated here.
  new_scale_fill() +
  geom_col(aes(x = 4, y = weight, fill = year17),
           size = 0.6, colour = "black") +
  scale_fill_gradient(name = "Year17",
                      low = "red", high = "green") +
  geom_text(aes(label = paste0(unit, year17), x = 4,
                y = cs_fun(weight))) +
  coord_polar(theta = "y")
The output for dput(weight.eg) is
structure(list(unit = structure(1:6, .Label = c("A", "B", "C",
"D", "E", "F", "Unit"), class = "factor"), weight = c(30, 25,
10, 17, 5, 13), year16 = c(70, 80, 50, 30, 60, 40), year17 = c(50,
100, 20, 30, 70, 60)), .Names = c("unit", "weight", "year16",
"year17"), row.names = 2:7, class = "data.frame", na.action =
structure(1L, .Names = "1", class = "omit"))
I want to include year17 as well, and in the future there will be more
columns, so those have to be added as well. Because of the error I
am not able to figure out what is wrong.
I am using ggplot to make a bar chart. I have already used the scales package to change the scientific "2e5" formatting to the full number with commas to separate. I have been unable to change the axis tick labels so that a value of 1,000,000 appears as 1M, and 3,500,000 as 3.5M, etc. How do I do this?
See below for code:
# Generate dummy dataframe: 12 rows with a numeric `month` column (1 to 12)
# and a numeric `foo` measurement for each month.
df <- structure(list(month = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
), foo = c(2322636.14889234, 8676432.48522654, 207993.984222412,
3310791.19816422, 7540729.19022292, 7316447.75252789, 2410026.6979076,
6202864.60500211, 8700672.56037146, 1334956.53280988, 505991.168320179,
3106733.97500068)), row.names = c(NA, -12L), class = "data.frame")
# Build the monthly bar chart and store it in plot.1; y-axis tick labels are
# written out in full with comma separators.
plot.1 <- ggplot2::ggplot(data = df, mapping = aes(x = month, y = foo)) +
  # geom_col() is the shorthand for geom_bar(stat = "identity").
  geom_col(fill = "darkorchid4", width = 0.5) +
  theme_minimal() +
  labs(
    title = "Monthly foo measurements",
    x = "Month",
    y = "Amount of foo"
  ) +
  scale_y_continuous(labels = scales::comma)
Thanks in advance!
# Monthly bar chart whose y-axis tick labels are shown in millions:
# each value is multiplied by 1e-6 and suffixed with "M" (no separator).
ggplot2::ggplot(data = df, aes(x = month, y = foo)) +
  geom_bar(stat = "identity", fill = "darkorchid4", width = 0.5) +
  theme_minimal() +
  labs(title = "Monthly foo measurements", x = "Month",
       y = "Amount of foo") +
  # `labels` is spelled out in full rather than relying on partial matching
  # of `label`.
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, sep = "")
  )