How to combine multiple ggplot geom_col() into one graph? - r

Question
I am trying to combine three ggplot geom_bar into one geom_bar plot utilising dodge so I can visually compare data across two categorical and one numeric variables. What am I doing wrong?
Individual graphs work
Each graph works on its own (with formatting issues) and I've been following answers on SO like How to overlay two geom_bar? but I don't understand what needs to be done.
ONE <- ggplot(Ireland, aes(TargetGroup, FirstDosePC))+geom_bar(stat = 'identity',width = 0.8, fill = "green") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Ireland by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
TWO <- ggplot(Italy, aes(TargetGroup, FirstDosePC))+ geom_bar(stat = 'identity',width = 0.8, fill = "blue") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Italy by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
THREE <- ggplot(Latvia, aes(TargetGroup, FirstDosePC))+geom_bar(stat = 'identity',width = 0.8, fill = "red") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Latvia by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
An example of failed code
My coding attempts look close to this but it seems to fail - I don't understand why. I am hoping to learn how to add three graphs together with labels and to use dodge
OneTwo <- ONE + geom_bar(FDPercent=Italy, aes(TargetGroup, FirstDose))+ geom_bar(stat = 'identity',width = 0.8, fill = "blue") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Italy by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
My individual graphs look like this
The graph type I'm aiming for
and what I am aiming for is something like this but breaking it out by vaccine type to stretch my learning, etc (source https://towardsdatascience.com/track-covid-19-data-yourself-with-r-eb3e641cd4b3)
My raw data comes from
data <- read.csv("https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv", na.strings = "", fileEncoding = "UTF-8-BOM")
and has been manipulated while testing out R functions, which has left me with a data frame called FDPercent. It has a numeric column called FirstDosePC (percentage of 1st dose per country population) that is linked to Country, with 30 EU countries (ISO 3166-1-alpha-2 categorical data) and 10 TargetGroup types (categorical) in the data frame.
> dput(head(FDPercent,3))
structure(list(Country = c("AT", "AT", "AT"), NumberDosesReceived = c(0L,
0L, 61425L), NumberDosesExported = c(0L, 0L, 0L), FirstDose = c(0L,
0L, 87L), FirstDoseRefused = c(NA_integer_, NA_integer_, NA_integer_
), SecondDose = c(0L, 0L, 0L), UnknownDose = c(0L, 0L, 0L), TargetGroup = c("Age18_24",
"Age18_24", "Age18_24"), Vaccine = c("UNK", "AZ", "COM"), Population = c(8901064L,
8901064L, 8901064L), Date = structure(c(18624, 18624, 18624), class = "Date"),
FirstDosePC = c("0.0000", "0.0000", "0.0010")), row.names = 21:23, class = "data.frame")
> str(FDPercent)
'data.frame': 116532 obs. of 12 variables:
$ Country : chr "AT" "AT" "AT" "AT" ...
$ NumberDosesReceived: int 0 0 61425 0 0 0 61425 0 0 0 ...
$ NumberDosesExported: int 0 0 0 0 0 0 0 0 0 0 ...
$ FirstDose : int 0 0 87 0 0 0 1299 0 0 0 ...
$ FirstDoseRefused : int NA NA NA NA NA NA NA NA NA NA ...
$ SecondDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ UnknownDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ TargetGroup : chr "Age18_24" "Age18_24" "Age18_24" "Age18_24" ...
$ Vaccine : chr "UNK" "AZ" "COM" "MOD" ...
$ Population : int 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 ...
$ Date : Date, format: "2020-12-28" "2020-12-28" "2020-12-28" "2020-12-28" ...
$ FirstDosePC : chr "0.0000" "0.0000" "0.0010" "0.0000" ...

With help from #kat in the comments - changed from geom_bar() to geom_col() and dropped the third variable
# Build one subset of FDPercent per country of interest.
Ireland <- subset(FDPercent, Country == "IE") # constructed a subset by country
Italy <- subset(FDPercent, Country == "IT")
Latvia <- subset(FDPercent, Country == "LV")
# Base layer: Italy as red columns over Date. FirstDosePC is stored as
# character (see the str() output above), hence the as.numeric() conversion.
ONE1 <- ggplot(Italy, aes(Date, as.numeric(FirstDosePC))) +
geom_col(fill = "red", alpha = 1, width = 7) + theme_minimal(base_size = 8) +
xlab(NULL) + ylab(NULL) + scale_x_date(date_labels = "%Y/%m/%d") # reduced the theme formatting
# Overlay Ireland as a second column layer with its own data and a narrower
# width (5 vs 7) -- presumably so the Italy columns stay visible behind it.
OneTwo <- ONE1 + geom_col(data=Ireland, aes(Date,
as.numeric(FirstDosePC)),
fill="Green", alpha = 1,width = 5)
# Overlay Latvia as the third and narrowest (width = 2) column layer.
OneTwoThree <- OneTwo + geom_col(data=Latvia, aes(Date,
as.numeric(FirstDosePC)),
fill="black", alpha = 1, width = 2)
# Print the combined plot with title, subtitle, caption and axis labels.
OneTwoThree + labs(title="Ireland, Italy, & Latvia - First Dose comparision", # added labels to the plot
subtitle="Using First Dose Delivered per day as a percentage of population",
caption = "(ECDC, 2021)",
x="Date Administered",
y="% Population treated")

Related

data restructure for ggplot geom_bar() stacked bar plotting

avg data frame
structure(list(cluster = 1:10, `B cells` = c(0.0369711424087593,
0.00526325696315245, 0.0601665087700304, 0.0231936137674591,
0.00766480549892195, 0.0285649960414246, 0.0044030329888148,
0.00345795624392323, 0.00309644760567017, 0.00757469580646642
), DCreg = c(0.0304752063136609, 0.174423402403555, 0.0163287878795231,
0.0192154395050034, 0.124511133655915, 0.0296144152010606, 0.205920199256583,
0.114542510479173, 0.485649315606826, 0.0260997195368302), `Dendritic cells` = c(0.156500506395882,
0.0106345235402551, 0.185348445999056, 0.395476210792188, 0.0719924126421944,
0.104614178324861, 0.0226961213600642, 0.00292885066859525, 0.0122661582750054,
0.118394797602606), `Dendritic cells CD103` = c(0.0482626330670718,
0.0140976438812366, 0.030373962919268, 0.0614351282717271, 0.189884617234425,
0.35658217311524, 0.0170390739879794, 0.0042469791834164, 0.0233514821789908,
0.0619204360724114), Endothelium = c(0.11337268119519, 0.027025412632833,
0.43869939276274, 0.0662483745710424, 0.0331520081202891, 0.164940771021627,
0.050135082662031, 0.00351285357934976, 0.0201434603120533, 0.0658151087814588
), Epithelium = c(0.00418217375070304, 0.000413203430326014,
0.0104665752013841, 0.00525017082076173, 0.00415698684351819,
0.0333637286413386, 0.000431569929321054, 0, 0.0011976402913935,
0.000419107154908937), Fibroblasts = c(0.00612607297867521, 0.0116371963351148,
0.0108995123396445, 0.0117009481628146, 0.00674570810846355,
0.0145571600114712, 0.0120879220427041, 0.00272604244680674,
0.00772202564316953, 0.0272894372187893), `Macrophages other` = c(0.00101589948056542,
0.000645130694683314, 0, 0, 0.000639755622911849, 0, 0.000197788594031649,
0.00136588418173722, 0, 0.000420171738310913), `Macrophages type 1` = c(0.221136736926214,
0.0101728310491049, 0.0295121583899105, 0.0455316207473085, 0.0230660380060092,
0.0222078529371378, 0.015179095607796, 0.00459851371158574, 0.0112212936162074,
0.02937463664781), `Macrophages type 2` = c(0.0411011962682536,
0.0522714029078864, 0.012334445025602, 0.0568282306829578, 0.0453391303748083,
0.0181451496347937, 0.239616155787136, 0.0115489617356957, 0.04981525808734,
0.462030477544264), Neutrophils = c(0.0766806635700175, 0.00442125133471751,
0.0476726698091672, 0.0236749605376406, 0.00911361867045396,
0.0236169696110325, 0.00537803767758349, 0.0032239571528306,
0.00201957474248881, 0.0160311845078706), `NK cells` = c(0, 0,
0.000108464194313773, 0, 0, 8.99698299254026e-05, 0.000114169258081956,
0, 4.57749702462694e-05, 2.78396436525612e-05), `T cells CD4` = c(0.0330641154468336,
0.0213946654236908, 0.0323515137814534, 0.148686432010321, 0.0500449048718068,
0.0685338874314457, 0.0273478878575203, 0.00472971607890761,
0.0328998359523529, 0.0354818425253482), `T cells CD8` = c(0.0172498783937768,
0.00877876825324442, 0.0156948623402281, 0.0207354640030442,
0.0145536348676947, 0.0146643634343241, 0.0155197086731341, 0.00171509323694132,
0.0135851481885585, 0.0159896002840603), `T reg cells` = c(0.00451599932441037,
0.0058712074137469, 0.00274652046695111, 0.0167445990360021,
0.0127422536359504, 0.0142171857157357, 0.00996063310868601,
0.00089148571457417, 0.0113706843090688, 0.00663049091849752),
Tumour = c(0.0765887917753441, 0.651476092235795, 0.0173767962070959,
0.0647526184622169, 0.395840854655601, 0.0472273714361081,
0.368387800802699, 0.839842321316499, 0.323145170321728,
0.111585860905902), Unclassified = c(0.132756302704642, 0.00147401150065844,
0.0899193839136316, 0.0405261886295129, 0.0105521371910369,
0.0590598276124738, 0.00558572040583437, 0.000668874269964592,
0.00247072989889988, 0.0149145931108126)), class = "data.frame", row.names = c(NA,
-10L))
cluster B cells DCreg Dendritic cells Dendritic cells CD103 Endothelium Epithelium Neutrophils NK cells T cells CD4 T cells CD8 T reg cells Tumour Unclassified
1 1 0.036971142 0.03047521 0.156500506 0.048262633 0.113372681 0.0041821738 0.076680664 0.000000e+00 0.033064115 0.017249878 0.0045159993 0.07658879 0.1327563027
2 2 0.005263257 0.17442340 0.010634524 0.014097644 0.027025413 0.0004132034 0.004421251 0.000000e+00 0.021394665 0.008778768 0.0058712074 0.65147609 0.0014740115
3 3 0.060166509 0.01632879 0.185348446 0.030373963 0.438699393 0.0104665752 0.047672670 1.084642e-04 0.032351514 0.015694862 0.0027465205 0.01737680 0.0899193839
4 4 0.023193614 0.01921544 0.395476211 0.061435128 0.066248375 0.0052501708 0.023674961 0.000000e+00 0.148686432 0.020735464 0.0167445990 0.06475262 0.0405261886
5 5 0.007664805 0.12451113 0.071992413 0.189884617 0.033152008 0.0041569868 0.009113619 0.000000e+00 0.050044905 0.014553635 0.0127422536 0.39584085 0.0105521372
6 6 0.028564996 0.02961442 0.104614178 0.356582173 0.164940771 0.0333637286 0.023616970 8.996983e-05 0.068533887 0.014664363 0.0142171857 0.04722737 0.0590598276
7 7 0.004403033 0.20592020 0.022696121 0.017039074 0.050135083 0.0004315699 0.005378038 1.141693e-04 0.027347888 0.015519709 0.0099606331 0.36838780 0.0055857204
8 8 0.003457956 0.11454251 0.002928851 0.004246979 0.003512854 0.0000000000 0.003223957 0.000000e+00 0.004729716 0.001715093 0.0008914857 0.83984232 0.0006688743
9 9 0.003096448 0.48564932 0.012266158 0.023351482 0.020143460 0.0011976403 0.002019575 4.577497e-05 0.032899836 0.013585148 0.0113706843 0.32314517 0.0024707299
10 10 0.007574696 0.02609972 0.118394798 0.061920436 0.065815109 0.0004191072 0.016031185 2.783964e-05 0.035481843 0.015989600 0.0066304909 0.11158586 0.0149145931
I have the above data frame and am trying to create a stacked bar using ggplot geom_bar() where each bar = 1 cluster (10 clusters, so 10 bars) and each bar is filled with the proportions of each cell type contributing to a cluster (proportion values for each cluster add up to 1).
I have started by changing the layout of the data :
avgt = avg %>% pivot_longer(cols = -cluster)
Which gave me this layout:
cluster name value
1 1 B cells 0.0370
2 1 DCreg 0.0305
3 1 Dendritic cells 0.157
4 1 Dendritic cells CD103 0.0483
5 1 Endothelium 0.113
6 1 Epithelium 0.00418
7 1 Fibroblasts 0.00613
8 1 Macrophages other 0.00102
9 1 Macrophages type 1 0.221
10 1 Macrophages type 2 0.0411
However I am not sure what to do next as if I use the 'cluster' column as X and 'name' column for the 'fill' I, as expected, get equal proportions for each cell type
# Stacked bar chart: one bar per cluster, filled by cell type (`name`).
# NOTE(review): no y aesthetic is mapped, so geom_bar() uses its default count
# stat. avgt holds one row per cluster/name pair, so every segment has a count
# of 1 and position = "fill" yields equal shares. To plot the proportions in
# `value`, map y = value and use geom_col(position = "fill") instead.
p = ggplot(avgt, aes(x = as.factor(cluster), fill = as.factor(name)))+
geom_bar(position = "fill") +
theme_classic()+
# Label the continuous axis as percentages.
scale_y_continuous(labels = scales::percent) +
# Swap the axes so the bars are drawn horizontally.
coord_flip() +
# Enlarge axis text and titles.
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
# Enlarge legend entries.
theme(legend.text = element_text(size = 20)) +
xlab("Community")+
ylab("Percentage distribution") +
# Blank legend title for the fill scale.
labs( fill = "")
# Print the plot.
p
geom_bar() stacked plot result
Any ideas of how I can get this to work?
Thanks in advance

Adding error bars from different columns in grouped barplot with ggplot2

I'd like to add error bars to my barplot. The data for the standard deviations are in two different columns.
This is my barplot:
# load packages
> library(data.table)
> library(ggplot2)
> library(tidyr)
>
> results <- fread("Results.csv", header=TRUE, sep=";")
>
> str(results)
Classes ‘data.table’ and 'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
- attr(*, ".internal.selfref")=<externalptr>
>
> df <- data.frame(results)
>
> str(df)
'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
>
>
>
> # Manually set factor levels of 'Organism' column to plot in a logical order.
> df$Organism = factor(df$Organism,
+ levels=c("without organism", "AC1432", "BF3523", "XR2405", "D3425", "XR2463", "ATF259"))
>
> df.g <- gather(df, Molecule1, Molecule2, -Organism, -Molecule1sd, -Molecule2sd)
> df.sd <- gather(df, Molecule1sd, Molecule2sd, -Molecule1, -Molecule2, -Organism)
> ggplot(df.g, aes(Molecule1, Molecule2)) +
+ geom_bar(aes(fill = Organism), stat = "identity", position = "dodge")
barplot without error bar
used data:
> dput(df)
structure(list(Organism = structure(c(2L, 5L, 3L, 4L, 6L, 7L,
1L), .Label = c("without organism", "AC1432", "BF3523", "XR2405",
"D3425", "XR2463", "ATF259"), class = "factor"), Molecule1 = c(39.45920899,
418.4234805, 189.162295, 49.314698, 4610.921188, 751.7070352,
35), Molecule1sd = c(19.55450482, 70.91013667, 102.7566193, 21.20841393,
275.8934527, 71.62450643, NA), Molecule2 = c(275.9147606, 6510.974605,
235.247381, 499.8928585, 11205.33907, 9507.869294, 250), Molecule2sd = c(21.04668977,
291.1223384, 109.652064, 67.1000078, 94.54544271, 707.1950335,
NA)), row.names = c(NA, -7L), class = "data.frame")
and this is my trial for the error bars
ggplot(df.g, aes(Molecule1, Molecule2)) +
geom_bar(aes(fill = Organism), stat = "identity", position = "dodge") +geom_errorbar(df.sd, aes_Molecule1(ymin=Molecule1-Molecule1sd, ymax=Molecule1+Molecule1sd),aes_Molecule2(ymin=Molecule2-Molecule2sd, ymax=Molecule2+Molecule2sd), width=.2 )
but my idea doesn't work. How can I add error bars from two different columns?
It might be easier if you reshape your dataset with columns for Organism, Molecule, mean and sd. Here is a tidyverse way to do it:
Package and Dataset
library(tidyverse)
df <- data.frame(Organism = c("AC1432", "D3425", "BF3523", "XR2405",
"XR2463", "ATF259", "without organism"),
Molecule1 = c(39.5, 418.4, 189.2, 49.3,
4610.9, 800, 10),
Molecule1sd = c(19.6, 70.9, 102.8, 21.2,
275.9, 100, 1),
Molecule2 = c(276, 6511, 235, 500,
11205, 9500, 250),
Molecule2sd = c( 21, 291.1, 109.7, 67.1,
94.5, 50, 2))
# I estimated the not shown values in your str(result)
Reshaping
# Reshape to long format: one row per Organism x Molecule, with the mean in
# column `m` and the standard deviation in column `s`.
df2 <- df %>%
  # Suffix the mean columns with "m" so every measurement column reads
  # Molecule<1|2><suffix>; the first suffix character names the value column.
  rename(Molecule1m = Molecule1, Molecule2m = Molecule2) %>%
  pivot_longer(
    cols = -Organism,
    names_to = c("Molecule", ".value"),
    names_pattern = "(Molecule[12])(.)"
  ) %>%
  # Fix the order in which the organisms are drawn.
  mutate(
    Organism = factor(
      Organism,
      levels = c(
        "without organism", "AC1432", "BF3523", "XR2405",
        "D3425", "XR2463", "ATF259"
      )
    )
  )
Plot
# Dodged bars of the mean (m) per Molecule and Organism, with error bars
# spanning m - s to m + s.
ggplot(df2, aes(Molecule, m, fill = Organism)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = m - s, ymax = m + s), position = "dodge")

What is wrong with my custom colour palette in this plot?

Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!

Adjust area from geom_area to a line from geom_line

I'm trying to make an hourly dispatch curve with generation and energy consumption data, which have the characteristic that when we do a power balance (generation minus consumption) we get values close to zero.
The generation data also include net interchange values, which are negative when the power system is exporting energy and positive when the system is importing energy to meet the consumption.
Thus, for the plot created with geom_area and geom_line to be correct, the black line (consumption) needs to line up with the generation area, so that there is no gap between the area and the black line. In my attempts I couldn't achieve this. As you can see, even though the energy balance results in zero, there is a gap between 19 and 20 hours. I don't know what is wrong. Does anyone have an idea how to do that?
Thanks in advance.
Data to the plot:
generation <-
data.frame('dayHour' = c('18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00'),
'power' = c(-1364.290, -433.110, 1132.39, 749.48, 463.75, 467.8, 469.35, 436.51, 2025.5, 2133.07, 2306.85, 2304.91, 211.52, 213.16, 214.33, 214.59),
'label' = c('net interchange', 'net interchange', 'net interchange', 'net interchange', 'gas', 'gas', 'gas', 'gas', 'hydro', 'hydro', 'hydro', 'hydro', 'biomass', 'biomass', 'biomass', 'biomass'))
generation$label <- factor(generation$label, levels = c('net interchange', 'gas', 'hydro', 'biomass'))
net.load <-
data.frame('dayHour' = c('18/11/2018 18:00', '18/11/2018 19:00', '18/11/2018 20:00', '18/11/2018 21:00'), 'power' = c(1336.48, 2380.91, 4122.91, 3705.49), 'label' = c('net load', 'net load', 'net load', 'net load'))
generation$dayHour <-
as.POSIXct(strptime(generation$dayHour,format='%d/%m/%Y %H:%M'))
net.load$dayHour <-
as.POSIXct(strptime(net.load$dayHour,format='%d/%m/%Y %H:%M'))
Power balance
pb <-
filter(generation, label == "biomass")$power +
filter(generation, label == "hydro")$power +
filter(generation, label == "gas")$power +
filter(generation, label == "net interchange")$power -
net.load$power
summary(pb)
Dispatch curve
ggplot() +
geom_area(data = generation,
aes(y = power,
x = dayHour,
fill = label)) +
geom_line(data = net.load,
aes(y = power,
x = dayHour,
colour = label),
size = 1.2,
colour = "black") +
labs(fill = "generation",
colour = 'net load')
It looks like position_stack is getting confused when the interpolation crosses the x-axis.
To fix it, you can interpolate manually before plotting (e.g. with approx):
library(tidyverse)
generation <- data.frame(
dayHour = structure(c(1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400), class = c("POSIXct", "POSIXt"), tzone = ""),
power = c(-1364.29, -433.11, 1132.39, 749.48, 463.75, 467.8, 469.35, 436.51, 2025.5, 2133.07, 2306.85, 2304.91, 211.52, 213.16, 214.33, 214.59),
label = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("net interchange", "gas", "hydro", "biomass"), class = "factor")
)
# Linearly interpolate every generation series onto a dense grid of 501 time
# points. All labels share the same dayHour values, so approx() produces the
# same grid for each of them.
generation_interpolated <- generation %>%
  group_by(label) %>%
  summarise(data = list(as_tibble(approx(dayHour, power, n = 501)))) %>%
  # Name the list-column explicitly: unnest() without `cols` is deprecated
  # since tidyr 1.0.0 and emits a warning.
  unnest(cols = data) %>%
  # approx() returns x as plain numbers (seconds since the epoch); convert
  # them back to date-times.
  mutate(x = as.POSIXct(x, origin = "1970-01-01", tz = "UTC"))

# Total power at each interpolated time point, summed over all labels.
net_power_interpolated <- generation_interpolated %>%
  group_by(x) %>%
  summarise(y = sum(y))

# Stacked area per label with the summed power drawn as a line on top.
ggplot(generation_interpolated, aes(x, y)) +
  geom_area(aes(fill = label)) +
  geom_line(data = net_power_interpolated)
To see how approx works, a simpler, ungrouped example:
df <- data.frame(x = c(0, 5, 10), y = c(0, 20, 10))
interpolated <- approx(df$x, df$y, n = 11)
str(interpolated)
#> List of 2
#> $ x: int [1:11] 0 1 2 3 4 5 6 7 8 9 ...
#> $ y: num [1:11] 0 4 8 12 16 20 18 16 14 12 ...
ggplot(as.data.frame(interpolated), aes(x, y)) +
geom_line() +
geom_point() +
geom_point(data = df, color = 'dodgerblue', size = 4)

R 3.2.1 incorrect mapping of color

This is based on R 3.2.1, reverse colors on map
I have two data points, one is more than 66%, which should be green, other is less than 33%, which should be red.
However, the less than 33% is orange.
Below is the code, which looks correct (but something is wrong)
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Percent_SEP12_Assets <- factor(Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
The dput(sep) is
structure(list(School = structure(1:2, .Label = c("Out of City\\00L001",
"Out of City\\O308"), class = "factor"), Latitude = c(40.821367,
41.310426), Longitude = c(-73.488313, -73.837612), Windows.SEP.11 = c(4L,
69L), Mac.SEP.11 = 0:1, Windows.SEP.12 = c(3L, 26L), Mac.SEP.12 = c(16L,
1L), newCol = c(82.6086956521739, 27.8350515463918)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -2L), class = "data.frame")
Output is this (incorrect circled in red) ........ How to fix?
Responses
Coordinates are correct, I am asking why is the point incorrectly colored. I thought this logic is correct
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Updated code
I tried this per #bondeded user and resulting map is same as before
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
sep$Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
sep$Percent_SEP12_Assets <- factor(sep$Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
Actual CSV
Here is actual CSV, two rows
School Latitude Longitude Windows-SEP-11 Mac-SEP-11 Windows-SEP-12 Mac-SEP-12
Out of City\00L001 40.821367 -73.488313 4 0 3 16
Out of City\O308 41.310426 -73.837612 69 1 26 1
The problem is that by default ggplot2 drops unused levels from factors. There are two options:
Specify drop = FALSE
# Keep unused factor levels in the colour scale (drop = FALSE) so the three
# colours stay matched to the three levels even when a level has no points.
ggmap(map) +
  geom_point(
    data = sep,
    # Refer to the column by its bare name: `sep$...` inside aes() is
    # discouraged by ggplot2 and leaks into the legend title.
    aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
    size = 9, alpha = 0.6
  ) +
  scale_color_manual(values = c("green", "orange", "red"), drop = FALSE)
Specify the values for each level:
# Name each colour after its factor level so the mapping does not depend on
# which levels are present in the data.
ggmap(map) +
  geom_point(
    data = sep,
    # Refer to the column by its bare name: `sep$...` inside aes() is
    # discouraged by ggplot2 and leaks into the legend title.
    aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
    size = 9, alpha = 0.6
  ) +
  scale_color_manual(
    values = c(
      `More than 66%` = "green",
      `Between 33% and 66%` = "orange",
      `Less than 33%` = "red"
    )
  )
Clearly you could also do both.
Now I got what you meant. The problem is in your ifelse structure. Maybe this can help:
ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
[1] "More than 66%" "Less than 33%"

Resources