Colour dots based on conditions in ggplot - r

I have this dataset
a <- data.frame(PatientID = c("0002" ,"0004", "0005", "0006" ,"0009" ,"0010" ,"0018", "0019" ,"0020" ,"0027", "0039" ,"0041" ,"0042", "0043" ,"0044" ,"0045", "0046", "0047" ,"0048" ,"0049", "0055"),
volume = c( 200 , 100 , 243 , 99 , 275, 675 ,345 , 234 , 333 ,444, 123 , 274 , 442 , 456 ,666 , 567 , 355 , 623 , 105 , 677 ,876),
Status= c("New" , "Old" , "New" , "New" , "Old", "New" ,"Old" , "New" , "Old" , "New" , "New" ,"New" ,"Old" , "New" ,"New" ,"Old" , "New" , "Old" , "New" , "Old" ,"Old"),
sex = c( 1 , 1 , 1 , 1 , 0, 0 ,0 , 0 , 0 ,1 , 1 , 1 , 0 , 0 ,1 , 1 , 1 , 1 , 1 , 1 ,1), stringsAsFactors = F)
and this code
color <- c("#00B7EB","#EE2A7B")
ggplot(a, aes(y = a$volume, x = a$Status, fill = a$Status)) +
geom_boxplot() +
geom_point(alpha=0.4) +
scale_fill_manual(values=color) +
labs(x='', y='Volume') +
theme_classic() +
theme( text = element_text( size = 15))
This, produces the following plot
THE QUESTION:
What can I do to colour the dots in this ggplot based on the following condition?:
If volume is >100 in women (sex==1) red, otherwise black
If volume is >200 in men (sex==0) red, otherwise black
Thank you so much!

One way to do this is by setting the colour aesthetic of geom_point to your condition:
geom_point(alpha=0.4, aes(colour = (sex == 1 & volume > 100) | (sex == 0 & volume > 200))) +
Then use scale_colour_manual to set the colours to red and black:
scale_colour_manual(values = c("black", "red")) +

Related

How to combine multiple ggplot geom_col() into one graph?

Question
I am trying to combine three ggplot geom_bar into one geom_bar plot utilising dodge so I can visually compare data across two categorical and one numeric variables. What am I doing wrong?
Individual graphs work
Each graph works on it's own (with formatting issues) and I've been following answers on SO like How to overlay two geom_bar? but I'm not understanding what's needed to be done.
ONE <- ggplot(Ireland, aes(TargetGroup, FirstDosePC))+geom_bar(stat = 'identity',width = 0.8, fill = "green") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Ireland by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
TWO <- ggplot(Italy, aes(TargetGroup, FirstDosePC))+ geom_bar(stat = 'identity',width = 0.8, fill = "blue") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Italy by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
THREE <- ggplot(Latvia, aes(TargetGroup, FirstDosePC))+geom_bar(stat = 'identity',width = 0.8, fill = "red") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Latvia by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
An example of failed code
My coding attempts look close to this but it seems to fail - I don't understand why. I am hoping to learn how to add three graphs together with labels and to use dodge
OneTwo <- ONE + geom_bar(FDPercent=Italy, aes(TargetGroup, FirstDose))+ geom_bar(stat = 'identity',width = 0.8, fill = "blue") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Italy by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
My individual graphs look like this
The graph type I'm aiming for
and what I am aiming for is something like this but breaking it out by vaccine type to stretch my learning, etc (source https://towardsdatascience.com/track-covid-19-data-yourself-with-r-eb3e641cd4b3)
My raw data comes from
data <- read.csv("https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv", na.strings = "", fileEncoding = "UTF-8-BOM")
and is manipulated to test out R functions that has left me with a dataframe called FDPercent with a numeric column called FirstDosePC (Percentage of 1st Dose per country population) that is linked to Country with 30 EU countries (ISO 3166-1-alpha-2 categorical data) and 10 TargetGroup types (categorical) in the data frame.
> dput(head(FDPercent,3))
structure(list(Country = c("AT", "AT", "AT"), NumberDosesReceived = c(0L,
0L, 61425L), NumberDosesExported = c(0L, 0L, 0L), FirstDose = c(0L,
0L, 87L), FirstDoseRefused = c(NA_integer_, NA_integer_, NA_integer_
), SecondDose = c(0L, 0L, 0L), UnknownDose = c(0L, 0L, 0L), TargetGroup = c("Age18_24",
"Age18_24", "Age18_24"), Vaccine = c("UNK", "AZ", "COM"), Population = c(8901064L,
8901064L, 8901064L), Date = structure(c(18624, 18624, 18624), class = "Date"),
FirstDosePC = c("0.0000", "0.0000", "0.0010")), row.names = 21:23, class = "data.frame")
> str(FDPercent)
'data.frame': 116532 obs. of 12 variables:
$ Country : chr "AT" "AT" "AT" "AT" ...
$ NumberDosesReceived: int 0 0 61425 0 0 0 61425 0 0 0 ...
$ NumberDosesExported: int 0 0 0 0 0 0 0 0 0 0 ...
$ FirstDose : int 0 0 87 0 0 0 1299 0 0 0 ...
$ FirstDoseRefused : int NA NA NA NA NA NA NA NA NA NA ...
$ SecondDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ UnknownDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ TargetGroup : chr "Age18_24" "Age18_24" "Age18_24" "Age18_24" ...
$ Vaccine : chr "UNK" "AZ" "COM" "MOD" ...
$ Population : int 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 ...
$ Date : Date, format: "2020-12-28" "2020-12-28" "2020-12-28" "2020-12-28" ...
$ FirstDosePC : chr "0.0000" "0.0000" "0.0010" "0.0000" ...
With help from #kat in the comments - changed from geom_bar() to geom_col() and dropped the third variable
Ireland <- subset(FDPercent, Country == "IE") #contructed a subset by country
Italy <- subset(FDPercent, Country == "IT")
Latvia <- subset(FDPercent, Country == "LV")
ONE1 <- ggplot(Italy, aes(Date, as.numeric(FirstDosePC))) +
geom_col(fill = "red", alpha = 1, width = 7) + theme_minimal(base_size = 8) +
xlab(NULL) + ylab(NULL) + scale_x_date(date_labels = "%Y/%m/%d") #reduced the theme formating
OneTwo <- ONE1 + geom_col(data=Ireland, aes(Date,
as.numeric(FirstDosePC)),
fill="Green", alpha = 1,width = 5)
OneTwoThree <- OneTwo + geom_col(data=Latvia, aes(Date,
as.numeric(FirstDosePC)),
fill="black", alpha = 1, width = 2)
OneTwoThree + labs(title="Ireland, Italy, & Latvia - First Dose comparision", #added labels to the data
subtitle="Using First Dose Delivered per day as a percentage of population",
caption = "(ECDC, 2021)",
x="Date Administered",
y="% Population treated")

Fine tuning vjust ggplot2

I am trying to make a plot similar to this one:
Right now, I am working on the IL-b1 plot (since it comes first in the dataset) and will apply the settings across all plots.
I am currently working on the x-axis labels, but am running into problems with vjust no matter how small I make it. Here is the image of my current code:
Any advice on how to make the labels closer to the x-axis? No matter how small i make vjust is doesn't get any closer...
Here is my current code:
il1b <- ggplot(data = mouse) +
geom_violin(aes(x = Treatment, y = `IL-1b_fold`)) +
geom_jitter(aes(x = Treatment, y = `IL-1b_fold`)) +
theme_classic() +
ggtitle(label = "IL-1\u03b2") +
ylab("fold mock control") +
theme(plot.title = element_text(face = "bold"),
axis.title.x = element_text(face = "bold", size = 12),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", angle = 45, size = 11, color = "black", vjust = -0.00001),
axis.text.y = element_text(size = 11, color = "black"))
setwd(output)
ggsave("il1b.png", il1b, width = 5, height = 5, units = "in")
output is a stored file path to the output directory. This is all within an R Markdown code chunk.
Reprex of first 10 obs in dataset:
wrapr::build_frame(\n \"GBM#\" , \"Treatment\", \"IL-1b\", \"IL-6\" , \"TNF-a\", \"IP-10\" , \"IL-29\", \"IFN-a2\", \"IFN-b\", \"IL-10\", \"IFN-y\", \"IL-1b_fold\", \"IL-6_fold\", \"TNF-a_fold\", \"IP-10_fold\", \"IL-29_fold\", \"IFN-a2_fold\", \"IFN-b_fold\", \"IL-10_fold\", \"IFN-y_fold\" |\n 1 , \"Mock\" , 484.4 , \"2290.62\" , 2055 , 951.4 , 433.4 , 567.8 , 400.4 , 2595 , 60.64 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 |\n 1 , \"PVSRIPO\" , 383.6 , \"23233.55\", 1555 , 9865 , 298.8 , 546.6 , 240.9 , 4816 , 136.4 , 0.792 , 10.14 , 0.7564 , 10.37 , 0.6894 , 0.9628 , 0.6017 , 1.856 , 2.249 |\n 1 , \"Poly(I:C)\", 849.5 , \"37969.47\", 3451 , 1377 , 265 , 457.2 , 255.3 , 8435 , 53.69 , 1.754 , 16.58 , 1.679 , 1.448 , 0.6115 , 0.8052 , 0.6376 , 3.251 , 0.8854 |\n 1 , \"cGAMP\" , 472.2 , \"12495.37\", 4002 , 3.078e+04, 304.4 , 741.4 , 324.7 , 3411 , 42.48 , 0.9748 , 5.455 , 1.947 , 32.35 , 0.7024 , 1.306 , 0.8109 , 1.315 , 0.7005 |\n 2 , \"Mock\" , 23 , \"2851.31\" , 5.25 , 35.06 , 4.54 , 8.69 , 21.84 , 4.11 , 8.55 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 |\n 2 , \"PVSRIPO\" , 11.57 , \"142.84\" , 16.69 , 6116 , 13.57 , 23.46 , 21.84 , 5.63 , 5.14 , 0.503 , 0.0501 , 3.179 , 174.5 , 2.989 , 2.7 , 1 , 1.37 , 0.6012 |\n 2 , \"Poly(I:C)\", 28.41 , \"158.04\" , 38.85 , 822.7 , 7.18 , 3.11 , 6.61 , 5.48 , 4.47 , 1.235 , 0.05543 , 7.4 , 23.47 , 1.581 , 0.3579 , 0.3027 , 1.333 , 0.5228 |\n 2 , \"cGAMP\" , 71.07 , \"1166.8\" , 162.7 , 1794 , 8.08 , 7.72 , 22.55 , 25.62 , 9.94 , 3.09 , 0.4092 , 30.98 , 51.18 , 1.78 , 0.8884 , 1.033 , 6.234 , 1.163 |\n 4 , \"Mock\" , 8.67 , \"1148.81\" , 203.6 , 1.86 , 1.88 , 35.52 , 39.1 , 8.21 , 74.05 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 |\n 4 , \"PVSRIPO\" , 88.52 , \"1997.02\" , 551 , 211.4 , 1.88 , 35.52 , 39.1 , 4.71 , 74.05 , 10.21 , 1.738 , 2.706 , 113.6 , 1 , 1 , 1 , 0.5737 , 1 )\n
Thanks so much to #Daniel Molitor and #teunbrand! I had a general misunderstanding of how vjust functions. The following setting got the desired output: vjust = 0.65
Here is the code for the revised output, which is attached:
il1b <- ggplot(data = mouse) +
geom_violin(aes(x = Treatment, y = `IL-1b_fold`)) +
geom_jitter(aes(x = Treatment, y = `IL-1b_fold`)) +
theme_classic() +
ggtitle(label = "IL-1\u03b2") +
ylab("fold mock control") +
theme(plot.title = element_text(face = "bold"),
axis.title.x = element_text(face = "bold", size = 12),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", angle = 45, size = 11, color = "black", vjust = 0.65),
axis.text.y = element_text(size = 11, color = "black"))
setwd(output)
ggsave("il1b.png", il1b, width = 5, height = 5, units = "in")
Revised output:

create directed arrow plots of two variables using ggplot2 in R

I have two variables (V1,V2) measured on same subject (id) at two time points (timepoint). I want to have a scatterplot with arrow paths to show how values moved from T1 to T2 for the same subject.
In my example, some subjects do not have change in V1 nor V2, it would be ideal to show just as one dot for those sub (sub 1 for example), but I am OK with two dots for two visits, since they will be overlap. There are also sub with a decrease in either V1 or V2 (sub 2 for example), those sub were shown in red arrow above. The third group of subjects show an increase in either V1 or V2 (sub 6 and 7): these sub were in green.
However, what I really need is all arrows point from T1 to T2. That is I hope the green arrow change direction.
The dataset can be generated by:
datatest <- data.frame(timepoint =rep(seq(2,1),8),
id = c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8),
V1= c( 30.29, 30.29, 21.60, 31.43, 20.75,20.75, 21.60, 30.03, 21.60, 31.30, 31.60, 21.72, 31.6, 20.02, 11.60, 20.16),
V2=c(40, 40, 30.78, 41.63, 40.41, 40.41,30.78, 40.97, 20.78, 40.84, 41.85, 41.85, 40.78, 31.79,20.78, 30.23))
which looks like this:
timepoint id V1 V2
1 2 1 30.29 40.00
2 1 1 30.29 40.00
3 2 2 21.60 30.78
4 1 2 31.43 41.63
5 2 3 20.75 40.41
6 1 3 20.75 40.41
7 2 4 21.60 30.78
8 1 4 30.03 40.97
9 2 5 21.60 20.78
10 1 5 31.30 40.84
11 2 6 31.60 41.85
12 1 6 21.72 41.85
13 2 7 31.60 40.78
14 1 7 20.02 31.79
15 2 8 11.60 20.78
16 1 8 20.16 30.23
To generate the (wrong) plot I currently have, please run the codes below:
library(ggplot2)
library(lemon)
ggplot(datatest, aes(V1,V2,color=as.factor(timepoint),group=id)) +ggtitle("V2 vs V1 from T1 to T2")+
geom_pointline(linesize=1, size=2, distance=4, arrow = arrow(angle = 30, length = unit(0.1, "inches"), ends = "first", type = "open") )+
scale_x_continuous(limits = c(0,33), breaks=seq(0,30,10), expand = c(0, 0)) +
scale_y_continuous(limits = c(0,43), breaks=seq(0,44,10),expand = c(0, 0))+
scale_color_manual(values=c("green","red"))+labs(color = "Timepoint")
The plot currently looks like this:
Thank you!
Would this get you closer?
library(dplyr)
library(tidyr)
library(ggplot2)
data <- data.frame(timepoint =rep(seq(2,1),8),
id = c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8),
V1= c( 30.29, 30.29, 21.60, 31.43, 20.75,20.75, 21.60, 30.03, 21.60, 31.30, 31.60, 21.72, 31.6, 20.02, 11.60, 20.16),
V2=c(40, 40, 30.78, 41.63, 40.41, 40.41,30.78, 40.97, 20.78, 40.84, 41.85, 41.85, 40.78, 31.79,20.78, 30.23))
data <- data %>%
mutate(row_id = paste0("T", timepoint)) %>%
pivot_wider(id_cols = id,
names_from = row_id,
values_from = c(V1, V2)) %>%
mutate(colour = ifelse((V1_T1 > V1_T2) | (V2_T1 > V2_T2), "red", "green"))
ggplot(data = data) +
geom_point(aes(x = V1_T1, y = V2_T1)) +
geom_point(aes(x = V1_T2, y = V2_T2)) +
geom_segment(aes(x = V1_T1, xend = V1_T2, y = V2_T1 , yend = V2_T2, colour = colour),
arrow = arrow(length = unit(0.3,"cm"))) +
scale_x_continuous(
limits = c(0, 33),
breaks = seq(0, 30, 10),
expand = c(0, 0)
) +
scale_y_continuous(
limits = c(0, 43),
breaks = seq(0, 44, 10),
expand = c(0, 0)
)
You can filter the object data to remove those lines where V1 and V2 do not change and not draw the lines with length zero.

The scale_fill_gradientn() successfully fills map but fails showing all colors in the legend

I am trying to create a map plot using ggplot2, I want to show the counties with 0 acreages with the color blue and higher acreage with a gradient from red to green color. I am able to achieve this in the map's fill-in colors, but the legend does not show the blue for the 0.0 acreage value. I pre-divided the color and values and used scale_fill_gradientn(), however the legend is not to success. I am very unsure of what is going wrong, help is highly appreciated. Please refer to the reproducible code below.
library(tidyverse)
library(BBmisc)
d <- data.frame(fips = c(10001 , 10003 , 10005 , 21001 , 21003 , 21005 , 21007 , 21009 , 21011 , 21015 , 21017 , 21021 , 21023 , 21027 , 21029 , 21031 , 21033 , 21035 , 21037 , 21039 , 21041 , 21043 , 21045 , 21047 , 21049 , 21053 , 21055 , 21057 , 21059 , 21061 , 21065 , 21067 , 21069 , 21073 , 21075 , 21077 , 21079 , 21081 , 21083 , 21085 , 21087 , 21089 , 21091 , 21093 , 21097 , 21099 , 21101 , 21103 , 21105 , 21107 , 21109 , 21111 , 21113 , 21117 , 21121 , 21123 , 21125 , 21135 , 21137 , 21139 , 21141 , 21143 , 21145 , 21147 , 21149 , 21151 , 21155 , 21157 , 21159 , 21161 , 21163 , 21165 , 21167 , 21169 , 21171 , 21173 , 21175 , 21177 , 21179 , 21181 , 21183 , 21185 , 21187 , 21191 , 21197 , 21199 , 21201 , 21203 , 21205 , 21207 , 21209 , 21211 , 21213 , 21215 , 21217 , 21219 , 21221 , 21223 , 21225 , 21227 , 21229 , 21231 , 21233 , 21235 , 21239 , 24001 , 24003 , 24005 , 24009 , 24011 , 24013 , 24015 , 24017 , 24019 , 24021 , 24023 , 24025 , 24027 , 24029 , 24031 , 24033 , 24035 , 24037 , 24039 , 24041 , 24043 , 24045 , 24047 , 37001 , 37003 , 37005 , 37007 , 37009 , 37011 , 37013 , 37015 , 37017 , 37019 , 37021 , 37023 , 37025 , 37027 , 37029 , 37031 , 37033 , 37035 , 37037 , 37039 , 37041 , 37043 , 37045 , 37047 , 37049 , 37051 , 37053 , 37055 , 37057 , 37059 , 37061 , 37063 , 37065 , 37067 , 37069 , 37071 , 37073 , 37077 , 37079 , 37081 , 37083 , 37085 , 37087 , 37089 , 37091 , 37093 , 37095 , 37097 , 37099 , 37101 , 37103 , 37105 , 37107 , 37109 , 37111 , 37113 , 37115 , 37117 , 37119 , 37123 , 37125 , 37127 , 37129 , 37131 , 37133 , 37135 , 37137 , 37139 , 37141 , 37143 , 37145 , 37147 , 37149 , 37151 , 37153 , 37155 , 37157 , 37159 , 37161 , 37163 , 37165 , 37167 , 37169 , 37171 , 37173 , 37175 , 37177 , 37179 , 37181 , 37183 , 37185 , 37187 , 37189 , 37191 , 37193 , 37195 , 37197 , 37199 , 42009 , 42011 , 42013 , 42017 , 42025 , 42027 , 42029 , 42037 , 42041 , 42043 , 42045 , 42055 , 42057 , 42061 , 42067 , 42071 , 42075 , 42077 , 42087 , 42089 , 42091 , 42093 , 42095 , 42097 , 42099 , 42101 , 42107 , 42109 , 42119 , 42133 , 51001 , 51003 , 51005 , 51007 , 51009 , 51011 , 51015 , 51017 , 51019 , 51021 , 51023 , 51025 , 51029 , 51031 , 51033 , 51035 , 51036 , 51037 , 51041 , 51043 , 51045 , 51047 , 51049 , 51051 , 51053 , 51057 , 51059 , 51061 , 51063 , 51065 , 51067 , 51069 , 51071 , 51073 , 51075 , 51077 , 51079 , 51081 , 51083 , 51085 , 51087 , 51089 , 51091 , 51093 , 51095 , 51097 , 51099 , 51101 , 51103 , 51105 , 51107 , 51109 , 51111 , 51113 , 51115 , 51117 , 51119 , 51121 , 51125 , 51127 , 51131 , 51133 , 51135 , 51137 , 51139 , 51141 , 51143 , 51145 , 51147 , 51149 , 51153 , 51155 , 51157 , 51159 , 51161 , 51163 , 51165 , 51167 , 51169 , 51171 , 51173 , 51175 , 51177 , 51179 , 51181 , 51183 , 51185 , 51187 , 51191 , 51193 , 51195 , 51197 , 51199 , 51515 , 51520 , 51530 , 51550 , 51590 , 51595 , 51620 , 51630 , 51640 , 51650 , 51660 , 51678 , 51680 , 51683 , 51700 , 51730 , 51740 , 51750 , 51770 , 51775 , 51790 , 51800 , 51810 , 51820 , 51840),
avg_area_acres = c(274826347 , 111810520 , 356958995 , 12225467 , 9850285 , 0 , 161517982 , 36513962 , 10627321 , 8500976 , 11011159 , 5127695 , 0 , 67608594 , 16920827 , 57891895 , 90677813 , 174713438 , 0 , 111737090 , 9175389 , 0 , 9109121 , 256162406 , 4402135 , 2327886 , 52759747 , 5463256 , 319309902 , 15382455 , 3036180 , 10039222 , 16030896 , 3197991 , 212521576 , 4046856 , 0 , 0 , 264745451 , 38377025 , 14006839 , 3338657 , 49178736 , 109703257 , 8824662 , 11173159 , 320523707 , 16273015 , 186374168 , 154012358 , 0 , 6070285 , 3642904 , 0 , 0 , 78336064 , 0 , 13759312 , 17568827 , 61018542 , 236490220 , 27424132 , 96956005 , 0 , 206767773 , 0 , 31173369 , 63229941 , 0 , 15128662 , 61045205 , 0 , 4586437 , 4128988 , 2226557 , 3643585 , 0 , 65578941 , 59265028 , 2428114 , 126995190 , 13925398 , 0 , 4209674 , 8296056 , 28642983 , 0 , 3710771 , 3339207 , 14816021 , 4856228 , 76817157 , 155771635 , 20563941 , 28336357 , 178198387 , 72704615 , 12853799 , 238138534 , 107598640 , 13661127 , 22993123 , 155120806 , 0 , 6206647 , 0 , 18314776 , 43712808 , 11738189 , 196002816 , 82823831 , 73270421 , 46040458 , 189830024 , 112293371 , 0 , 43395064 , 13963462 , 160870422 , 58778462 , 16594705 , 238589996 , 64963441 , 69212640 , 169997441 , 36023074 , 96909783 , 141439602 , 14451553 , 5627142 , 0 , 45419938 , 0 , 0 , 250418385 , 85662981 , 82094768 , 36026453 , 0 , 3060241 , 28376260 , 3218257 , 121200604 , 100712697 , 8906102 , 26029460 , 10120473 , 910542.7 , 29137366 , 890308.4 , 41413487 , 209038639 , 90114900 , 79827082 , 63393639 , 4586437 , 31454134 , 26271517 , 195279264 , 3895807 , 137398970 , 12913433 , 74078849 , 4301357 , 61049669 , 20523472 , 109219080 , 36594962 , 98945220 , 108001879 , 0 , 0 , 55219178 , 46797190 , 108652520 , 50683743 , 0 , 226125804 , 53359636 , 26675951 , 116590206 , 21697878 , 0 , 0 , 0 , 62506851 , 4857862 , 2175578 , 21050066 , 122578107 , 2495561 , 109878020 , 55702537 , 13561244 , 78614157 , 186377501 , 58814313 , 128087113 , 49549681 , 227994714 , 1315228 , 40116230 , 7958818 , 399060448 , 18095735 , 66551696 , 4047642 , 229363918 , 40847556 , 75376798 , 6880835 , 35299653 , 0 , 0 , 139744197 , 212121480 , 29671505 , 70598678 , 31292071 , 133027044 , 0 , 229286061 , 1854809 , 139092802 , 58819874 , 0 , 12750034 , 100796778 , 13602342 , 36732059 , 3508323 , 35866276 , 60717516 , 44689240 , 57780664 , 46795932 , 0 , 61018149 , 3238271 , 13053705 , 24693872 , 108004520 , 80801534 , 54323399 , 17708926 , 7993956 , 14673155 , 24285225 , 55048171 , 80636580 , 30279791 , 0 , 31268017 , 32222596 , 40641504 , 164917845 , 136580985 , 6476463 , 0 , 23802746 , 0 , 4654828 , 14087839 , 0 , 0 , 0 , 2023428 , 29851146 , 2428114 , 6315359 , 73594672 , 0 , 33032534 , 7083806 , 6962731 , 6396171 , 0 , 28336357 , 2226714 , 0 , 60721708 , 86062133 , 0 , 19673255 , 0 , 2428114 , 3374162 , 3136942 , 0 , 32708722 , 13196839 , 0 , 0 , 47560780 , 14006336 , 77238415 , 20642661 , 0 , 0 , 79749226 , 7014551 , 57969280 , 21351097 , 61288393 , 29446382 , 0 , 14370898 , 9958599 , 11536134 , 15545147 , 7589192 , 37162842 , 25584243 , 0 , 2430000 , 24936054 , 88654133 , 67926935 , 4453240 , 15464398 , 2529678 , 0 , 13035090 , 5505674 , 0 , 43800252 , 6638856 , 0 , 0 , 53434851 , 0 , 0 , 20806626 , 0 , 0 , 11820782 , 0 , 131809529 , 9067913 , 5424297 , 67685193 , 105413589 , 0 , 0 , 0 , 66551193 , 0 , 0 , 0 , 0 , 0 , 0 , 116260925 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 85624964 , 55463781 , 0 , 0))
st.id = unique(sapply(as.character(d$fip), function(x) substr(x, 1,2)))
us.map <- tigris::counties(state=st.id, cb = TRUE, year = 2016)
# Projuce counties map
county_map <- fortify(us.map, region="GEOID")
# Obtian state map, to build the state boundaries
#states_map <- map_data("state")
state_dat = tigris::states(cb = TRUE, year = 2016)
state_dat = state_dat[state_dat$STATEFP %in% c(st.id), ]
# Remove Alaska(2), Hawaii(15), Puerto Rico (72), Guam (66), Virgin Islands (78), American Samoa (60), Mariana Islands (69), Micronesia (64), Marshall Islands (68), Palau (70), Minor Islands (74)
state_dat <- state_dat[!state_dat$STATEFP %in% c("02", "15", "72", "66", "78", "60", "69", "64", "68", "70", "74"),]
# Make sure other outling islands are removed.
state_dat <- state_dat[!state_dat$STATEFP %in% c("81", "84", "86", "87", "89", "71", "76", "95", "79"),]
# Projuce state map
state_dat=fortify(state_dat, region = "STUSPS")
str_fipsselect <- 'all'
#str_fipsselect <- 'zero acres'
#str_fipsselect <- 'non-zero acres'
if(str_fipsselect == 'zero acres'){
d <- d %>% filter(avg_area_acres == 0)
} else if(str_fipsselect == 'non-zero acres'){
d <- d %>% filter(avg_area_acres > 0)
}
d$norm_avg_area_acres = normalize(d$avg_area_acres, method = "range")
if (min(d$avg_area_acres) <= 0){
c_cols = c("blue","red","green")
v_cols = c(as.numeric(d[which(d$avg_area_acres==min(d$avg_area_acres))[1],"norm_avg_area_acres"]),
as.numeric(d[which(d$avg_area_acres == min(d$avg_area_acres[d$avg_area_acres!=min(d$avg_area_acres)]))[1], "norm_avg_area_acres"][1]),
as.numeric(d[which(d$avg_area_acres==max(d$avg_area_acres))[1],"norm_avg_area_acres"]))
} else {
c_cols = c("green", "red")
v_cols = c(as.numeric(d[which(d$avg_area_acres==min(d$avg_area_acres))[1],"norm_avg_area_acres"]),
as.numeric(d[which(d$avg_area_acres==max(d$avg_area_acres))[1],"norm_avg_area_acres"]))
}
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres") +
coord_map()
enter image description here
It seems that the issue is more to do with visualizing such a small block of color in the color_bar. The color_bar guide can help you tweak this.Two potential options:
Lengthen the color bar
You can lengthen the color bar to make the blue more visible. The example below is a bit ridiculous but you can play around with adjusting the color_bar height and the size of the overall plot.
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres",
guide = guide_colourbar(barheight = 40)) +
coord_map()
Adjust the bins in the colorbar
This will give more weight to the blue portion by reducing the number of nbin. It is kind of a misrepresentation of your data though, since you have a sharp break between 0 and those values above 0. Using something like color_step or a binned fill scale may be another way to look into this, depending on the distribution of your data.
ggplot() +
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill="gray94", color=NA, show.legend = FALSE) + # this is for shade within state boundaries
geom_polygon(data=state_dat,
aes(x=long, y=lat, group=group), fill=NA, color="gray25", size=0.1, show.legend = FALSE) + # this is for state boundaries
geom_map(aes(fill=avg_area_acres, map_id = fips),
data = d, map=county_map, color="palevioletred3", size=0.05, show.legend = TRUE) + # this is to show the FIPS which are under study
scale_fill_gradientn(colors = adjustcolor(c_cols, alpha.f = 0.7),
values = v_cols,
labels = function(x) format(x, digits = 2, scientific = TRUE),
name = "Average acres",
guide = guide_colourbar(barheight = 10, nbin = 15)) +
coord_map()

R script applying roman numeral html color names

I have an R script that I am working to apply HTML color codes to specific Roman numeral values in the dataset. An example of my code is pasted below. I am having a problem with identifying the roman numerals. I will also include a sample of my data. I want to create a new column with the color codes.
white = "#F0F8FF"
aqua = "#00FFFF"
black = "#000000"
blue = "#0000FF"
violet = "#8A2BE2"
green = "#7FFF00"
red = "#DC143C"
dblue = "#191970"
orange = "#FFA500"
yellow = "#FFFF00"
data <- read.csv("protiencs.csv")
Data$Color <-{
for (i in data$MG){
if (data$MG == II){
data$Color <- white
}
}
}
Here is my data
PI MG
1 FC001547 II
2 FC002108 III
3 FC002109 III
4 FC003548 IV
5 FC003609 I
6 FC003654-1 IV
My desired output would be
PI MG Color
FC001547 II #F0F8FF
FC002108 III #00FFFF
Corrected code
myDat <- read.csv("protiencs.csv", header = TRUE)
myColors <- list(white = "#F0F8FF"
, aqua = "#00FFFF"
, black = "#000000"
, blue = "#0000FF"
, violet = "#8A2BE2"
, green = "#7FFF00"
, red = "#DC143C"
, dblue = "#191970"
, orange = "#FFA500"
, yellow = "#FFFF00")
myDat$MG <- factor(myDat$MG, levels = c("II", "III", "IV", "I"))
myDat$color <- myColors[myDat$MG]
head(myDat)
You can use a list. The contents of MG are a factor I am assuming:
read.table(text = "PI MG
1 FC001547 II
2 FC002108 III
3 FC002109 III
4 FC003548 IV
5 FC003609 I
6 FC003654-1 IV", header = TRUE) -> myDat
myColors <- list(white = "#F0F8FF"
, aqua = "#00FFFF"
, black = "#000000"
, blue = "#0000FF"
, violet = "#8A2BE2"
, green = "#7FFF00"
, red = "#DC143C"
, dblue = "#191970"
, orange = "#FFA500"
, yellow = "#FFFF00")
myDat$MG <- factor(myDat$MG, levels = c("II", "III", "IV", "I"))
myDat$color <- myColors[myDat$MG]
> myDat
PI MG color
1 FC001547 II #F0F8FF
2 FC002108 III #00FFFF
3 FC002109 III #00FFFF
4 FC003548 IV #000000
5 FC003609 I #0000FF
6 FC003654-1 IV #000000

Resources