Adding a second y axis in R - r

I am envisioning to use the following dataset to create a plot that combines a clustered bar chart and line chart with the following data:
structure(list(X = 1:14, ORIGIN = c("AUS", "AUS", "DAL", "DAL",
"DFW", "DFW", "IAH", "IAH", "OKC", "OKC", "SAT", "SAT", "SHV",
"SHV"), DEST = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("ATL", "SEA"), class = "factor"),
flight19.x = c(293L, 93L, 284L, 93L, 558L, 284L, 441L, 175L,
171L, 31L, 262L, 31L, 175L, 0L), flight19.y = c(5526L, 5526L,
6106L, 6106L, 23808L, 23808L, 15550L, 15550L, 2055L, 2055L,
3621L, 3621L, 558L, 558L)), row.names = c(NA, -14L), class = "data.frame")
In Excel, the chart I am envisioning looks something like this:
I have already tried to use sec.axis to generate a second axis. However, the line plot still seems to use the first y-axis instead of the second axis:
# Clustered bar chart: flight19.x per ORIGIN, with bars dodged and filled by DEST.
p1 <- ggplot()+
geom_bar(data = flight19, aes(ORIGIN, flight19.x, fill = DEST),stat = "identity", position = "dodge" )+
scale_fill_viridis(name = "Destinations", discrete = TRUE)+
labs(y= "Operation Counts", x = "Airports")
# Overlay flight19.y as a line with points and declare a secondary axis.
# NOTE(review): flight19.y is mapped unscaled while the primary axis is
# limited to c(0, 600); 12 of the 14 flight19.y values exceed 600, which
# matches the "Removed 12 rows" warnings. Presumably the values need to be
# divided by the sec_axis() factor before plotting -- confirm.
p2 <- p1 + geom_line(data = flight19, aes(as.character(ORIGIN), flight19.y, group = 1))+
geom_point(data = flight19, aes(as.character(ORIGIN), flight19.y, group = 1))+
scale_y_continuous(limit = c(0,600),sec.axis = sec_axis(~.*75/10, name = "Total Monthly Operations"))
The plot shows the warning below:
Warning messages:
1: Removed 12 row(s) containing missing values (geom_path).
2: Removed 12 rows containing missing values (geom_point).
And the codes produce the plot below:
Could someone teach me how to make the line plot correspond to the second axis?
Thanks so much in advance.

Find a suitable transformation factor, here I used 50 just to get nice y-axis labels
# Build the x-axis label: one tick per ORIGIN/DEST pair (origin above destination)
flight19$x_axis <- paste0(flight19$ORIGIN, "\n", flight19$DEST)

# Transformation factor between the primary and the secondary y-axis.
# A data-driven alternative would be:
# transf_fact <- max(flight19$flight19.y) / max(flight19$flight19.x)
transf_fact <- 50

ggplot(flight19, aes(x = x_axis)) +
  geom_bar(aes(y = flight19.x), stat = "identity", fill = "blue") +
  # Divide by the factor so the line is positioned on the primary scale;
  # sec_axis() below multiplies by the same factor to label it correctly.
  geom_line(aes(y = flight19.y / transf_fact, group = 1), color = "orange") +
  scale_y_continuous(
    name = "Operation Counts",
    limits = c(0, 600),
    breaks = seq(0, 600, 100),
    sec.axis = sec_axis(
      ~ . * transf_fact,
      breaks = function(limits) seq(0, limits[2], 5000),
      # The secondary axis counts operations, so label it in thousands
      # without a currency prefix.
      labels = scales::label_number(scale = 0.001, suffix = " k"),
      name = "Total Monthly Operations"
    )
  ) +
  xlab("Airports") +
  theme_bw()

Related

change line colour ggplot (geom_line)

I am still very new to ggplot, but I've created the following graphic, in which I would like to switch the colors blue and red. It should be simple enough, but I cannot figure it out.
df <- structure(list(Sex = c("M", "M", "M", "M", "M", "M", "M", "W",
"W", "W", "W", "W", "W", "W"), age_cat = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("<40",
"41-50", "51-60", "61-70", "71-80", "81-90", "90+"), class = "factor"),
DD = c(42L, 88L, 289L, 558L, 527L, 174L, 22L, 27L, 36L, 206L,
347L, 321L, 160L, 29L), pop = c(36642L, 16327L, 20232L, 18068L,
14025L, 5555L, 1293L, 35887L, 16444L, 20178L, 17965L, 14437L,
7150L, 2300L), proportion = c(0.114622564270509, 0.538984504195504,
1.428430209569, 3.08833296435687, 3.75757575757576, 3.13231323132313,
1.7014694508894, 0.0752361579402012, 0.218924835806373, 1.02091386658737,
1.9315335374339, 2.22345362609961, 2.23776223776224, 1.26086956521739
), lower = c(0.082621962613957, 0.432499099174075, 1.26946115577044,
2.8408823120445, 3.44891013496043, 2.69000601596679, 1.06929480146528,
0.0495867729368698, 0.153377923598767, 0.886828947142727,
1.73530361873497, 1.98914206124244, 1.90751612365318, 0.846006018532107
), upper = c(0.154905173671422, 0.663628389658291, 1.6015714811397,
3.35102939015014, 4.08561035940466, 3.6247050150149, 2.56476800800746,
0.10944592449968, 0.302956684874059, 1.16937842460687, 2.14353263545661,
2.47728262910991, 2.60770261266057, 1.80583393021473)), row.names = c(NA,
-14L), class = "data.frame")
Below is the script I've used, in which I get (Sex = M) in red and (Sex = W) in blue.
# Prevalence per age category, one coloured series per Sex, with error bars
ggplot(
  prevalence2021GGPLOT,
  mapping = aes(age_cat, proportion, colour = Sex)
) +
  geom_point() +
  labs(x = "Age category", y = "Prevalence (%)", title = "Prevalence 2021") +
  geom_errorbar(aes(ymax = upper, ymin = lower), width = 0.2) +
  theme_bw() +
  # group by Sex so each series gets its own connecting line
  geom_line(aes(group = unlist(Sex)))
How do i make sex = M blue and sex = W red??
You can use scale_color_manual to manually change the colours in ggplot2. The first colour corresponds to the first level of the variable mapped to colour (here Sex).
# Same plot on the sample df, with the two series coloured manually
# (colours are assigned to the Sex values in order)
ggplot(df, mapping = aes(age_cat, proportion, colour = Sex)) +
  geom_point() +
  labs(x = "Age category", y = "Prevalence (%)", title = "Prevalence 2021") +
  geom_errorbar(aes(ymax = upper, ymin = lower), width = 0.2) +
  theme_bw() +
  geom_line(aes(group = unlist(Sex))) +
  scale_colour_manual(values = c("blue", "red"))
As the result :
you can find the documentation of the ggplot2 graphics on R here :
data_to_viz
scale_color_manual
This works. I need to point to df to use your sample df. Edited it per suggestion.
# Prevalence per age category with error bars, one line per Sex.
# The colours are named by Sex value, so M is always dark blue and W dark red
# regardless of the order of the values.
ggplot(data = df, aes(x = age_cat, y = proportion, color = Sex)) +
  geom_point() +
  labs(title = "Prevalence 2021", y = "Prevalence (%)", x = "Age category") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2) +
  theme_bw() +
  geom_line(aes(group = Sex)) +
  scale_color_manual(values = c(M = "darkblue", W = "darkred"))

second y-axis with other scale [duplicate]

This question already has answers here:
ggplot with 2 y axes on each side and different scales
(18 answers)
Closed 2 years ago.
I created a barchart with ggplot2 geom_bar and want to have two metrics in the bars. I used melt to do so. However, I now need a second y-axis with another scale, because the numbers of the two metrics are too different.
In the following the dataframe and the code I used:
df <- data.frame(categories = c("politics", "local", "economy", "cultural events",
"politics", "local", "economy", "cultural events"),
metric = c("page", "page", "page", "page",
"product", "product", "product", "product"),
value = c(100L, 50L, 20L, 19L,
950000L, 470000L, 50000L, 1320L))
In the following the code I used to create the plot and the second y-axis:
# Base plot: value per category, filled by metric.
x <- ggplot(df, aes(x=categories, y=value, fill = metric))
# Dodged bars plus a secondary axis that shows the primary scale times 1000.
# NOTE(review): limits = c(1, 1000) excludes most "product" values in df
# (up to 950000) and does not include 0; presumably this is why no bars are
# drawn -- confirm against ggplot2's out-of-bounds handling.
x + geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(sec.axis=sec_axis(~. *1000), limits=c(1,1000))
However, now no bars appear in the chart anymore... Does anybody know how to solve this problem?
Another approach could be to use highcharter
library(dplyr)
library(tidyr)
library(highcharter)
# Reshape to wide format: one row per category, one column per metric
df <- spread(df, metric, value)

# Columns for "page" on the left axis, a line for "product" on the right axis
highchart() %>%
  hc_xAxis(categories = df[["categories"]]) %>%
  hc_yAxis_multiples(
    list(lineWidth = 3, title = list(text = "Page")),
    list(opposite = TRUE, title = list(text = "Product"))
  ) %>%
  hc_add_series(data = df[["page"]], type = "column") %>%
  # use type = "column" instead of "line" to draw this series as bars too
  hc_add_series(data = df[["product"]], type = "line", yAxis = 1)
Output plot is:
Sample data:
df <- structure(list(categories = structure(c(4L, 3L, 2L, 1L, 4L, 3L,
2L, 1L), .Label = c("cultural events", "economy", "local", "politics"
), class = "factor"), metric = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L), .Label = c("page", "product"), class = "factor"),
value = c(100L, 50L, 20L, 19L, 950000L, 470000L, 50000L,
1320L)), .Names = c("categories", "metric", "value"), row.names = c(NA,
-8L), class = "data.frame")
Instead of displaying the different bars on the same plot you can stack them together (or facet them):
As @ConorNeilson already mentioned in the comments, you don't have to (and also shouldn't) specify the variables with df$. ggplot knows to look for them in the specified data.frame df.
The free_y argument in the facet_grid-call makes it possible to display different scales on the y-axes.
library(ggplot2)
# One panel per metric, each with its own y-axis range
ggplot(data = df, mapping = aes(categories, value, fill = metric)) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_grid(metric ~ ., scales = "free_y")
You could compare the different values on a log10-scale:
# Both metrics on a single log10 y-axis, with breaks at powers of ten
ggplot(data = df, mapping = aes(categories, value, fill = metric)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_y_log10(
    name = "value (log10 scale)",
    breaks = 10^(0:10),
    labels = paste0("10^", 0:10)
  )

how to change the color in geom_point or lines in ggplot [duplicate]

This question already has an answer here:
ggplot2: How to specify multiple fill colors for points that are connected by lines of different colors
(1 answer)
Closed 5 years ago.
I have a data like this
data<- structure(list(sample = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), y = c(0.99999652, 0.99626012, 0.94070452,
0.37332406, 0.57810894, 0.37673758, 0.22784684, 0.35358141, 0.21253558,
0.17715703, 0.99999652, 0.86403956, 0.64054516, 0.18448824, 0.40362691,
0.10791682, 0.06985696, 0.07384465, 0.0433271, 0.02875159), time = c(100L,
150L, 170L, 180L, 190L, 220L, 260L, 270L, 300L, 375L, 100L, 150L,
170L, 180L, 190L, 220L, 260L, 270L, 300L, 375L), x = c(0.9999965,
0.9981008, 0.9940164, 1.0842966, 0.9412978, 1.0627907, 0.9135079,
1.1982235, 0.9194105, 0.9361713, 0.9999965, 1.0494051, 0.9526752,
1.1594711, 0.9827104, 1.0223711, 1.1419197, 1.0328598, 0.6015229,
0.3745817)), .Names = c("sample", "y", "time", "x"), class = "data.frame", row.names = c(NA,
-20L))
I am interested in plotting it with custom colors like black and red.
I can plot it with two random different color like this but the problem is that
# Scatter of y over time, coloured by sample with the default palette
ggplot() +
  geom_point(
    mapping = aes(time, y, colour = sample),
    data = data,
    size = 4
  )
if I want to assign the first one (A) to black and the (B) to red. how can I do that?
You could use scale_color_manual:
# Scatter of y over time with an explicit colour per sample
ggplot() +
  geom_point(
    mapping = aes(time, y, colour = sample),
    data = data,
    size = 4
  ) +
  scale_colour_manual(values = c(A = "black", B = "red"))
Per OP's comment, to get lines with the same color as the points you could do:
# Points and connecting lines share the colour mapping set in ggplot()
ggplot(data, mapping = aes(time, y, colour = sample)) +
  geom_point(size = 4) +
  geom_line(aes(group = sample)) +
  scale_colour_manual(values = c(A = "black", B = "red"))
I would do it like this (you can also use hexadecimal colors instead of red, black)
# Store the colour of each row in its own column ...
data <- mutate(
  data,
  Color = ifelse(
    sample == "A",
    "black",
    ifelse(sample == "B", "red", "none")
  )
)

# ... and let scale_color_identity() use those values as colours directly
ggplot() +
  geom_point(
    mapping = aes(time, y, colour = Color),
    data = data,
    size = 4
  ) +
  scale_colour_identity()

Is it possible to put space between stacks in ggplot2 stacked bar?

I took this example from here:
# Example data in wide format: one row per Rank, one column per stack
DF <- read.table(header = TRUE, text = "Rank F1 F2 F3
1 500 250 50
2 400 100 30
3 300 155 100
4 200 90 10")

# Reshape to long format (Rank, variable, value)
library(reshape2)
DF1 <- melt(DF, id.vars = "Rank")

# Stacked bars, one fill colour per variable
library(ggplot2)
ggplot(data = DF1, mapping = aes(Rank, value, fill = variable)) +
  geom_bar(stat = "identity")
Is it possible to create a stacked bar such as the following graph using ggplot2? I do not want to differentiate stacks by different colors.
EDIT: Based on Pascal's comments,
# Stacked bars without fill mapping; a thick white outline separates the stacks
ggplot(data = DF1, mapping = aes(Rank, value)) +
  geom_bar(color = "white", lwd = 2, stat = "identity")
I still have the white borders for the bars.
This is the closest I could get to your example figure. It is not much of an improvement beyond what you've already sorted but puts less of an emphasis on the white bar borders on the grey background.
library(ggplot2)
# Narrow black stacks with white outlines on a classic theme,
# with the x-axis labels rotated by 90 degrees
p <- ggplot(DF1, aes(x = Rank, y = value, group = variable)) +
  geom_bar(
    stat = "identity", position = "stack",
    fill = "black", colour = "white",
    lwd = 1.5, width = 0.5
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
p
That produces:
If you want to keep the grey background you can find out exactly what shade of grey it is and use that colour for the line while removing the background grids (this is not the right shade).
# Same stacks on the default theme: grey outlines and no panel grid lines
p <- ggplot(DF1, aes(x = Rank, y = value)) +
  geom_bar(
    stat = "identity", position = "stack",
    fill = "black", colour = "grey",
    lwd = 1.5, width = 0.5
  ) +
  theme(panel.grid = element_blank())
p
An issue with this solution is that very small groups will not be seen (e.g., when Rank = 4 variable F3 = 10; this small value is completely covered by the white bar outline).
Your sample data:
DF1 <- structure(list(Rank = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), variable = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L), .Label = c("F1", "F2", "F3"), class = "factor"),
value = c(500L, 400L, 300L, 200L, 250L, 100L, 155L, 90L,
50L, 30L, 100L, 10L)), row.names = c(NA, -12L), .Names = c("Rank",
"variable", "value"), class = "data.frame")

Custom legend for multi layer geom_bar plot

R version 3.1.1 (2014-07-10) Platform: i386-w64-mingw32/i386 (32-bit)
I am working on a barplot with ggplot2. The aim is to have a combination of a stacked and dodged barplot for the data. My problem is to include a legend, which includes both layers or shows them separately.
Data:
df <- structure(list(year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("2008", "2009", "2010",
"2011", "2012", "2013", "2014"), class = "factor"), product = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a",
"b"), class = "factor"), total = c(1663L, 1344L, 1844L, 444L,
1336L, 897L, 655L, 3433L, 3244L, 2044L, 3344L, 1771L, 1410L,
726L), partial = c(1663L, 1344L, 1844L, 444L, 949L, 302L, 5L,
3433L, 3244L, 2044L, 3344L, 1476L, 1158L, 457L)), .Names = c("year",
"product", "total", "partial"), row.names = c(NA, -14L), class = "data.frame")
The plan was, to plot two geom_bar layers to combine dodge and stacked. The first layer is the total amount, the second layer is the partial amount. The alpha value for the first layer is reduced to see the difference between the two layers. So far it worked.
Example:
# Two dodged layers per year: faded bars for the total, solid bars for the
# partial amount drawn on top
ggplot(data = df, mapping = aes(x = year)) +
  geom_bar(
    aes(y = total, fill = product),
    stat = "identity", position = "dodge", width = 0.3, alpha = 0.3
  ) +
  geom_bar(
    aes(y = partial, fill = product),
    stat = "identity", position = "dodge", width = 0.3, alpha = 1
  )
Now the legend is not sufficient. It shows the colour of fill = product and is not sensitive to the alpha value of the first layer.
My approach was to use scale_fill_manual and manually add a new label with a new colour, which did not work.
My idea:
# Same two layers, with an attempt to add a third legend entry by hand
ggplot(data = df, mapping = aes(x = year)) +
  geom_bar(
    aes(y = total, fill = product),
    stat = "identity", position = "dodge", width = 0.3, alpha = 0.3
  ) +
  geom_bar(
    aes(y = partial, fill = product),
    stat = "identity", position = "dodge", width = 0.3, alpha = 1
  ) +
  scale_fill_manual(
    name = "",
    labels = c("a", "b", "test"),
    values = c("red", "black", "blue")
  )
Thank you for any help on my problem!
Try to use different fill values for total and partial data.
Quick and dirty solution:
# Give the two layers distinct fill values so that each gets its own legend
# entry: the total layer maps product to 1 and 2, the partial layer to 3 and 6
# (product code times 3), i.e. four fill values in total.
ggplot(data = df, mapping = aes(x = year)) +
  geom_bar(
    aes(y = total, fill = factor(as.numeric(product))),
    stat = "identity", position = "dodge", width = 0.3, alpha = 0.3
  ) +
  geom_bar(
    aes(y = partial, fill = factor(as.numeric(product) * 3)),
    stat = "identity", position = "dodge", width = 0.3, alpha = 1
  ) +
  scale_fill_manual(
    name = "",
    labels = c("A", "B", "Partial A", "Partial B"),
    values = c("red", "black", "blue", "green")
  )
Tested on
R x64 3.2.2

Resources