grid.arrange - make grid higher / extend - r

I would kindly like to ask for your help. I am trying to extend the graph grid below in order to fully display the three graphs. Everywhere I looked, I couldn't find any clues how to resize the actual grid, not each of the plots within them.
This is the code I am using:
# Build one bar chart of total donations per candidate for a single
# economic area.
#
# data: data frame with the columns `economic_area`, `cand_nm` and
#       `contb_receipt_amt`.
# area: the `economic_area` value to keep, e.g. "Poorer areas"; it is also
#       lower-cased and appended to the plot title.
# Returns a ggplot object.
plot_area_donations <- function(data, area) {
  data %>%
    # Refer to the column by its bare name: `df$economic_area` would bypass
    # the piped data and always look at the global `df`.
    filter(economic_area == !!area) %>%
    group_by(cand_nm) %>%
    summarise(sum = sum(contb_receipt_amt)) %>%
    # reorder() already sorts the bars from the largest to the smallest
    # total, so no arrange() step is needed before plotting.
    ggplot(aes(x = reorder(cand_nm, -sum), y = sum)) +
    geom_col() +
    scale_y_continuous(labels = comma) +
    labs(
      x = "Candidate",
      y = "Received donations in USD",
      title = paste("Donations per candidate from", tolower(area))
    ) +
    theme(axis.text.x = element_text(angle = 55, hjust = 1))
}

p1 <- plot_area_donations(df, "Poorer areas")
p2 <- plot_area_donations(df, "Middle income areas")
p3 <- plot_area_donations(df, "Upper class areas")

# Give the three rows equal height. The earlier heights = c(10, 0.65, 0.65)
# handed roughly 88% of the page to the first plot and squeezed the other
# two. grid.arrange() draws into the current graphics device, so to make the
# whole grid taller, enlarge the device itself (e.g. the `height` of
# png()/pdf()/ggsave(), or the knitr chunk option `fig.height`).
grid.arrange(p1, p2, p3, nrow = 3)
Thanks a lot for your help and best regards,
Trgovec
edit:
I have tried using facets as well, but it does not work.
# Aggregate the donations to one row per candidate and draw them as bars.
p <- df %>%
group_by(cand_nm) %>%
summarise(sum = sum(contb_receipt_amt)) %>%
arrange(desc(sum)) %>%
# reorder(cand_nm, -sum) orders the bars by descending total.
ggplot(aes(x = reorder(cand_nm, -sum), y=sum)) + geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 55, hjust = 1)) +
scale_y_continuous(labels = comma) +
labs(x='Candidate',
y='Recieved donations in USD',
title='Recieved donations per candidate')
# NOTE(review): the facet formula takes Median_household_income from the
# original, unsummarised `df`, whereas the data inside `p` has been reduced
# to the columns cand_nm and sum. Presumably this length mismatch is what
# triggers the error reported for this call; verify.
p + facet_grid(. ~ df$Median_household_income)
this is the error:
Error in `$<-.data.frame`(`*tmp*`, "PANEL", value = c(536L, 1279L, 380L, :
replacement has 618217 rows, data has 25
EDIT:
Here is the reproducible dataset:
> dput(test)
structure(list(cand_nm = structure(c(4L, 4L, 20L, 23L, 1L, 20L,
19L, 5L, 23L, 1L, 12L, 3L, 19L, 2L, 5L, 8L, 16L, 6L, 12L, 3L,
4L, 15L, 24L, 22L, 11L, 2L, 17L, 6L, 9L, 17L, 11L, 19L, 8L, 16L,
22L, 20L, 13L, 24L, 5L, 23L, 15L, 25L, 9L, 14L, 9L, 10L, 21L,
13L, 1L, 10L, 18L, 12L, 21L, 14L, 2L, 7L, 17L, 16L, 25L, 22L,
3L, 8L, 15L, 18L, 6L, 24L, 7L, 11L), .Label = c("Bush, Jeb",
"Carson, Benjamin S.", "Christie, Christopher J.", "Clinton, Hillary Rodham",
"Cruz, Rafael Edward 'Ted'", "Fiorina, Carly", "Gilmore, James S III",
"Graham, Lindsey O.", "Huckabee, Mike", "Jindal, Bobby", "Johnson, Gary",
"Kasich, John R.", "Lessig, Lawrence", "McMullin, Evan", "O'Malley, Martin Joseph",
"Pataki, George E.", "Paul, Rand", "Perry, James R. (Rick)",
"Rubio, Marco", "Sanders, Bernard", "Santorum, Richard J.", "Stein, Jill",
"Trump, Donald J.", "Walker, Scott", "Webb, James Henry Jr."), class = "factor"),
economic_area = structure(c(2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L,
3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 2L, 2L,
2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 3L,
1L, 1L, 3L, 2L, 3L, 2L, 1L, 2L, 2L, 3L, 1L, 3L, 2L, 1L, 3L,
3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L
), .Label = c("Poorer areas", "Middle income areas", "Upper class areas"
), class = "factor"), sum = c(125925526.85, 16410009.07,
5622760.67, 4233067.76, 2551429, 1985456.25, 1626777.31,
1154448.33, 1104488.41, 860788, 621133.75, 587637, 566709.69,
436575.16, 358550.75, 271350.07, 227328.63, 218724.23, 211130.5,
211075, 197447.84, 195618, 192773, 191260.1, 167872.43, 161952.1,
141996.17, 121297.5, 92806, 80349.55, 70725.29, 66755, 63672,
62400, 58971, 48382.4, 48291.56, 44000, 43426.5, 38671.22,
37890, 29900, 28964, 21753, 20750, 19626.42, 16655.1, 15980.75,
15021, 12550, 12250, 12150, 9900, 8434.5, 6092, 5800, 5450,
4425.66, 4050, 2936, 2700, 2700, 2700, 1750, 1000, 1000,
600, 575)), row.names = c(NA, -68L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = "cand_nm", drop = TRUE, .Names = c("cand_nm",
"economic_area", "sum"), indices = list(c(4L, 9L, 48L), c(13L,
25L, 54L), c(11L, 19L, 60L), c(0L, 1L, 20L), c(7L, 14L, 38L),
c(17L, 27L, 64L), c(55L, 66L), c(15L, 32L, 61L), c(28L, 42L,
44L), c(45L, 49L), c(24L, 30L, 67L), c(10L, 18L, 51L), c(36L,
47L), c(43L, 53L), c(21L, 40L, 62L), c(16L, 33L, 57L), c(26L,
29L, 56L), c(50L, 63L), c(6L, 12L, 31L), c(2L, 5L, 35L),
c(46L, 52L), c(23L, 34L, 59L), c(3L, 8L, 39L), c(22L, 37L,
65L), c(41L, 58L)), group_sizes = c(3L, 3L, 3L, 3L, 3L, 3L,
2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L,
3L, 3L, 2L), biggest_group_size = 3L, labels = structure(list(
cand_nm = structure(1:25, .Label = c("Bush, Jeb", "Carson, Benjamin S.",
"Christie, Christopher J.", "Clinton, Hillary Rodham", "Cruz, Rafael Edward 'Ted'",
"Fiorina, Carly", "Gilmore, James S III", "Graham, Lindsey O.",
"Huckabee, Mike", "Jindal, Bobby", "Johnson, Gary", "Kasich, John R.",
"Lessig, Lawrence", "McMullin, Evan", "O'Malley, Martin Joseph",
"Pataki, George E.", "Paul, Rand", "Perry, James R. (Rick)",
"Rubio, Marco", "Sanders, Bernard", "Santorum, Richard J.",
"Stein, Jill", "Trump, Donald J.", "Walker, Scott", "Webb, James Henry Jr."
), class = "factor")), row.names = c(NA, -25L), class = "data.frame", vars = "cand_nm", drop = TRUE, .Names = "cand_nm"))
I would basically like to combine these three graphs into one extended graph grid / page:
1)
2)
3)

Related

Plot Y values against the time grouped by an ID

I want to make a time series plot grouped by ID. My dataset has 42 different IDs with 7 different timeframes. The timeframe varies per ID and ranges from 9/2016 to 8/2018. I.e., ID1 can start 10/2016 and end 7/2017 (with 7 rows containing a different date) and ID40 can start 11/2016 and end 6/2018 (also with 7 rows containing a different date). I try to plot this with the following code:
# Base plot: AI against START, with one colour and one group per ID.
p <- ggplot(
  data = df6,
  mapping = aes(x = START, y = AI, col = ID, group = ID)
)

# Layers, in drawing order:
#   - the raw observations,
#   - one smoother over all IDs together (group = 1),
#   - the mean AI per x value over all IDs, drawn as a triangle (shape 17).
p +
  geom_point(size = 1.2, alpha = 0.8) +
  stat_smooth(aes(group = 1)) +
  stat_summary(
    aes(group = 1),
    geom = "point",
    fun.y = mean,
    shape = 17,
    size = 3
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
This gives me the following graph:
As one can see, the X-axis is not chronological. It should start at 09/2016 and end at 08/2018 and then correspond with the Y value based on the ID. I have the following dataset:
structure(list(ID = c("ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10",
"ID11", "ID11", "ID11", "ID11", "ID11", "ID12"), Time = c("1",
"2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5", "6", "7",
"1", "2", "3", "4", "5", "1"), AI = c(0.393672183448241, 0.4876954603533,
0.411717908455957, 0.309769862660288, 0.149826889496538, 0.2448558592586,
0.123606753324621, 0.296109333767922, 0.309960002123076, 0.445886231347992,
0.370013553008003, 0.393414429902431, 0.318940511323733, 0.131112361225666,
0.31961673567578, 0.227268892979164, 0.433471105477564, 0.207184572401005,
0.144257239122978, 0.520204263001733), AI_VAR = c(0.154977788020905,
0.237846862049217, 0.169511636143347, 0.0959573678125739, 0.0224480968162077,
0.0599543918132674, 0.0152786294674538, 0.0876807375444826, 0.0960752029161373,
0.198814531305715, 0.136910029409606, 0.154774913655455, 0.101723049763444,
0.0171904512661696, 0.102154857724042, 0.0516511497159746, 0.187897199283942,
0.0429254470409874, 0.020810151039384, 0.270612475245176), activity = c(0,
0.303472222222222, 0.232638888888889, 0.228472222222222, 0.348611111111111,
0.215972222222222, 0.123611111111111, 0.357638888888889, 0.235416666666667,
0.233333333333333, 0.2875, 0.353472222222222, 0.356944444444444,
0.149305555555556, 0.448611111111111, 0.213888888888889, 0.248611111111111,
0.288888888888889, 0.25625, 0.238888888888889), ZIM_SD = c(0,
0.148002025121106, 0.095781596758851, 0.0707738088994687, 0.0522313184217097,
0.0528820640482116, 0.0152791681192935, 0.105900213118389, 0.0729697504998075,
0.104040120647865, 0.106378896489801, 0.139061072791901, 0.113844043625277,
0.0195758039329988, 0.143383618921218, 0.0486102909983211, 0.107765733167339,
0.059853320915846, 0.036965917525263, 0.124271018383747), ZIM_VAR = c(0,
0.0721799157746582, 0.039434998686126, 0.0219235930627339, 0.00782565597342798,
0.0129484832318932, 0.00188860836472692, 0.0313580415523671,
0.0226177040198407, 0.0463900573046668, 0.0393616334552618, 0.0547086326740462,
0.0363094774850072, 0.00256662987654616, 0.0458278042289798,
0.0110476070225835, 0.0467133314886466, 0.0124006847007297, 0.00533260120384214,
0.0646463135307921), CHECK = c(10L, 13L, 11L, 7L, 7L, 5L, 4L,
36L, 36L, 34L, 34L, 32L, 29L, 21L, 28L, 27L, 26L, 25L, 21L, 36L
), BULBAR = c(2L, 4L, 4L, 4L, 4L, 2L, 2L, 9L, 9L, 9L, 9L, 9L,
7L, 6L, 12L, 12L, 11L, 11L, 11L, 11L), FINE = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 9L, 9L, 8L, 8L, 7L, 6L, 4L, 2L, 1L, 1L, 1L, 0L, 7L
), GROSS = c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 9L, 9L, 9L, 9L, 8L,
8L, 6L, 3L, 3L, 3L, 3L, 2L, 6L), RESPI = c(6L, 7L, 5L, 1L, 1L,
1L, 1L, 9L, 9L, 8L, 8L, 8L, 8L, 5L, 11L, 11L, 11L, 10L, 8L, 12L
), GROSS_RENEWD = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L,
5L, 5L, 4L, 3L, 3L, 3L, 3L, 2L, 3L), ACTIVE = c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 18L, 18L, 17L, 17L, 15L, 14L, 10L, 5L, 4L, 4L, 4L,
2L, 13L), NON.ACTIVE = c(8L, 11L, 9L, 5L, 5L, 3L, 3L, 18L, 18L,
17L, 17L, 17L, 15L, 11L, 23L, 23L, 22L, 21L, 19L, 23L), START = c("09/2016",
"11/2016", "01/2017", "04/2017", "06/2017", "10/2017", "02/2018",
"10/2016", "12/2016", "02/2017", "04/2017", "07/2017", "11/2017",
"04/2018", "10/2016", "12/2016", "02/2017", "04/2017", "07/2017",
"10/2016"), STOP = c("10/2016", "11/2016", "01/2017", "04/2017",
"06/2017", "10/2017", "03/2018", "10/2016", "12/2016", "02/2017",
"04/2017", "07/2017", "11/2017", "04/2018", "10/2016", "12/2016",
"02/2017", "04/2017", "07/2017", "10/2016")), row.names = c(NA,
20L), class = "data.frame")
In general I want the column START to start with the begin date and end with the last date when it is plotted
You should convert your "START" column to a date format. You could use the package zoo with the function as.yearmon for that. To start the axis with your start date and end it with the end date, you could create a vector of date breaks using the min (start) date and max (end) date. Here is a reproducible example:
library(ggplot2)
library(zoo)
library(dplyr)
# Parse the "mm/YYYY" strings in START into real Date values (first day of
# the month) so that ggplot2 can place them on a chronological axis.
df6 <- df6 %>%
  mutate(START = as.Date(as.yearmon(START, format = "%m/%Y")))

# One axis break per month from the earliest to the latest START date.
# seq() already begins at the minimum, so it does not need to be prepended.
breaks.vec <- seq(from = min(df6$START), to = max(df6$START), by = "month")

ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID)) +
  geom_point(size = 1.2, alpha = .8) +
  stat_smooth(aes(group = 1)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(aes(group = 1), geom = "point", fun = mean, shape = 17, size = 3) +
  scale_x_date(breaks = breaks.vec, date_labels = "%m/%Y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Created on 2022-10-17 with reprex v2.0.2

Cannot plot the correct x-axis in ggplot2

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)
# Stacked bar chart of n per Month, filled by grp1.
# The data frame defined above is called `dat`; the script previously
# referred to an undefined object `test`.
p <- ggplot(data = dat, aes(Month, n, fill = grp1))
p <- p + geom_col()
p <- p + theme(panel.background = element_rect(fill = "white"),
               plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
               panel.border = element_rect(colour = "black", fill = NA, size = 1),
               axis.line.x = element_line(colour = "black"),
               axis.line.y = element_line(colour = "black"),
               axis.text = element_text(size = 20, colour = "black", family = "sans"),
               axis.title = element_text(size = 20, colour = "black", family = "sans"),
               legend.position = "right", legend.key = element_rect(fill = "white"))
p <- p + scale_y_continuous(limits = c(0, 300), breaks = c(seq(0, 300, 50)), expand = c(0, 0))
# NOTE(review): Month is stored as an integer column, so this discrete scale
# is being applied to a continuous variable. This is the call the question
# below is about, so it is intentionally left as written.
p <- p + scale_x_discrete(breaks = c(seq(1, 12, 1)), labels = c("JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"), expand = c(0, 0))
p <- p + labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.
If you want the Month name along the x-axis, then you can add as.factor(Month) to your ggplot script. Here's an example:
# Same chart built as a single chain. Month is converted to a factor so that
# the discrete x scale (and its month-name labels) applies to it.
p <- ggplot(data = dat, aes(as.factor(Month), n, fill = grp1)) +
  geom_col() +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 20, colour = "black", family = "sans"),
    axis.title = element_text(size = 20, colour = "black", family = "sans"),
    legend.position = "right",
    legend.key = element_rect(fill = "white")
  ) +
  scale_y_continuous(
    limits = c(0, 300),
    breaks = seq(0, 300, 50),
    expand = c(0, 0)
  ) +
  # The breaks are matched against the factor levels "1" ... "12".
  scale_x_discrete(
    breaks = seq(1, 12, 1),
    labels = c(
      "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
      "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
    ),
    expand = c(0, 0)
  ) +
  labs(x = "Month", y = "Number of Days")

p
Which gives you this:-

Change x axis labels to hours (time) on geom_tile()

Here is a geom_tile displaying hours and days of the week. How can it be made to display each hour (i.e. 00:00 through to 23:00) on the x axis?
library(tidyverse)
# Heat map: one tile per (hour, day) pair, coloured by value.
ggplot(data = df, mapping = aes(x = hour, y = day, fill = value)) +
  geom_tile(colour = "ivory")
Currently it displays every fifth hour:
I have tried a bunch of different things, and would prefer a 'best practice' way (i.e. without manually generating labels), but in case labels are needed, here's one way to produce them hour_labs <- 0:23 %>% { ifelse(nchar(.) == 1, paste0("0", .), .) } %>% paste0(., ":00")
Data for reproducible example
df <- structure(list(day = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), hour = c(0L, 2L, 3L, 5L,
6L, 7L, 8L, 10L, 11L, 12L, 13L, 18L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 13L, 14L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 14L, 15L, 16L,
19L, 21L, 0L, 1L, 2L, 3L, 7L, 8L, 10L, 13L, 14L, 22L, 23L), value = c(1L,
1L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 13L, 13L, 24L,
39L, 21L, 17L, 25L, 22L, 27L, 28L, 19L, 6L, 2L, 2L, 1L, 2L, 2L,
7L, 23L, 38L, 18L, 26L, 21L, 20L, 31L, 40L, 35L, 22L, 5L, 3L,
2L, 7L, 4L, 3L, 3L, 3L, 17L, 13L, 23L, 24L, 19L, 31L, 13L, 35L,
50L, 22L, 13L, 7L, 2L, 1L, 1L, 1L, 1L, 3L, 14L, 17L, 33L, 32L,
32L, 25L, 29L, 27L, 38L, 26L, 11L, 8L, 4L, 5L, 5L, 3L, 1L, 1L,
3L, 14L, 21L, 24L, 22L, 25L, 26L, 23L, 58L, 36L, 26L, 6L, 3L,
1L, 5L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L)), row.names = c(NA, -116L), groups = structure(list(day = structure(1:7, .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), .rows = structure(list(1:15,
16:33, 34:51, 52:69, 70:88, 89:105, 106:116), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, 7L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Here's one way using sprintf to construct labels.
library(dplyr)
library(ggplot2)
# Turn the integer hour into a zero-padded "HH:00" label and plot the label
# (a character column, hence a discrete axis) instead of the raw number.
df %>%
  mutate(lab = sprintf('%02d:00', hour)) %>%
  ggplot(mapping = aes(x = lab, y = day, fill = value)) +
  geom_tile(colour = "ivory") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
To complete the missing times, apart from @Eric Watt's suggestion, we can also use complete.
# As above, but first add a row for every "HH:00" label from 00:00 to 23:00
# that is absent from the data, so that all 24 hours appear on the axis.
df %>%
  mutate(lab = sprintf('%02d:00', hour)) %>%
  tidyr::complete(lab = sprintf('%02d:00', 0:23)) %>%
  ggplot(mapping = aes(x = lab, y = day, fill = value)) +
  geom_tile(colour = "ivory") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
I would suggest making sure your data type is correctly representing your data. If your hour column is representing time in hours, then it should be a time based structure. For example:
# Re-parse the integer hour as a POSIXct date-time in UTC; only the hour
# ("%H") is supplied in the format string.
df$hour <- as.POSIXct(as.character(df$hour), format = "%H", tz = "UTC")
Then you can tell ggplot that the x axis is a datetime variable using scale_x_datetime.
# Tile plot on a date-time x axis, labelled as hours and minutes.
# `date_labels` is an argument of scale_x_datetime() itself, so this runs
# without attaching the scales package (date_format() is not available
# unless scales has been loaded).
ggplot(df, aes(hour, day, fill = value)) +
  geom_tile(colour = "ivory") +
  scale_x_datetime(date_labels = "%H:%M") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
If you want a break for every hour, you can input that as breaks:
# Same plot with an explicit break at every hour from 00 to 23. The breaks
# are built the same way as the hour column so that they line up with it.
# `date_labels` is used instead of date_format() so that the scales package
# does not have to be attached.
ggplot(df, aes(hour, day, fill = value)) +
  geom_tile(colour = "ivory") +
  scale_x_datetime(
    breaks = as.POSIXct(as.character(0:23), format = "%H", tz = "UTC"),
    date_labels = "%H:%M"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
You can also use the scales package which has handy formatting options such as date_breaks:
library(scales)
# Variant using the scales helpers: date_breaks() generates a break every
# hour and date_format() renders each break as "HH:MM".
ggplot(data = df, mapping = aes(x = hour, y = day, fill = value)) +
  geom_tile(colour = "ivory") +
  scale_x_datetime(
    breaks = date_breaks("1 hour"),
    labels = date_format("%H:%M")
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Remove shape from legend of combined geom_line() and geom_point()

I want to create a graph of geom_line() coloured by a variable (Var1) then plot geom_point() with shapes according to a different variable (Var2) with the same colours as geom_line().
After reading a lot about this but not being able to find anything that I could interpret as being the same issue I have attempted the following:
# Attempt 1: lines coloured by Var1, plus points (only rows where Var2 is
# not 0) that map shape to Var2 and colour to Var1.
ggplot(data = data, mapping = aes(x = X, y = Y)) +
  geom_line(mapping = aes(color = Var1)) +
  geom_point(
    data = subset(data, Var2 != 0),
    mapping = aes(shape = Var2, colour = Var1),
    size = 3
  ) +
  scale_color_manual(
    values = c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")
  ) +
  scale_x_continuous(breaks = seq(0, 30, 5)) +
  theme_bw()
Which results in the above. The issue with this graph is that the second legend shows both IDs as circles, even though one is plotted as a circle and the other as a triangle. I would ideally like it to just be a coloured line with no shapes at all.
I've also tried this:
# Attempt 2: as before, but the point layer maps only shape to Var2 and no
# longer maps colour to Var1.
ggplot(data = data, mapping = aes(x = X, y = Y)) +
  geom_line(mapping = aes(color = Var1)) +
  geom_point(
    data = subset(data, Var2 != 0),
    mapping = aes(shape = Var2),
    size = 3
  ) +
  scale_color_manual(
    values = c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")
  ) +
  scale_x_continuous(breaks = seq(0, 30, 5)) +
  theme_bw()
The issue with this graph is that the shapes are not filled in by colour in the graph.
This is my data.
dput(data)
structure(list(X = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L), Y = c(1L, 1L, 1L, 2L, 4L, 13L, 18L, 19L,
21L, 24L, 34L, 43L, 70L, 90L, 129L, 169L, 1L, 3L, 3L, 3L, 3L,
4L, 21L, 79L, 157L, 229L, 323L, 470L, 655L, 889L, 1128L, 1701L,
2036L, 2502L, 3089L, 3858L, 4636L, 5883L, 7375L, 9172L, 10149L
), Var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("",
"ID1", "ID2"), class = "factor"), Var2 = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L), .Label = c("", "0", "Point1", "Point2"
), class = "factor")), row.names = c(NA, -41L), class = "data.frame")
How about this
# Lines and points both coloured by Var1; the colour legend's keys are
# overridden with shape = NA so that it shows lines only, while the separate
# shape legend for Var2 is left untouched.
ggplot(data = data, mapping = aes(x = X, y = Y)) +
  geom_line(mapping = aes(color = Var1)) +
  geom_point(
    data = subset(data, Var2 != 0),
    mapping = aes(shape = Var2, color = Var1),
    size = 3
  ) +
  scale_color_manual(
    values = c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")
  ) +
  scale_x_continuous(breaks = seq(0, 30, 5)) +
  theme_bw() +
  guides(colour = guide_legend(override.aes = list(shape = NA)))

Why does ggtern distort data

I can't work out why my data points in the ternary diagram appear distorted. This is particularly visible on the FeO2 scale, where none of the values approaching 50% seem to be plotted correctly. Does ggtern require some data transformation, or am I missing something?
The dataset:
KiDaSm<-structure(list(Site = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Dakawa", "Fukuchani",
"Kilwa", "Mkokotoni", "Tumbe Chwaka", "Unguja Ukuu"), class = "factor"),
Sample = structure(c(7L, 8L, 9L, 10L, 11L, 14L, 15L, 16L,
17L, 19L, 20L, 21L, 23L, 24L, 25L, 26L), .Label = c("EB005",
"EB008", "EB009", "EB017", "EB018", "EB023", "EB028", "EB030",
"EB033", "EB034", "EB035", "EB036", "EB037", "EB038", "EB040",
"EBDAK002", "EBDAK006", "EBDAK007", "EBDAK009", "EBDAK012",
"EBDAK014", "EBDAK015", "EBDAK017", "EBDAK020", "EBDAK021",
"EBDAK022", "FKCH002", "FKCH003", "FKCH005", "FKCH006", "FKCH008",
"FKCH009", "FKCH010", "FKCH012", "FKCH014", "FKCH015", "FKCH016",
"FKCH017", "FKCH018", "FKCH019", "FKCH023", "MKK002", "MKK003",
"MKK007", "MKK009", "MKK011", "MKK013", "MKK014", "MKK017",
"MKK018", "MKK020", "MKK06", "TBCH001", "TBCH002", "TBCH003",
"TBCH005", "TBCH007", "TBCH008", "TBCH009", "TBCH010", "TBCH011",
"TBCH014", "TBCH017", "TBCH018", "TBCH021", "TBCH022", "UU001",
"UU003", "UU004", "UU005", "UU007", "UU008", "UU010", "UU011",
"UU012", "UU014", "UU018", "UU020", "UU022", "UU023", "UU026",
"UU031", "UU033"), class = "factor"), ID = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("ND", "Smelting", "Smithing"), class = "factor"),
Iron = c(52.2866002788889, 57.437955161, 55.880450631, 50.213473286,
53.068958017, 55.776340727, 56.764639409, 61.37738424, 75.741474131,
75.459980082, 69.785922113, 76.298245515, 75.860464737, 77.221978734,
76.602317775, 67.582636787), Aluminium = c(8.07348620588889,
6.9369729006, 6.4314347298, 7.7061493869, 7.3254949831, 7.2108549156,
7.2113019865, 8.2022565362, 4.570137602, 4.3668232665, 5.8538177888,
4.5660791632, 4.2671637947, 4.727287541, 4.7084385736, 6.0287010895
), Silicon = c(24.6786504477778, 22.516695383, 24.261662172,
26.81463386, 25.558654883, 23.062108874, 23.144722305, 26.480492462,
17.138349267, 16.917779397, 19.620246624, 16.265818105, 17.628059944,
15.696017597, 15.786928218, 22.04500569)), .Names = c("Site",
"Sample", "ID", "Iron", "Aluminium", "Silicon"), row.names = c(NA,
-16L), class = "data.frame")
My code:
library(ggtern)
# Ternary plot: the three positional aesthetics are Iron, Silicon and
# Aluminium, in that order; colour and point shape both follow Site.
ggtern(
  data = KiDaSm,
  mapping = aes(Iron, Silicon, Aluminium, color = Site, shape = Site)
) +
  geom_point() +
  labs(
    x = expression(FeO[2]),
    y = expression(SiO[2]),
    z = expression(Al[2] * O[3])
  ) +
  scale_color_manual(values = c("#FFC300", "#FF5733")) +
  theme_bw()
Ternary diagram:

Resources