I have hourly temperature data from 1970 to 2021. I would like to summarize the dataset in a ggplot graph showing the mean temperature of each month and year. I also want to show that both the average temperature and the temperature volatility have increased, and that the average temperature in the 2010s is higher than in the 1990s.
Here is the code of temperature density plot in 2010s and 1990s. But I don't know how to set the x axis.
# Label each observation with its decade so the 1990s and 2010s can be compared.
# The 2010s are 2010:2019; 2020 belongs to the next decade and is excluded.
data_re <- data
data_re$Year910 <- ifelse(
  data$Year %in% 1990:1999, "1990s",
  ifelse(data$Year %in% 2010:2019, "2010s", NA)
)

# Keep only the two decades of interest; rows from other years carry an NA label.
data_re1 <- subset(data_re, Year910 %in% c("1990s", "2010s"))
data_re1$Year910 <- factor(data_re1$Year910)

# Overlaid density curves, one per decade.
# NOTE(review): `x` is still unset -- it must be mapped to the column that holds
# the temperature readings, whose name is not visible here.
ggplot(data_re1, aes(x=, color = Year910)) +
  xlab('Temperature') +
  geom_density()
Here is the data file.
https://docs.google.com/spreadsheets/d/1HwPFJ1wKMMr0845Et60tV36WIWXM66Ig/edit?usp=sharing&ouid=111186166036061320361&rtpof=true&sd=true
Any advice on how to best go about this would be greatly appreciated.
I used lubridate library to convert the character to Date.
# Reshape to long form: every column except Date/Month/Year becomes one row
# per reading, with the column name in `HourNo` and the value in `Temp`.
df <- pivot_longer(
  df,
  !c(Date, Month, Year),
  names_to = "HourNo",
  values_to = "Temp"
)
df$Date <- as.Date(df$Date, format = "%Y-%m-%d")
df <- na.omit(df)

# Month key of the form "<month abbreviation>-<year>", used for grouping.
df$Abbr <- paste0(month.abb[month(df$Date)], "-", year(df$Date))

# Mean temperature per month key, positioned at the earliest date in that group.
df_Aggr <- summarise(
  group_by(df, Abbr),
  Avg = mean(Temp),
  Begin = min(Date)
)

# Line chart of the monthly means with an axis break every 24 months.
ggplot(df_Aggr, aes(x = Begin, y = Avg)) +
  geom_line(color = "blue", size = 1) +
  scale_x_date(date_breaks = "24 month", date_labels = "%b-%Y") +
  theme(axis.text.x = element_text(vjust = 0.8, angle = 35, hjust = 0.9)) +
  ggtitle("Average Temperature")
I would first of all ensure your data is in the correct format after importing it. I am assuming you have already loaded your data frame from Excel and named it df:
# Keep rows 1 to 624 of the imported sheet.
df <- df[seq_len(624), ]
# Coerce columns 2 through 28 to numeric.
df[2:28] <- lapply(df[2:28], function(column) as.numeric(column))
# Store Date as POSIXct.
df$Date <- as.POSIXct(df$Date)
Now we can load a couple of useful packages:
library(tidyverse)
library(geomtextpath)
Then we could summarize and plot with something like the following:
# Annual mean temperature since 1990, with a dashed, labelled segment marking
# the mean of each decade (1990s, 2000s, 2010s).
df %>%
  rowwise() %>%
  # Row-wise mean over the 'Hour*' columns: NAs are dropped from the sum, but
  # the divisor is always 24.
  mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
  select(-starts_with('Hour')) %>%
  filter(Date > as.POSIXct('1989-12-31')) %>%
  group_by(Year) %>%
  summarize(Temp = mean(Temp), Date = median(Date)) %>% {
    # The braces expose the yearly summary as `.` so it can serve both as the
    # main plot data and as the source of the per-decade subsets below.
    ggplot(., aes(Date, Temp)) +
      geom_line(color = 'gray') +
      geom_point(color = 'gray75') +
      geom_textsegment(aes(x = as.POSIXct('1990-01-01'),
                           xend = as.POSIXct('1999-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '1990s',
                           label = '1990s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
      geom_textsegment(aes(x = as.POSIXct('2000-01-01'),
                           xend = as.POSIXct('2009-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '2000s',
                           label = '2000s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2010-01-01') &
                                  .$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
      geom_textsegment(aes(x = as.POSIXct('2010-01-01'),
                           xend = as.POSIXct('2019-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '2010s',
                           label = '2010s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2020-01-01') &
                                  .$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
      theme_light(base_size = 16) +
      scale_color_brewer(palette = 'Set1') +
      theme(legend.position = 'none') +
      labs(title = 'Annual mean temperature')
  } # closes the block opened after summarize(); without it the code does not parse
Note that I have used an annual summary of the temperature here. If you use monthly temperatures, the range of temperatures becomes much larger and the message of the plot becomes much weaker:
# Same plot as the annual version but without the yearly aggregation: every
# row of `df` is plotted individually, with a dashed, labelled segment marking
# the mean of each decade (1990s, 2000s, 2010s).
df %>%
  rowwise() %>%
  # Row-wise mean over the 'Hour*' columns: NAs are dropped from the sum, but
  # the divisor is always 24.
  mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
  select(-starts_with('Hour')) %>%
  filter(Date > as.POSIXct('1989-12-31')) %>% {
    # The braces expose the filtered data as `.` so it can serve both as the
    # main plot data and as the source of the per-decade subsets below.
    ggplot(., aes(Date, Temp)) +
      geom_line(color = 'gray') +
      geom_point(color = 'gray75') +
      geom_textsegment(aes(x = as.POSIXct('1990-01-01'),
                           xend = as.POSIXct('1999-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '1990s',
                           label = '1990s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
      geom_textsegment(aes(x = as.POSIXct('2000-01-01'),
                           xend = as.POSIXct('2009-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '2000s',
                           label = '2000s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2010-01-01') &
                                  .$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
      geom_textsegment(aes(x = as.POSIXct('2010-01-01'),
                           xend = as.POSIXct('2019-12-31'),
                           y = mean(Temp), yend = mean(Temp), color = '2010s',
                           label = '2010s'), vjust = -0.2, size = 6,
                       data = .[.$Date < as.POSIXct('2020-01-01') &
                                  .$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
      theme_light(base_size = 16) +
      scale_color_brewer(palette = 'Set1') +
      theme(legend.position = 'none') +
      # Title corrected: this version plots monthly values, not annual means.
      labs(title = 'Monthly mean temperature')
  }
Related
I have a population and two samples. One sample is a random sample and the other is proportional to the population. I want to create plots that show how proportional the samples are with respect to some factors, in this case year and gear. I want a plot in which the circle and the cross are the same size when the sample is proportional to the population for the given factors; if the cross is larger than the circle, the sample over-represents that factor level relative to the population, and vice versa. See below for my code.
library(ggplot2)
library(dplyr)
library(tidyr)
## Simulate catch (population) and observation (sample) totals for two gear
## types over n_y consecutive years, then express each as a within-year percentage.
## The draws are random and no seed is set, so values differ between runs.
n_y = 5
min_year = 1900
years = min_year:(min_year + n_y - 1)
## log-normal draws; the third positional argument is sdlog
fixed_gear_catch = rlnorm(n = n_y, meanlog = log(2500), 0.5)
trawl_gear_catch = rlnorm(n = n_y, meanlog = log(1200), 0.3)
fixed_gear_obs = rlnorm(n = n_y, meanlog = log(250), 0.5)
trawl_gear_obs = rlnorm(n = n_y, meanlog = log(120), 0.3)
population_df = data.frame(fixed_gear = fixed_gear_catch,
trawl_gear = trawl_gear_catch,
years = years)
## independent random sample (drawn separately from the catch)
sample_data = data.frame(fixed_gear = fixed_gear_obs,
trawl_gear = trawl_gear_obs,
years = years)
## perfectly proportional sample: exactly 20% of every catch value
proportional_sample_data = data.frame(fixed_gear = fixed_gear_catch * 0.2,
trawl_gear = trawl_gear_catch * 0.2,
years = years)
## reshape wide -> long: one row per (years, gear) with the amount in `catch`
population_df = population_df %>% gather("gear", "catch", -years)
sample_data = sample_data %>% gather("gear", "catch", -years)
proportional_sample_data = proportional_sample_data %>% gather("gear", "catch", -years)
## give type and merge
population_df$type = "Catch"
sample_data$type = "Observed"
proportional_sample_data$type = "Observed"
full_df = rbind(population_df, sample_data)
full_proportional_df = rbind(population_df, proportional_sample_data)
## convert to proportions: share of each gear within a (years, type) group, in percent
full_df = full_df %>% group_by(years, type) %>% mutate(percentage = catch / sum(catch) * 100)
full_proportional_df = full_proportional_df %>% group_by(years, type) %>% mutate(percentage = catch / sum(catch) * 100)
## check the perfect proportions are what we think they should be
full_proportional_df %>% pivot_wider(id_cols = years, values_from = percentage, names_from = c(gear, type))
full_df %>% pivot_wider(id_cols = years, values_from = percentage, names_from = c(gear, type))
## Point shape and colour per type: shape 1 (open circle) for the catch,
## shape 3 (cross) for the observations.
shpe_manual <- c(Catch = 1, Observed = 3)
col_manual <- c(Catch = "red", Observed = "blue")

## Random sample against the population; point size encodes the percentage.
ggplot(data = full_df, mapping = aes(x = gear, y = years)) +
  geom_point(mapping = aes(shape = type, colour = type, size = percentage)) +
  scale_shape_manual(values = shpe_manual) +
  scale_size_continuous(limits = c(0, 100), range = c(0, 15)) +
  scale_color_manual(values = col_manual)

## Proportional sample: circles and crosses should be perfectly matched in
## size here, but they are not.
ggplot(data = full_proportional_df, mapping = aes(x = gear, y = years)) +
  geom_point(mapping = aes(shape = type, colour = type, size = percentage)) +
  scale_shape_manual(values = shpe_manual) +
  scale_size_continuous(limits = c(0, 100), range = c(0, 15)) +
  scale_color_manual(values = col_manual)
The cross is naturally 2x as tall/wide, so I think this fixes it visually:
# Same chart as before, but the size value of "Observed" points is halved
# before it is mapped, and scale_size_area() takes the place of the earlier
# scale_size_continuous(limits = c(0,100), range = c(0,15)).
ggplot(data = full_df, mapping = aes(x = gear, y = years)) +
  geom_point(
    mapping = aes(
      shape = type,
      colour = type,
      size = percentage * if_else(type == "Observed", 0.5, 1)
    )
  ) +
  scale_shape_manual(values = shpe_manual) +
  scale_color_manual(values = col_manual) +
  scale_size_area(max_size = 15)
As a check:
# Sanity check on the perfectly proportional sample: the size value of
# "Observed" points is halved before mapping.
ggplot(data = full_proportional_df, mapping = aes(x = gear, y = years)) +
  geom_point(
    mapping = aes(
      shape = type,
      colour = type,
      size = percentage * if_else(type == "Observed", 0.5, 1)
    )
  ) +
  scale_shape_manual(values = shpe_manual) +
  scale_size_continuous(limits = c(0, 100), range = c(0, 15)) +
  scale_color_manual(values = col_manual)
I have below ggplot:
library(ggplot2)
# Example data: one value per name (A, B, C) in each of two groups.
data <- data.frame(
  val = c(10, 30, 15, 30, 40, 12),
  name = rep(c("A", "B", "C"), times = 2),
  group = rep(c("gr1", "gr2"), each = 3)
)

# Narrow stacked bars: one bar per group, one segment per name.
ggplot(data, aes(x = group, y = val, fill = name)) +
  geom_bar(stat = "identity", position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
# Build the curved bands that connect the two bars. For every `name`, `val` is
# interpolated from its first row's value to its second row's value along an
# S-shaped curve (a normal CDF centred at x = 1.5, midway between the bars).
# reframe() is used rather than summarize() because each group returns many
# rows; multi-row summarize() results are deprecated as of dplyr 1.1.0.
alluvia <- data %>%
  group_by(name) %>%
  reframe(x = seq(1, 2, 0.01),
          val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))

# Bars are placed at the numeric factor codes of `group` so that the
# continuous-x bands line up with them; the axis is relabelled with the
# original group names.
ggplot(data,
       aes(x = as.numeric(factor(group)),
           y = val,
           fill = name)) +
  geom_bar(stat = "identity",
           position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
                     name = "Group", expand = c(0.25, 0.25)) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
# Generalized band construction for any number of groups. Each (name, group)
# row is paired with the same name's row in the following group, and `val` is
# interpolated between the two along a normal-CDF curve centred halfway
# between the bars. The last group has no successor and is dropped.
# reframe() is used rather than summarise() because each group returns many
# rows; multi-row summarise() results are deprecated as of dplyr 1.1.0.
alluvia <- data %>%
  mutate(group = as.numeric(factor(group)),
         name = factor(name)) %>%
  arrange(group) %>%
  group_by(name) %>%
  mutate(next_group = lead(group),
         next_val = lead(val)) %>%
  filter(!is.na(next_val)) %>%
  group_by(name, group) %>%
  # x stops 0.01 short of each bar position on both sides.
  reframe(x = seq(group + 0.01, next_group - 0.01, 0.01),
          val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)

# Bars sit at the numeric factor codes of `group`; the axis is relabelled with
# the original group names.
ggplot(data,
       aes(x = as.numeric(factor(group)),
           y = val,
           fill = name)) +
  geom_bar(stat = "identity",
           position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  scale_x_continuous(breaks = seq(length(unique(data$group))),
                     labels = levels(factor(data$group)),
                     name = "Group", expand = c(0.25, 0.25)) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
How can I show the month abbreviations ("xi") on the x axis instead of the numbers?
I need to replace the numbers on the x axis with the month abbreviations ("xi").
Reproducible example
library(ggplot2)
library(dplyr)
# Example data: seven months of observed and estimated values plus the lower
# (yii) and upper (yiii) bounds of the shaded band.
x <- c("2014-06", "2014-07", "2014-08", "2014-09", "2014-10", "2014-11", "2014-12")
xi <- c("Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dez")
values.observed <- c(3.698, 2.132, 2.716, 4.279, 3.918, 4.493, 4.265)
values.estimated <- c(2.670, 2.689, 3.078, 3.735, 3.963, 4.238, 4.315)
yii <- c(0.629, 1.394, 1.957, 2.677, 2.913, 3.190, 3.299)
yiii <- c(4.567, 3.982, 4.185, 4.785, 4.996, 5.279, 5.349)
df <- data.frame(x, xi, values.observed, values.estimated, yii, yiii)

# Axis positions 1..7. as.integer() cannot parse "YYYY-MM" strings (it returns
# NA with a warning), and since R 4.0.0 data.frame() keeps strings as
# character, so the values go through factor() explicitly to get integer codes.
Year <- seq(min(as.integer(factor(df$x))), max(as.integer(factor(df$x))), by = 1)

df %>%
  mutate(x = as.integer(factor(x))) %>%
  # Long form: one row per (month, series) with the series name in `group`.
  tidyr::pivot_longer(
    cols = starts_with('values'),
    names_to = 'group',
    values_to = 'values'
  ) %>%
  mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
  ggplot(aes(x = x, y = values)) +
  geom_line(aes(color = group), size = 1.3) +
  geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
  scale_color_manual(values = c(observed = 'green', estimated = 'red')) +
  scale_x_continuous(breaks = Year, labels = Year) +
  ylab("X") +
  xlab("Months") +
  theme(axis.text.x = element_text(angle = -15, vjust = 0))
You can group the first geom_line by group and make the geom_ribbon use as.numeric(xi) for its x aesthetic:
# Turn the month abbreviations into a factor whose levels follow the row
# order, so the discrete axis shows them in that order.
df$xi <- factor(df$xi, levels = df$xi)

df %>%
  tidyr::pivot_longer(
    cols = starts_with('values'),
    names_to = 'group',
    values_to = 'values'
  ) %>%
  mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
  ggplot() +
  # Lines use the factor directly; `group = group` connects points per series.
  geom_line(
    aes(x = xi, y = values, color = group, group = group),
    size = 1.3
  ) +
  # The ribbon is given the numeric codes of the same factor as its x values.
  geom_ribbon(
    aes(x = as.numeric(xi), y = values, ymin = yii, ymax = yiii),
    alpha = 0.3,
    show.legend = FALSE
  ) +
  scale_color_manual(values = c(observed = 'green', estimated = 'red')) +
  ylab("X") +
  xlab("Months") +
  theme(axis.text.x = element_text(angle = -15, vjust = 0))
Or with what you have done, just provide the labels:
# Month abbreviations in row order, used as tick labels at positions 1..n.
# Taken with as.character() so it works whether `xi` is a character vector or
# a factor (the previous split() on as.integer(df$xi) only worked for a factor).
labels <- as.character(df$xi)

df %>%
  # as.integer() cannot parse "YYYY-MM" strings (it returns NA), so the values
  # go through factor() to obtain integer positions 1..n.
  mutate(x = as.integer(factor(x))) %>%
  tidyr::pivot_longer(
    cols = starts_with('values'),
    names_to = 'group',
    values_to = 'values'
  ) %>%
  mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
  ggplot(aes(x = x, y = values)) +
  geom_line(aes(color = group), size = 1.3) +
  geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
  scale_color_manual(values = c(observed = 'green', estimated = 'red')) +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  ylab("X") +
  xlab("Months") +
  theme(axis.text.x = element_text(angle = -15, vjust = 0))
I want the text labels to be placed above or below the bar cap, depending on where there is more space for them. At the moment they are always placed below, which does not always look good:
Here is my code:
library(tidyr)
library(ggplot2)
library(dplyr)
library(stringr)
library(purrr)
# Example counters per NUMA node: CPU utilisation values plus used/total
# memory. Values come from sample(), so they differ between runs unless a seed
# is set beforehand.
numa.nodes <- tibble(
  numa_name = c("numa_01","numa_01","numa_01","numa_01","numa_01","numa_01","numa_02","numa_02","numa_02","numa_02"),
  counter_name = c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
  value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
  numa_name = c("numa_03","numa_03","numa_03","numa_03","numa_03","numa_03","numa_04","numa_04","numa_04","numa_04"),
  counter_name = c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
  value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
  numa_name = c("numa_05","numa_05","numa_05","numa_05","numa_05","numa_05","numa_05"),
  counter_name = c("cpu01","cpu02","cpu03","cpu04","cpu05","memory_used","memory_total"),
  value = c(sample(1:100,5), sample(1:64,1), 64)
)

# Ordered factor with levels in order of first appearance. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
numa.nodes <- numa.nodes %>%
  mutate(counter_name = factor(counter_name, levels = unique(counter_name), ordered = TRUE))

# Common upper y limit for every memory panel: the largest memory_total plus
# headroom of 6.
memory_columns <- numa.nodes %>% filter(counter_name == 'memory_total')
memory_y_scale <- max(memory_columns$value, na.rm = TRUE) + 6
# Draw the CPU panel and the memory panel of one NUMA node side by side.
#
# num: text matched (as a regular expression) against `numa_name` to select
#      the node's rows; also shown in the figure heading.
# Returns the value of ggpubr::annotate_figure() for the combined panels.
plot_numa <- function(num) {
  node_rows <- filter(numa.nodes, str_detect(numa_name, num))

  # CPU panel: a white full-height (100) bar behind each filled usage bar,
  # with the percentage printed 5 units above the bar top.
  cpu_rows <- filter(node_rows, str_detect(counter_name, "cpu"))
  cpu_panel <- ggplot(cpu_rows, aes(x = counter_name)) +
    geom_col(aes(y = 100), fill = "white", color = "black") +
    geom_col(aes(y = value), fill = "#00AFBB", color = "black") +
    geom_text(
      aes(y = value, label = paste0(value, "%")),
      nudge_y = 5,
      color = "black"
    ) +
    theme_bw() +
    labs(x = "CPU", y = "")

  # Memory panel: memory_used and memory_total are spread into columns so the
  # used bar can be drawn over the total bar in a single x position.
  memory_rows <- filter(node_rows, str_detect(counter_name, "memory"))
  memory_wide <- pivot_wider(
    memory_rows,
    names_from = counter_name,
    values_from = value
  )
  memory_panel <- ggplot(memory_wide, aes(x = "")) +
    geom_col(aes(y = memory_total), fill = "white", color = "black") +
    geom_col(aes(y = memory_used), fill = "#FC4E07", color = "black") +
    geom_text(
      aes(label = paste(memory_total, "GB"), y = memory_total),
      nudge_y = 5,
      color = "black"
    ) +
    geom_text(
      aes(label = paste(memory_used, "GB"), y = memory_used),
      nudge_y = -3,
      color = "black"
    ) +
    theme_bw() +
    ylim(0, memory_y_scale) +
    labs(x = "Memory", y = "")

  ggpubr::annotate_figure(
    ggpubr::ggarrange(cpu_panel, memory_panel, ncol = 2),
    top = paste("NUMA", num)
  )
}
# Node identifiers with the "numa_" prefix removed, one per distinct node.
numa_numbers <- str_remove(unique(numa.nodes$numa_name), "numa_")

# One combined CPU/memory figure per node, arranged together.
ggpubr::ggarrange(
  plotlist = map(numa_numbers, function(node_id) plot_numa(num = node_id))
)
I tried to change this line:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black")
to something like that:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used),nudge_y = ifelse( (memory_total-memory_used) > 10, 5, -3)
, color = "black")
But I've got an error:
Error in ifelse((memory_total - memory_used) > 10, 5, -3) :
object 'memory_total' not found
Is there a better way to position the labels optimally?
What am I doing wrong?
How to change color of label to more contrast ie black on white, white on red?
Think of it this way: The nudge value will be different (potentially) for every observation in your data frame. That means that this is something that should be handled within aes(), where stuff is designed to change with your data, rather than nudge_y, which is designed to be a constant (and complains if used otherwise).
So, the solution is to do away entirely with nudge_y and build your ifelse() statement directly into aes(y=...).
In this case, here's the replacement for that particular geom_text() line:
# to see the same plot posted here, put this at the top of your code
set.seed(7331)
...
# plot code...
... +
geom_text(aes(
label = paste(memory_used, "GB"),
y = ifelse((memory_total-memory_used > 10), memory_used + 5, memory_used - 3)),
color = "black") +
My goal is to produce labels with commas, but no decimals. Let's say I have a ggplot with the following section:
geom_text(aes(y = var,
label = scales::comma(round(var))), hjust = 0, nudge_y = 300 )
This is almost what I need. It gives me the commas, but has a decimal. I have seen here (axis labels with comma but no decimals ggplot) that comma_format() could be good, but I think the label in my case needs a data argument, which comma_format() does not take. What can I do?
Update:
As an example of when this problem occurs, see the following, which uses gganimate and has a lot more going on. Code derived from Jon Spring's answer at Animated sorted bar chart with bars overtaking each other
library(gapminder)
library(gganimate)
library(tidyverse)
# Build a smoothed GDP-per-capita series for Asian countries, in two passes:
#   1. fill in every whole year per country, spline-interpolate gdpPercap and
#      rank the countries within each year;
#   2. fill in half-year steps, spline-interpolate gdpPercap again and
#      linearly interpolate the rank so bars can move between positions.
gap_smoother <- gapminder %>%
  filter(continent == "Asia") %>%
  group_by(country) %>%
  complete(year = full_seq(year, 1)) %>%
  mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
  group_by(year) %>%
  mutate(rank = min_rank(-gdpPercap) * 1) %>%
  ungroup() %>%
  group_by(country) %>%
  complete(year = full_seq(year, .5)) %>%
  mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
  mutate(rank = approx(x = year, y = rank, xout = year)$y) %>%
  ungroup() %>%
  arrange(country, year)

# Restrict to 1999-2007.
gap_smoother2 <- gap_smoother %>% filter(year <= 2007 & year >= 1999)

# Keep rows with rank 8 or better. The plain pipe replaces `%<>%`, which is
# magrittr's assignment pipe: it is not attached by library(tidyverse) and it
# also overwrote gap_smoother2 as a side effect.
gap_smoother3 <- gap_smoother2 %>% filter(rank <= 8)
# Animated horizontal bar chart: one tile per country, positioned by rank,
# with the country name to the left of the bar and the value to its right.
p <- ggplot(gap_smoother3, aes(rank, group = country,
                               fill = as.factor(country), color = as.factor(country))) +
  # Tiles centred at gdpPercap / 2 with height gdpPercap, so each bar spans
  # from 0 to gdpPercap.
  geom_tile(aes(y = gdpPercap/2,
                height = gdpPercap,
                width = 0.9), alpha = 0.8, color = NA) +
  geom_text(aes(y = 0, label = paste(country, " ")), vjust = 0.2, hjust = 1) +
  geom_text(aes(y = gdpPercap,
                label = scales::comma(round(gdpPercap))), hjust = 0, nudge_y = 300 ) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  # "none" replaces FALSE, which is deprecated as a way to hide a guide.
  guides(color = "none", fill = "none") +
  labs(title='{closest_state %>% as.numeric %>% floor}',
       x = "", y = "GDP per capita") +
  theme(plot.title = element_text(hjust = 0, size = 22),
        axis.ticks.y = element_blank(), # These relate to the axes post-flip
        axis.text.y = element_blank(), # These relate to the axes post-flip
        plot.margin = margin(1,1,1,4, "cm")) +
  transition_states(year, transition_length = 1, state_length = 0) +
  enter_grow() +
  exit_shrink() +
  ease_aes('linear')

animate(p, fps = 2, duration = 5, width = 600, height = 500)
In addition to the solution provided by @drf, you need to add scale_y_continuous(labels = scales::comma) to your ggplot commands. But put it before the coord_flip function.
p <- ggplot(gap_smoother3, aes(rank, group = country,
fill = as.factor(country), color = as.factor(country))) +
geom_tile(aes(y = gdpPercap/2,
height = gdpPercap,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = gdpPercap,
label = scales::comma(round(gdpPercap), accuracy=1)),
hjust = 0, nudge_y = 300 ) +
scale_y_continuous(labels = scales::comma) +
... etc.