I have a dataframe with 30 columns and I would like to create 30 (gg)plots based on these columns. When creating a plot through ggplot, you have to create a variable to which all the information of the plot is added.
Is there a way how I can create 30 of such variable names in a for loop (so that I don't have to create and store them all locally?
In earlier code I repeated the below steps 30 times:
In earlier code, I had the following:
a1 = ggplot(data = results_round_one,
aes(results_round_one$`R-0,01`))
a1 = a1 + geom_histogram()
a1 = a1 + xlim(0.46, 0.55)
a1 = a1 + geom_vline(xintercept= mean(results_round_one$`R-0,01`),
col = 'blue')
a1 = a1 + geom_vline(xintercept = max(results_round_one$`R-0,01`),
col = 'red')
a1= a1 + labs(y = 'Frequency',
x= 'Validated accuracy',
title = 'Optimizer = RMSProp',
subtitle = 'Learning rate = 0.01')
However, since I only have to change the aes and the labels, I think I should be able to do this process in a for loop as well.
In absence of some example data, here is some code that would loop through iris columns, creating density plots:
library(purrr)
library(dplyr)
df <- iris %>%
select(Sepal.Length:Petal.Width)
df %>%
map2(names(df), ~ .x %>%
as.data.frame %>%
set_names(.y) %>%
ggplot(aes_string(.y)) + geom_density() + ggtitle(.y))
Using your code, something along the lines of:
results_round_one %>%
map2(names(results_round_one), ~ .x %>%
as.data.frame %>%
set_names(.y) %>%
ggplot(aes_string(.y)) +
geom_histogram() +
xlim(0.46, 0.55) +
geom_vline(xintercept = mean(.x), col = 'blue') +
geom_vline(xintercept = max(.x), col = 'red') +
labs(y = 'Frequency',
x= 'Validated accuracy',
title = 'Optimizer = RMSProp',
subtitle = 'Learning rate = 0.01'))
You could apply histogram function:
getImage <- function(col){
a1 = ggplot(data = results_round_one,
aes(results_round_one[, col])) +
geom_histogram() +
xlim(0.46, 0.55) +
geom_vline(xintercept= mean(results_round_one[, col]),
col = 'blue') +
labs(y = 'Frequency',
x= 'Validated accuracy',
title = 'Optimizer = RMSProp',
subtitle = 'Learning rate = 0.01')
return(a1)
}
to a vector of columns iteratively. In this case col_30 is a vector of column names
# e.g. col_30 = c("col1", "col2") etc.
for(col in col_30){
getImage(col)
}
This would generate different plots.
Related
desired_output_sample
I have following data:
#1. dates of 15 day frequency:
dates = seq(as.Date("2016-09-01"), as.Date("2020-07-30"), by=15) #96 times observation
#2. water content in crops corresponding to the times given.
water <- c(0.5702722, 0.5631781, 0.5560839, 0.5555985, 0.5519783, 0.5463459,
0.5511598, 0.546652, 0.5361545, 0.530012, 0.5360571, 0.5396569,
0.5683526, 0.6031535, 0.6417821, 0.671358, 0.7015542, 0.7177007,
0.7103561, 0.7036985, 0.6958607, 0.6775161, 0.6545367, 0.6380155,
0.6113306, 0.5846186, 0.5561815, 0.5251135, 0.5085149, 0.495352,
0.485819, 0.4730029, 0.4686458, 0.4616468, 0.4613918, 0.4615532,
0.4827496, 0.5149105, 0.5447824, 0.5776764, 0.6090217, 0.6297454,
0.6399422, 0.6428941, 0.6586344, 0.6507473, 0.6290631, 0.6011123,
0.5744375, 0.5313527, 0.5008027, 0.4770338, 0.4564025, 0.4464508,
0.4309046, 0.4351668, 0.4490393, 0.4701232, 0.4911582, 0.5162941,
0.5490387, 0.5737573, 0.6031149, 0.6400073, 0.6770058, 0.7048311,
0.7255012, 0.739107, 0.7338938, 0.7265202, 0.6940718, 0.6757214,
0.6460862, 0.6163091, 0.5743775, 0.5450822, 0.5057753, 0.4715266,
0.4469859, 0.4303232, 0.4187793, 0.4119401, 0.4201316, 0.426369,
0.4419331, 0.4757525, 0.5070846, 0.5248457, 0.5607567, 0.5859825,
0.6107531, 0.6201754, 0.6356589, 0.6336177, 0.6275579, 0.6214981)
I want to compute trend of the water content or moisture data corresponding to different subperiods. Lets say: one trend from 2016 - 09-01 to 2019-11-30.
and other trend from 2019-12-15 to the last date (in this case 2020-07-27).
And I want to make a plot like the one attached.
Appreciate your help. Can be in R or in python.
To draw a trend line, you can look on this tutorial
https://www.statology.org/ggplot-trendline/
Or on this stackoverflow question
Draw a trend line using ggplot
To split your dataset in two groups you simply need to do something like this (in R).
data <- data.frame(dates, water)
#This neat trick allows you to turn a logical value into a number
data$group <- 1 + (data$dates > "2019-11-30")
old <- subset(data,group == 1)
new <- subset(data,group == 2)
For the plots:
library(ggplot2)
ggplot(old,aes(x = dates, y = water)) +
geom_smooth(method = "lm", col = "blue") +
geom_point()
ggplot(new,aes(x = dates, y = water)) +
geom_smooth(method = "lm", col = "red") +
geom_point()
Here is a full-fledged example with added labels:
library(dplyr)
library(ggplot2)
dates <- seq(as.Date("2016-09-01"), as.Date("2020-07-30"), by=15)
wc <- as.numeric(strsplit("0.5702722 0.5631781 0.5560839 0.5555985 0.5519783 0.5463459 0.5511598 0.5466520 0.5361545 0.5300120 0.5360571 0.5396569 0.5683526 0.6031535 0.6417821 0.6713580 0.7015542 0.7177007 0.7103561 0.7036985 0.6958607 0.6775161 0.6545367 0.6380155 0.6113306 0.5846186 0.5561815 0.5251135 0.5085149 0.4953520 0.4858190 0.4730029 0.4686458 0.4616468 0.4613918 0.4615532 0.4827496 0.5149105 0.5447824 0.5776764 0.6090217 0.6297454 0.6399422 0.6428941 0.6586344 0.6507473 0.6290631 0.6011123 0.5744375 0.5313527 0.5008027 0.4770338 0.4564025 0.4464508 0.4309046 0.4351668 0.4490393 0.4701232 0.4911582 0.5162941 0.5490387 0.5737573 0.6031149 0.6400073 0.6770058 0.7048311 0.7255012 0.7391070 0.7338938 0.7265202 0.6940718 0.6757214 0.6460862 0.6163091 0.5743775 0.5450822 0.5057753 0.4715266 0.4469859 0.4303232 0.4187793 0.4119401 0.4201316 0.4263690 0.4419331 0.4757525 0.5070846 0.5248457 0.5607567 0.5859825 0.6107531 0.6201754 0.6356589 0.6336177 0.6275579 0.6214981", " |\\n")[[1]])
data <- data.frame(date=dates, water_content=wc) %>%
mutate(group = ifelse(date <= as.Date("2019-11-30"), "g1", "g2"))
# calculate linear regression and create labels
lmo <- data %>%
group_by(group) %>%
summarise(res=list(stats::lm(water_content ~ date, data = cur_data()))) %>%
.$res
lab <- sapply(lmo, \(x)
paste("Slope=", signif(x$coef[[2]], 5),
"\nAdj R2=", signif(summary(x)$adj.r.squared, 5),
"\nP=", signif(summary(x)$coef[2,4], 5)))
ggplot(data=data, aes(x=date, y=water_content, col=group)) +
geom_point() +
stat_smooth(geom="smooth", method="lm") +
geom_text(aes(date, y, label=lab),
data=data.frame(data %>% group_by(group) %>%
summarise(date=first(date)), y=Inf, lab=lab),
vjust=1, hjust=.2)
Created on 2022-11-23 with reprex v2.0.2
Here is a way. Create a grouping variable by dates, coerce it to factor and geom_smooth will draw the two regression lines.
suppressPackageStartupMessages({
library(ggplot2)
library(ggpubr)
})
df1 <- data.frame(dates, water)
breakpoint <- as.Date("2019-11-30")
df1$group <- factor(df1$dates > breakpoint, labels = c("before", "after"))
ggplot(df1, aes(dates, water, colour = group)) +
geom_line() +
geom_point(shape = 21, fill = 'white') +
geom_smooth(formula = y ~ x, method = lm) +
geom_vline(xintercept = breakpoint, linetype = "dotdash", linewidth = 1) +
stat_cor(label.y = c(0.43, 0.38), show.legend = FALSE) +
stat_regline_equation(label.y = c(0.45, 0.4), show.legend = FALSE) +
scale_color_manual(values = c(before = 'red', after = 'blue')) +
theme_bw(base_size = 15)
Created on 2022-11-23 with reprex v2.0.2
This question already has answers here:
Alternating color of individual dashes in a geom_line
(4 answers)
Closed 8 months ago.
I was wondering if it is possible to create a multicolored dashed line in ggplot.
Basically I have a plot displaying savings based on two packages.
A orange line with savings based on package A
A green line with savings based on package B
I also have a third line and I would like that one to be dashed alterenating between orange and green. Is that something that somebody has been able to do?
Here is an example:
library(tidyverse)
S <- seq(0, 5, by = 0.05)
a <- S ^ 2
b <- S
a_b = a + b #This data should have the dashed multicolor line, since it is the sum of the other two lines.
S <- data.frame(S)
temp <- cbind(S, a, b, a_b)
temp <- gather(temp, variable, value, -S)
desiredOrder <- c("a", "b", "a_b")
temp$variable <- factor(temp$variable, levels = desiredOrder)
temp <- temp[order(temp$variable),]
p <- ggplot(temp, aes(x = S, y = value, colour = variable)) +
theme_minimal() +
geom_line(size = 1) +
scale_color_manual(name = "Legend", values = c("orange", "green", "#0085bd"),
breaks = c("a", "b", "a_b"))
p
I basically want to have a multicolored (dashed or dotted) line for "c"
This is, to my best knowledge, currently only possible via creation of new segments for each alternate color. This is fiddly.
Below I've tried a largely programmatic approach in which you can define the size of the repeating segment (based on your x unit). The positioning of y values is slightly convoluted and it will also result in slightly irregular segment lengths when dealing with different slopes. I also haven't tested it on many data, either. But I guess it's a good start :)
For the legend, I'm taking the same approach, by creating a fake legend and stitching it onto the other plot. The challenges here include:
positioning of legend elements relative to the plot
relative distance between the legend elements
update
For a much neater way to create those segments and a Stat implementation see this thread
library(tidyverse)
library(patchwork)
S <- seq(0, 5, by = 0.05)
a <- S^2
b <- S
a_b <- a + b
df <- data.frame(x = S, a, b, a_b) %>%
pivot_longer(-x, names_to = "variable", values_to = "value")
## a function to create modifiable cuts in order to get segments.
## this looks convoluted - and it is! there are a few if/else statements.
## Why? The assigment of new y to x values depends on how many original values
## you have.
## There might be more direct ways to get there
alt_colors <- function(df, x, y, seg_length, my_cols) {
x <- df[[x]]
y <- df[[y]]
## create new x for each tiny segment
length_seg <- seg_length / length(my_cols)
new_x <- seq(min(x, na.rm = TRUE), x[length(x)], length_seg)
## now we need to interpolate y values for each new x
## This is different depending on how many x and new x you have
if (length(new_x) < length(x)) {
ind_int <- findInterval(new_x, x)
new_y <- sapply(seq_along(ind_int), function(i) {
if (y[ind_int[i]] == y[ind_int[length(ind_int)]]) {
y[ind_int[i]]
} else {
seq_y <- seq(y[ind_int[i]], y[ind_int[i] + 1], length.out = length(my_cols))
head(seq_y, -1)
}
})
} else {
ind_int <- findInterval(new_x, x)
rle_int <- rle(ind_int)
new_y <- sapply(rle_int$values, function(i) {
if (y[i] == y[max(rle_int$values)]) {
y[i]
} else {
seq_y <- seq(y[i], y[i + 1], length.out = rle_int$lengths[i] + 1)
head(seq_y, -1)
}
})
}
## THis is also a bit painful and might cause other bugs that I haven't
## discovered yet.
if (length(unlist(new_y)) < length(new_x)) {
newdat <- data.frame(
x = new_x,
y = rep_len(unlist(new_y), length.out = length(new_x))
)
} else {
newdat <- data.frame(x = new_x, y = unlist(new_y))
}
newdat <- newdat %>%
mutate(xend = lead(x), yend = lead(y)) %>%
drop_na(xend)
newdat$color <- my_cols
newdat
}
## the below is just a demonstration of how the function would work
## using different segment widths
df_alt1 <-
df %>%
filter(variable == "a_b") %>%
alt_colors("x", "value", 1, c("orange", "green"))
df_alt.5 <-
df %>%
filter(variable == "a_b") %>%
alt_colors("x", "value", .5, c("orange", "green"))
df_ab <-
df %>%
filter(variable != "a_b") %>%
# for the identity mapping
mutate(color = ifelse(variable == "a", "green", "orange"))
## create data frame for the legend, also using the alt_colors function as per above
## the amount of x is a bit of trial and error, this is just a quick hack
## this is a trick to center the legend more or less relative to the main plot
y_leg <- ceiling(mean(range(df$value, na.rm = TRUE)))
dist_y <- 2
df_legend <-
data.frame(
variable = rep(unique(df$variable), each = 2),
x = 1:2,
y = rep(seq(y_leg - dist_y, y_leg + dist_y, by = dist_y), each = 2)
)
df_leg_onecol <-
df_legend %>%
filter(variable != "a_b") %>%
mutate(color = ifelse(variable == "a", "green", "orange"))
df_leg_alt <-
df_legend %>%
filter(variable == "a_b") %>%
alt_colors("x", "y", .5, c("orange", "green"))
## I am mapping the colors globally using identity mapping (see scale_identity).
p1 <-
ggplot(mapping = aes(x, value, colour = color)) +
theme_minimal() +
geom_line(data = df_ab, size = 1) +
geom_segment(data = df_alt1, aes(y = y, xend = xend, yend = yend), size = 1) +
scale_color_identity() +
ggtitle("alternating every 1 unit")
p.5 <-
ggplot(mapping = aes(x, value, colour = color)) +
theme_minimal() +
geom_line(data = df_ab, size = 1) +
geom_segment(data = df_alt.5, aes(y = y, xend = xend, yend = yend), size = 1) +
scale_color_identity() +
ggtitle("alternating every .5 unit")
p_leg <-
ggplot(mapping = aes(x, y, colour = color)) +
theme_void() +
geom_line(data = df_leg_onecol, size = 1) +
geom_segment(data = df_leg_alt, aes(xend = xend, yend = yend), size = 1) +
scale_color_identity() +
annotate(
geom = "text", y = unique(df_legend$y), label = unique(df_legend$variable),
x = max(df_legend$x + 1), hjust = 0
)
## set y limits to the range of the main plot
## in order to make the labels visible you need to adjust the plot margin and
## turn clipping off
p1 + p.5 +
(p_leg + coord_cartesian(ylim = range(df$value), clip = "off") +
theme(plot.margin = margin(r = 20, unit = "pt"))) +
plot_layout(widths = c(1, 1, .2))
Created on 2022-01-18 by the reprex package (v2.0.1)
(Copied this over from Alternating color of individual dashes in a geom_line)
Here's a ggplot hack that is simple, but works for two colors only. It results in two lines being overlayed, one a solid line, the other a dashed line.
library(dplyr)
library(ggplot2)
library(reshape2)
# Create df
x_value <- 1:10
group1 <- c(0,1,2,3,4,5,6,7,8,9)
group2 <- c(0,2,4,6,8,10,12,14,16,18)
dat <- data.frame(x_value, group1, group2) %>%
mutate(group2_2 = group2) %>% # Duplicate the column that you want to be alternating colors
melt(id.vars = "x_value", variable.name = "group", value.name ="y_value") # Long format
# Put in your selected order
dat$group <- factor(dat$group, levels=c("group1", "group2", "group2_2"))
# Plot
ggplot(dat, aes(x=x_value, y=y_value)) +
geom_line(aes(color=group, linetype=group), size=1) +
scale_color_manual(values=c("black", "red", "black")) +
scale_linetype_manual(values=c("solid", "solid", "dashed"))
Unfortunately the legend still needs to be edited by hand. Here's the example plot.
I would like to show the mean of two groups in a scatterplot. I have sorted the data so the groups are next to each other. Group 1 is the first 11 records and group2 is the next 133. How can I tell ggplot to draw one line across the range for the first group (House 1-11) and a second line for the second (House 12-133).
Here is what I have so far:
And the code is here:
library(tidyverse)
library(tidymodels)
data(ames)
ames <- AmesHousing::make_ames()
set.seed(1)
split <- initial_split(ames, prop = 0.95, strata = "Sale_Price")
ames_plot <- testing(split)
model1 <- lm(Sale_Price ~ Central_Air, data = ames_plot)
p1 <- model1 %>%
broom::augment() %>%
arrange(Central_Air) %>%
mutate(House = row_number()) %>%
ggplot(aes(House, Sale_Price, color = Central_Air)) +
geom_point(size = 1, alpha = 0.3) +
geom_segment(aes(x = 1, y = .fitted, xend = 144, yend =.fitted)) +
scale_y_continuous(labels = scales::dollar)
p1
Using geom_smooth(formula = 'y ~ x', se = FALSE, method = "lm") instead of geom_segment() gets me close to what I want but I want to show the actual predicted values coming form the lm().
It would be best just to summarize your data for that layer. For example
model1 %>%
broom::augment() %>%
arrange(Central_Air) %>%
mutate(House = row_number()) %>%
ggplot(aes(House, Sale_Price, color = Central_Air)) +
geom_point(size = 1, alpha=.3) +
geom_segment(aes(x = first, y = .fitted, xend = last, yend =.fitted),
data = function(x) {
x %>%
group_by(Central_Air) %>%
summarize(first=first(House), last=last(House), .fitted=mean(.fitted), .groups="drop_last")
}) +
scale_y_continuous(labels = scales::dollar)
So i have a dataframe with 2 columns : "ID" and "Score"
ID contain the name of a simulation and each simulation have 58 different scores that are listed in the column Score.
There is 10 simulations.
I am doing a geom_density plot :
my_dataframe %>%
ggplot(aes(x=`Score`), xlim = c(0, 1)) +
geom_density(aes(color = ID)) +
theme_bw() +
labs(title = "Scores")
https://imgur.com/a/9DUTmWw
How can i tell ggplot that i want the curves of Simulation1 and Simulation2 to not be like the others, i want them to be in red and with an higher width than all the other one.
Thank you for your help,
Best,
Maxime
Something like this?
my_dataframe %>% mutate(group = ifelse(ID %in% c(1,2), 'special', 'NonSpecial')) %>%
ggplot(aes(x=`Score`, lty = group), xlim = c(0, 1)) +
geom_density(aes(color = ID)) +
theme_bw() +
labs(title = "Scores")
I used this data:
my_dataframe <- data.frame(ID = factor(sample(1:4, 100, T)), Score = sin(1:100))
Sample data
set.seed(123)
df <- data.frame(loc.id = rep(c(1:3), each = 4*10),
year = rep(rep(c(1980:1983), each = 10), times = 3),
day = rep(1:10, times = 3*4),
x = sample(123:200, 4*3*10, replace = T),
start = 123,
end = 200)
I want to save the plot of each loc.id for all years in a single page using facet_wrap and each loc.id in separate pages as a pdf. Following
loop does this:
loc.vec <- 1:3
pdf("my.pdf")
for(l in seq_along(loc.vec)){
loc.id <- loc.vec[l]
df.sub <- df[df$loc.id == loc.id,]
pp <- ggplot(df.sub,aes(x = day, y = x)) + geom_line() +
facet_wrap(~year) +
geom_vline(aes(xintercept = df.sub$start)) +
geom_vline(aes(xintercept = df.sub$end))
print(pp)
}
dev.off()
Can I achieve without the loop?
Thanks
Here is a solution using purrr:
library(tidyverse)
f_plot <- function(id) {
df %>%
filter(loc.id == id) %>%
ggplot(., aes(x = day, y = x)) +
geom_line() +
facet_wrap(~year) +
geom_vline(aes(xintercept = start)) +
geom_vline(aes(xintercept = end))
}
pdf("my2.pdf")
map(loc.vec, f_plot)
dev.off()
Consider by (being the object-oriented wrapper to tapply) to slice dataframe by the loc.vec factor and run subsets through plot:
process_plots <- function(df.sub) {
ggplot(df.sub, aes(x = day, y = x)) +
geom_line() + facet_wrap(~year) +
geom_vline(aes(xintercept = df.sub$start)) +
geom_vline(aes(xintercept = df.sub$end))
}
pdf("my.pdf")
by(df, df$loc.vec, process_plots)
dev.off()