Time series bar graph reordering - r

I have the following script I'm working on, I want to re order the bar graph in descending order by their values.
library(tidyverse)
library(lubridate)
library(ggplot2)
#df <- read_csv('dataframe.csv')
df %>%
mutate(date=mdy(date), year=year(date), year = year + (date >= mdy(paste0("10/01/", year))))%>%
group_by(year) %>%
summarize(avg = mean(flow)) -> df
y <- df$avg
x <- ymd(sprintf("%d-01-01",df$year))
d <- data.frame(x = x, y = y)
# interpolate values from zero to y and create corresponding number of x values
vals <- lapply(d$y, function(y) seq(0, y, by = 0.1))
y <- unlist(vals)
mid <- rep(d$x, lengths(vals))
d2 <- data.frame(x = mid - 100,
xend = mid + 100,
y = y,
yend = y)
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2)
Results
I want to reorder the bars in descending order by values
The dataset can be found through the following link
https://drive.google.com/file/d/11PVub9avzMFhUz02cHfceGh9DrlVQDbD/view?usp=sharing
the output I'm looking for is like this.
Kindly assist.

To arrange the data you need to adjust the factor levels. You could arrange the data based on avg column and change year to factor.
library(dplyr)
library(ggplot2)
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg) + geom_col(aes(fill = 'red')) + guides(fill=FALSE)
Or :
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg, fill = avg) + geom_col()

Try this:
library(scales)
#Custom Transform function
dttrans <- function(a, b, breaks = b$breaks, format = b$format) {
a <- as.trans(a)
b <- as.trans(b)
name <- paste(a$name, b$name, sep = "-")
trans <- function(x) a$trans(b$trans(x))
inv <- function(x) b$inverse(a$inverse(x))
trans_new(name, trans, inv, breaks, format = format)
}
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2) +
scale_x_continuous(trans = dttrans("reverse", "date"))
Credits: Mikko Marttila

Related

How do you add trendline to part of data in ggplot2?

I have data and a plot like this,
x = c(1,2,3,4,5,6,7,8,9,10,11,12)
y1 = x^2-5
y2 = -x^2+1
data <- data.frame(x,y1,y2)
data1 = data.frame(pivot_longer(data,2:3))
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(method = 'lm',se = FALSE)
Is there a way to have the trendline only applying to values for x greater than a certain number, like 3?
You can do this:
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(data=dplyr::filter(data1,x>3), method = 'lm',se = FALSE)
You can apply the current aes to geom_point only, and create a new column (i.e. x2 in my code) for mapping to geom_smooth.
library(tidyverse)
x = c(1,2,3,4,5,6,7,8,9,10,11,12)
y1 = x^2-5
y2 = -x^2+1
data <- data.frame(x,y1,y2)
data1 = data.frame(pivot_longer(data,2:3))
data1 %>% mutate(x2 = ifelse(x > 3, x, NA)) %>%
ggplot()+
geom_point(aes(x, y = value, color = name)) +
geom_smooth(aes(x2, y = value, color = name), method = 'lm',se = FALSE)
Created on 2022-05-07 by the reprex package (v2.0.1)
Similar to both above just using subset:
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(data=subset(data1, x > 3), method = 'lm',se = FALSE)

Using ggplot with lapply does not show y labels

I have the following data:
library(dplyr)
countries <- c('Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Republic of Cyprus')
year <- rep(2009:2022, length(countries))
country <- as.data.frame(rep(countries, length(2009:2022)))
country <- country[order(country$`rep(countries, length(2009:2022))`),]
df<- cbind.data.frame(country, year)
df$year <- as.numeric(df$year)
df <- df %>%
group_by(country) %>%
mutate(n_obs = 1:n())
df <- df %>% group_by(country) %>%
mutate(gdp = rnorm(n = 1, mean = 3000, sd = 300) + 20.64*n_obs,
inflation = rnorm(n = 1, mean = 5, sd = 3) + 1.23*n_obs)
I want to make line plots for gdp and inflation one by one like so:
ggplot(df, aes(x = year, y = inflation, color = country)) + geom_line()
ggplot(df, aes(x = year, y = gdp, color = country)) + geom_line()
However, in the real data, I have a lot of variables that I want to plot, and I was wondering how I could use lapply to achieve that. I tried the following code:
lapply(df[,c(4,5)], function(var)
ggplot(data = df, aes(x = year, y = var, color = country))
+ geom_line() + labs(x = "year", y = var))
This works, but I cannot get the y variable label on the plot. Any help would be appreciated.
Regards
You can use the following code:
lapply(names(df)[4:5], function(var)
ggplot(data = df, aes(x = year, y = .data[[var]], color = country))
+ geom_line() + ylab(var))
Output gdp:
Output inflation:

Add significance lines to facets in ggplot2

How to adjust the height of each geom_line depending on the facet group (y-lims differ depending on the group, see image below)?
I tried to build a custom data.frame which contains heights for each condition but this is not accepted by geom_line.
I have this little working example:
carData <- mtcars
carData$cyl <- factor(carData$cyl)
maxval <- max(carData$mpg)
maxval <- maxval * 1.1
lowval <- maxval - maxval * 0.02
txtval <- maxval * 1.04
llev <- "4"
rlev <- "6"
lpos <- which(levels(carData$cyl) == llev)
rpos <- which(levels(carData$cyl) == rlev)
mpos <- (lpos + rpos) / 2
df1 <- data.frame(a = c(lpos,lpos,rpos,rpos), b = c(lowval, maxval, maxval, lowval))
p <- ggplot(carData, aes(cyl, mpg))
p <- p + geom_boxplot()
p <- p + geom_line(data = df1, aes(x = a, y = b)) + annotate("text", x = mpos, y = txtval, label = "3.0")
p <- p + facet_wrap( ~ gear,ncol=2,scales="free")
You need to capture the variable you are using to facet with, in your summary data.frame. We could capture group wise maxima and use them for the y positions of the geom_segment() and geom_text:
library(tidyverse)
# get the max for each gear facet
df2 <- carData %>% group_by(gear) %>%
summarise(ypos = max(mpg)*1.1) %>%
mutate(x = lpos, xend = rpos) # use your factor level locators
p <- ggplot(carData, aes(cyl, mpg)) +
geom_boxplot() +
geom_segment(data = df2, aes(y = ypos, yend = ypos, x = x, xend = xend)) +
geom_text(data = df2, aes(y = ypos*1.02, x = mean(c(x, xend))), label = "3.0") +
facet_wrap( ~ gear,ncol=2, scales="free")
# if you want the end ticks
p + geom_segment(data = df2, aes(y = ypos, yend = ypos * .99, x = x, xend = x)) +
geom_segment(data = df2, aes(y = ypos, yend = ypos *.99, x = xend, xend = xend))

How to extract an interaction to an external variable from a ggplot graph?

I have a ggplot graph defined like this:
x <- seq(0, 10, by = 0.1)
y1 <- cos(x)
y2 <- sin(x)
df1 <- data.frame(x = x, y = y1, type = "sin", id = 1)
df2 <- data.frame(x = x, y = y2, type = "cos", id = 2)
df3 <- data.frame(x = 2, y = 0.5, type = "constant", id = 3)
df4 <- data.frame(x = 4, y = 0.2, type = "constant", id = 4)
combined <- rbind(df1, df2, df3, df4)
ggplot(combined, aes(x, y, colour = interaction(type, id))) + geom_line() +
geom_point(data = subset(combined, type == "constant"))
This works very well as illustrated below:
Now I would like to extract the interaction in a variable to reuse it later (e.g. customize the legend style or labels).
I did that in a very naïve way:
my.interaction <- interaction(combined$type, combined$id)
ggplot(combined, aes(x, y, colour = my.interaction)) + geom_line() +
geom_point(data = subset(combined, type == "constant"))
But then I have an error:
Error: Aesthetics must be either length 1 or the same as the data (2):
x, y, colour
Edit:
Here is the kind of manipulation I could do: edit the linetype of the legend
displayed <- levels(factor(my.interaction))
line.style <- rep(1, length.out = length(displayed))
line.style[grep("constant", displayed)] <- 0
That works:
ggplot(combined, aes(x, y, colour = interaction(type, id))) + geom_line() +
geom_point(data = subset(combined, type == "constant")) +
guides(colour=guide_legend(override.aes=list(linetype = line.style)))
That does not:
ggplot(combined, aes(x, y, colour = my.interation) + geom_line() +
geom_point(data = subset(combined, type == "constant")) +
guides(colour=guide_legend(override.aes=list(linetype = line.style)))
In the end, I could also edit the shapes or the legend labels (e.g. "Id: 1 / Type: sin" or any other advanced transformation of the labels based on the interaction values).
This'll work. What's wrong with adding a column to your data frame?
combined %>% mutate(my.interaction = paste(type, id, sep='.')) %>%
ggplot(aes(x, y, colour = my.interaction)) + geom_line() +
geom_point(data = subset(combined, type == "constant"))

Draw lines between all the coordinates in a plot

I have the following dataframe:
data <- data.frame(x = c(5,1,3,2,5,7,12), y = c(5,7,6,1,3,5,6))
I can plot these coordinates with the ggplot function and draw a line between these coordinates:
ggplot(data, aes(x, y)) + geom_point(size = 3) + geom_line()
So far, no problems. But instead of a single line though the coordinates, I want that a line is drawn between all the coordinates. Creating a sort of spider web between all the coordinates. Is this possible in the ggplot2 package?
If you want to do this in ggplot2, then you could use geom_segment for this. But before you can make such a plot, you have to create a dataframe which connencts each observation to the other observations. You could approach it as follows:
library(ggplot2)
library(dplyr)
library(tidyr)
dat %>%
complete(nesting(x,y), id) %>% # create the combinations
select(id, xend=x, yend=y) %>% # rename the new variables as end-points
left_join(dat, ., by = 'id') %>% # join with the original dataframe
filter(!(x==xend & y==yend)) %>% # remove the endpoints that are the same as the start points
ggplot(., aes(x, y)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_label(aes(x = x, y = y, label = id, color = factor(id)), show.legend = FALSE) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank())
which gives:
Used data:
dat <- data.frame(x = c(5,1,3,2,5,7,12), y = c(5,7,6,1,3,5,6))
dat$id <- 1:nrow(dat)
Alternatively, you can also add the row-id on the fly without doing it beforehand:
dat %>%
mutate(id = row_number()) %>% # add a row id
complete(nesting(x,y), id) %>% # create the combinations
select(id, xend=x, yend=y) %>% # rename the new variables as end-points
left_join(dat %>% mutate(id = row_number()), .,
by = 'id') %>% # join with the original dataframe (also with an added row id)
filter(!(x==xend & y==yend)) %>% # remove the endpoints that are the same as the start points
ggplot(., aes(x, y)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_label(aes(x = x, y = y, label = id, color = factor(id)), show.legend = FALSE) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank())
Using base plotting:
plot(data)
sapply(combn(nrow(data), 2L, simplify = FALSE),
function(x) do.call("segments", as.list(c(t(data[x, ])))))
Add bells and whistles to taste.
You may also use the FUN argument in combn:
plot(data)
combn(nrow(data), 2L, simplify = FALSE, FUN = function(cm){
segments(x0 = data[cm[1L], 1L],
y0 = data[cm[1L], 2L],
x1 = data[cm[2L], 1L],
y1 = data[cm[2L], 2L])
})

Resources