Draw lines between all the coordinates in a plot - r

I have the following dataframe:
data <- data.frame(x = c(5,1,3,2,5,7,12), y = c(5,7,6,1,3,5,6))
I can plot these coordinates with the ggplot function and draw a line between these coordinates:
ggplot(data, aes(x, y)) + geom_point(size = 3) + geom_line()
So far, no problems. But instead of a single line though the coordinates, I want that a line is drawn between all the coordinates. Creating a sort of spider web between all the coordinates. Is this possible in the ggplot2 package?

If you want to do this in ggplot2, then you could use geom_segment for this. But before you can make such a plot, you have to create a dataframe which connencts each observation to the other observations. You could approach it as follows:
library(ggplot2)
library(dplyr)
library(tidyr)
dat %>%
complete(nesting(x,y), id) %>% # create the combinations
select(id, xend=x, yend=y) %>% # rename the new variables as end-points
left_join(dat, ., by = 'id') %>% # join with the original dataframe
filter(!(x==xend & y==yend)) %>% # remove the endpoints that are the same as the start points
ggplot(., aes(x, y)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_label(aes(x = x, y = y, label = id, color = factor(id)), show.legend = FALSE) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank())
which gives:
Used data:
dat <- data.frame(x = c(5,1,3,2,5,7,12), y = c(5,7,6,1,3,5,6))
dat$id <- 1:nrow(dat)
Alternatively, you can also add the row-id on the fly without doing it beforehand:
dat %>%
mutate(id = row_number()) %>% # add a row id
complete(nesting(x,y), id) %>% # create the combinations
select(id, xend=x, yend=y) %>% # rename the new variables as end-points
left_join(dat %>% mutate(id = row_number()), .,
by = 'id') %>% # join with the original dataframe (also with an added row id)
filter(!(x==xend & y==yend)) %>% # remove the endpoints that are the same as the start points
ggplot(., aes(x, y)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_label(aes(x = x, y = y, label = id, color = factor(id)), show.legend = FALSE) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank())

Using base plotting:
plot(data)
sapply(combn(nrow(data), 2L, simplify = FALSE),
function(x) do.call("segments", as.list(c(t(data[x, ])))))
Add bells and whistles to taste.
You may also use the FUN argument in combn:
plot(data)
combn(nrow(data), 2L, simplify = FALSE, FUN = function(cm){
segments(x0 = data[cm[1L], 1L],
y0 = data[cm[1L], 2L],
x1 = data[cm[2L], 1L],
y1 = data[cm[2L], 2L])
})

Related

How do you add trendline to part of data in ggplot2?

I have data and a plot like this,
x = c(1,2,3,4,5,6,7,8,9,10,11,12)
y1 = x^2-5
y2 = -x^2+1
data <- data.frame(x,y1,y2)
data1 = data.frame(pivot_longer(data,2:3))
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(method = 'lm',se = FALSE)
Is there a way to have the trendline only applying to values for x greater than a certain number, like 3?
You can do this:
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(data=dplyr::filter(data1,x>3), method = 'lm',se = FALSE)
You can apply the current aes to geom_point only, and create a new column (i.e. x2 in my code) for mapping to geom_smooth.
library(tidyverse)
x = c(1,2,3,4,5,6,7,8,9,10,11,12)
y1 = x^2-5
y2 = -x^2+1
data <- data.frame(x,y1,y2)
data1 = data.frame(pivot_longer(data,2:3))
data1 %>% mutate(x2 = ifelse(x > 3, x, NA)) %>%
ggplot()+
geom_point(aes(x, y = value, color = name)) +
geom_smooth(aes(x2, y = value, color = name), method = 'lm',se = FALSE)
Created on 2022-05-07 by the reprex package (v2.0.1)
Similar to both above just using subset:
ggplot(data1, aes(x, y = value, color = name))+
geom_point()+
geom_smooth(data=subset(data1, x > 3), method = 'lm',se = FALSE)

Time series bar graph reordering

I have the following script I'm working on, I want to re order the bar graph in descending order by their values.
library(tidyverse)
library(lubridate)
library(ggplot2)
#df <- read_csv('dataframe.csv')
df %>%
mutate(date=mdy(date), year=year(date), year = year + (date >= mdy(paste0("10/01/", year))))%>%
group_by(year) %>%
summarize(avg = mean(flow)) -> df
y <- df$avg
x <- ymd(sprintf("%d-01-01",df$year))
d <- data.frame(x = x, y = y)
# interpolate values from zero to y and create corresponding number of x values
vals <- lapply(d$y, function(y) seq(0, y, by = 0.1))
y <- unlist(vals)
mid <- rep(d$x, lengths(vals))
d2 <- data.frame(x = mid - 100,
xend = mid + 100,
y = y,
yend = y)
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2)
Results
I want to reorder the bars in descending order by values
The dataset can be found through the following link
https://drive.google.com/file/d/11PVub9avzMFhUz02cHfceGh9DrlVQDbD/view?usp=sharing
the output I'm looking for is like this.
Kindly assist.
To arrange the data you need to adjust the factor levels. You could arrange the data based on avg column and change year to factor.
library(dplyr)
library(ggplot2)
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg) + geom_col(aes(fill = 'red')) + guides(fill=FALSE)
Or :
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg, fill = avg) + geom_col()
Try this:
library(scales)
#Custom Transform function
dttrans <- function(a, b, breaks = b$breaks, format = b$format) {
a <- as.trans(a)
b <- as.trans(b)
name <- paste(a$name, b$name, sep = "-")
trans <- function(x) a$trans(b$trans(x))
inv <- function(x) b$inverse(a$inverse(x))
trans_new(name, trans, inv, breaks, format = format)
}
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2) +
scale_x_continuous(trans = dttrans("reverse", "date"))
Credits: Mikko Marttila

Using stat_summary to plot the location of the median

I want a way to draw a vertical line where the median occurs for each group in my data on top of a histogram. I can do that by first grouping by the groups, mutating a new column to be the median, and then faceting by the group. Here is the some code to do that:
library(tidyverse)
N = 1000
m = c(1,5,10)
z = c('A','B','C')
d<-map2_dfr(m,z, ~data.frame(x = rbeta(N,shape1 =.x, shape2 = 20), z = .y))
d %>%
group_by(z) %>%
mutate(med = median(x)) %>%
ungroup %>%
ggplot(aes(x, fill = z))+
geom_histogram(aes(y = ..density..),bins = 10,color = 'black')+
geom_vline(aes(xintercept = med))+
facet_wrap(~z)
Since the median is a statistical summary, can I achieve the same result using stat_summary or stat_function with geom="vline"?
Yes, you can; there are just a few tricks to it.
Since stat_summary calculates a summary over y for every x, we'll need to fool the function by giving it a dummy x-variable and supply the input for the histogram as y. I've found that giving a dummy-x that is within the range of the data works best, since then it does not affect the axis limits.
In code below, assume d is the d generated with your code.
ggplot(d, aes(x, fill = z)) +
geom_histogram(aes(y = ..density..), bins = 10, colour = "black") +
stat_summary(aes(x = 0.1, y = x, xintercept = stat(y), group = z),
fun.y = median, geom = "vline") +
facet_wrap(~ z)
As compared to the original plot:
d %>%
group_by(z) %>%
mutate(med = median(x)) %>%
ungroup %>%
ggplot(aes(x, fill = z))+
geom_histogram(aes(y = ..density..),bins = 10,color = 'black')+
geom_vline(aes(xintercept = med))+
facet_wrap(~z)

long and wide format equivalents in ggraph

I am trying to make graph similar to this:
library(tidyverse)
library(ggraph)
iris_g <- den_to_igraph(as.dendrogram(hclust(dist(iris[1:4]))))
xy_expand <- function(xy, expand) xy * (1 + expand / 10)
ord <- match(as.numeric(V(iris_g)$label), seq_len(nrow(iris)))
V(iris_g)$Petal.Length <- iris$Petal.Length[ord]
V(iris_g)$Petal.Width <- iris$Petal.Width[ord]
V(iris_g)$Sepal.Length <- iris$Sepal.Length[ord]
V(iris_g)$Sepal.Width <- iris$Sepal.Width[ord]
V(iris_g)$Species <- iris$Species[ord]
ggraph(iris_g, layout = "dendrogram", circular = TRUE) +
geom_edge_diagonal() +
geom_node_point(aes(filter = leaf,
x = xy_expand(x, Sepal.Length),
y = xy_expand(y, Sepal.Length)),
color = "red") +
geom_node_point(aes(filter = leaf,
x = xy_expand(x, Sepal.Width),
y = xy_expand(y, Sepal.Width)),
color = "green") +
geom_node_point(aes(filter = leaf,
x = xy_expand(x, Petal.Length),
y = xy_expand(y, Petal.Length)),
color = "blue") +
geom_node_point(aes(filter = leaf,
x = xy_expand(x, Petal.Width),
y = xy_expand(y, Petal.Width)),
color = "yellow")
I would like to specify aes(color = Trait) so that the coloring integrates nicely into the rest of the figure but I cannot figure out if there is an equivalent to long data format in ggraph
The equivalent of what I want to do in ggplot would be:
library(ggplot2)
iris %>%
mutate(id = seq_len(nrow(iris))) %>%
gather(key = Trait, value = Value, Sepal.Length:Petal.Width) %>%
ggplot(aes(x = id, y = Value, color = Trait)) +
geom_point()

ggplot: labelling geom_smooth / stat_smooth values at correct value

I'm trying to get labels to line up with the values from a smooth line. While other answers I've seen suggest creating a data column of predicted values, I'm looking for a cleaner alternative that uses the data that is already being produced for the ggplot.
See example below for the problem:
require(tidyverse)
require(ggrepel)
set.seed(1)
df <- data.frame(x = rep(1:100, 5), y = c(sample(1:20, 100, T), sample(21:40, 100, T), sample(41:60, 100, T), sample(61:80, 100, T), sample(81:100, 100, T)), group = rep(letters[1:5], each = 100))
df <- tbl_df(df)
df %>%
ggplot(aes(x = x, y = y, label = group, color = group)) +
geom_smooth() +
guides(color = F) +
geom_text_repel(data = . %>% filter(x == max(x)), aes(x = x, y = y, label = group), nudge_x = 50)
Is there some way to get the smooth line value at max(x) without using ggplot_build() or another external, multi-step approach?
I'm not sure if this is really more elegant, but it's all in one pipe. I didn't have the "repel" version handy, but the idea is the same.
library(broom)
df %>%
{ggplot(., aes(x, y, label = group, color = group)) +
geom_smooth() +
guides(color = F) +
geom_text(data = group_by(., group) %>%
do(augment(loess(y~x, .))) %>%
filter(x == max(x)),
aes(x, .fitted), nudge_x = 5)}
You need to get the prediction of the loess smoother at that final x value, so you just have to fit it twice. If the model-fitting is slow, you can do that once, higher in the dplyr chain, and just use the output for the rest of the figure.
df %>%
group_by(group) %>%
do(augment(loess(y~x, .))) %>%
{ggplot(., aes(x, y, label = group, color = group)) +
geom_smooth() +
guides(color = F) +
geom_text(data = filter(., x == max(x)),
aes(x, .fitted), nudge_x = 5)}

Resources