Related
This question may be difficult to understand from a description alone, so I include a figure as an example:
I am trying to replicate this graph. So far I've done the graphics separately, but I don't know how I can put them together as in the example.
Any help?
# Sample data: a time grid from 0 to 10 in steps of 0.5 (21 points) and two
# series sampled on that grid.
time <- seq(0, 10, by = 0.5)
line_1 <- c(rep(0, 3), 66, 173, 426, 1440, 800, 1200, 400, rep(0, 11))
line_2 <- c(rep(0, 8), 1000, 25000, 5000, rep(0, 10))

# One row per time step; columns are named after the vectors.
df <- data.frame(time = time, line_1 = line_1, line_2 = line_2)
library(ggpubr)
# The plot: both series on one panel, plus a box marking the region
# (time 1-5, value 0-1500) that the second plot zooms into.
ggplot(data = df, aes(x = time)) +
  geom_line(aes(y = line_2), color = "red",
            position = position_nudge(x = 0.5, y = 1000)) +
  geom_line(aes(y = line_1), color = "blue") +
  # annotate() draws the box exactly once. geom_rect(aes(...)) with constant
  # coordinates would draw one identical rectangle per row of df.
  annotate("rect", xmin = 1, xmax = 5, ymin = 0, ymax = 1500,
           color = "black", alpha = 0) +
  theme_pubr(base_size = 8, border = TRUE)

# The plot with a zoom: line_1 only, with both axes limited to the boxed region.
ggplot(data = df, aes(x = time, y = line_1)) +
  geom_line(color = "blue") +
  xlim(1, 5) +
  ylim(0, 1500) +
  theme_pubr(base_size = 8, border = TRUE)
You can use a custom annotation
# Overview plot: both series plus a solid box around the region to be enlarged.
p1 <- ggplot(data = df, aes(x = time)) +
  geom_line(aes(y = line_2), color = "red",
            position = position_nudge(x = 0.5, y = 1000)) +
  geom_line(aes(y = line_1), color = "blue") +
  # annotate() draws a single rectangle; geom_rect(aes(...)) with constants
  # would repeat it once per row of df.
  annotate("rect", xmin = 1, xmax = 5, ymin = 0, ymax = 1500,
           color = "black", alpha = 0) +
  theme_pubr(base_size = 8, border = TRUE)

# The plot with a zoom: line_1 restricted to the boxed region.
p2 <- ggplot(data = df, aes(x = time, y = line_1)) +
  geom_line(color = "blue") +
  xlim(1, 5) +
  ylim(0, 1500) +
  theme_pubr(base_size = 8, border = TRUE)

# Embed p2 as an inset of p1, frame the inset with a dashed box, and join the
# top corners of the solid box to the bottom corners of the inset.
p1 +
  annotation_custom(ggplotGrob(p2),
                    xmin = 0, xmax = 4, ymin = 5000, ymax = 20000) +
  annotate("rect", xmin = 0, xmax = 4, ymin = 5000, ymax = 20000,
           color = "black", linetype = "dashed", alpha = 0) +
  geom_path(aes(x, y, group = grp),
            data = data.frame(x = c(1, 0, 5, 4),
                              y = c(1500, 5000, 1500, 5000),
                              grp = c(1, 1, 2, 2)),
            linetype = "dashed")
Here is sample code.
I want to color points based on values of flag and choice (there are 4 possible combination since both variables are binary).
library(tidyverse)
# Ten random points with three binary attributes: z (used for faceting),
# flag and choice (intended to drive the point colour).
x <- runif(10)
y <- runif(10)
z <- c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1)
flag <- c(0, 0, 1, 1, 1, 0, 0, 0, 1, 1)
choice <- c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1)
tbl <- tibble(x, y, z, flag, choice)
# NOTE(review): `color` and `size` below are arguments to ggplot() itself,
# outside aes(), so no colour aesthetic is mapped from choice/flag.
scatterplot <- ggplot(tbl,
aes(x = x,
y = y),
color = factor(interaction(choice, flag)),
size = 1) +
geom_point(alpha = 0.7) +
# NOTE(review): "oran" is not a valid R colour name.
scale_color_manual(values = c("blue3", "cyan1", "red3", "oran")) +
facet_grid(z ~ .) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Scatter plot")
scatterplot
You have two mistakes:
Set your color attribute inside of aes(). That way, the feature will be used for coloring.
"oran" is not a color, try with http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf (I set it to "orange").
Code
library(tidyverse)
# Ten random points with three binary attributes: z (used for faceting),
# flag and choice (combined to drive the point colour).
x <- runif(10)
y <- runif(10)
z <- c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1)
flag <- c(0, 0, 1, 1, 1, 0, 0, 0, 1, 1)
choice <- c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1)
tbl <- tibble(x, y, z, flag, choice)

# Colour is mapped inside aes() so each choice/flag combination gets its own
# colour from the manual scale. Point size is left at the geom default: a
# `size` argument only has an effect when given to a layer, not to ggplot().
scatterplot <- ggplot(
  tbl,
  aes(x = x, y = y, color = factor(interaction(choice, flag)))
) +
  geom_point(alpha = 0.7) +
  scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
  facet_grid(z ~ .) +
  theme_bw() +
  theme(legend.position = "right", aspect.ratio = 1) +
  ggtitle("Scatter plot")
scatterplot
Result
This question already has answers here:
Plotting two variables as lines using ggplot2 on the same graph
(5 answers)
Closed 3 years ago.
I am mainly posting because I really think I am over complicating this. I am creating a plot of 12 different lines over time. I would like each day to be represented on the x-axis with the "title" beneath each.
I've tried a few solutions and what I have "works" but it's not that good. Ignoring the placeholders I have in there I would like there to be points where they increase as well as showing where people are a little more clearly. My code seems a little long winded; maybe there is a better way to do this.
# Riddle score log as a tibble with 16 rows: a POSIXct `date` (UTC), the riddle
# `title`, and one numeric score column per analyst (Brigid ... Neil).
riddle_log <- structure(list(date = structure(c(1559779200, 1559865600, 1560124800,
1560211200, 1560297600, 1560384000, 1560470400, 1560470400, 1560470400,
1560729600, 1560729600, 1560816000, 1560902400, 1560988800, 1561075200,
1561334400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
title = c("The Midget", "Bowling Balls", "Poisonous Ice",
"Dog Crosses River", "Camel Race", "Two Masked Men", "The Cabin",
"Black Truck", "Burglary", "Japanese Ship", "Haunted Floor",
"East and West", "Filling the Room", "Untied", "Window Jumper",
"Window Faller"), Brigid = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Carly = c(0, 1, 1, 1, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3), Christian = c(1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 3, 3, 3, 3, 4, 4), Daniel = c(0, 0, 0, 0, 0, 1, 1,
2, 2, 2, 2, 3, 3, 3, 3, 3.5), Jess = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Luke = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Mara = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Marcus = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 2, 2, 3, 3, 3, 3.5), Nassim = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Nathalie = c(0, 0, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), Neil = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(ggthemes)
# Build one two-column data frame (date + a single analyst's score) per analyst.
line1 <- riddle_log %>%
select(date, Brigid)
line2 <- riddle_log %>%
select(date, Carly)
line3 <- riddle_log %>%
select(date, Christian)
line4 <- riddle_log %>%
select(date, Daniel)
line5 <- riddle_log %>%
select(date, Jess)
line6 <- riddle_log %>%
select(date, Luke)
line7 <- riddle_log %>%
select(date, Mara)
line8 <- riddle_log %>%
select(date, Marcus)
line9 <- riddle_log %>%
select(date, Nassim)
line10 <- riddle_log %>%
select(date, Nathalie)
line11 <- riddle_log %>%
select(date, Neil)
# Draw each analyst as a separate line layer. The constant colour keys
# "a".."k" are translated to actual colours by scale_color_manual() below.
ggplot() +
geom_line(data = line1, aes(x = date, y = Brigid, color = "a")) +
geom_line(data = line2, aes(x = date, y = Carly, color = "b")) +
geom_line(data = line3, aes(x = date, y = Christian, color = "c")) +
geom_line(data = line4, aes(x = date, y = Daniel, color = "d")) +
geom_line(data = line5, aes(x = date, y = Jess, color = "e")) +
geom_line(data = line6, aes(x = date, y = Luke, color = "f")) +
geom_line(data = line7, aes(x = date, y = Mara, color = "g")) +
geom_line(data = line8, aes(x = date, y = Marcus, color = "h")) +
geom_line(data = line9, aes(x = date, y = Nassim, color = "i")) +
geom_line(data = line10, aes(x = date, y = Nathalie, color = "j")) +
geom_line(data = line11, aes(x = date, y = Neil, color = "k")) +
scale_color_manual(name = "Analysts",
values = c("a" = "blue", "b" = "red", "c" = "orange", "d" = "black",
"e" = "steelblue", "f" = "blue", "g" = "blue", "h" = "blue",
"i" = "blue", "j" = "blue", "k" = "blue")) +
xlab('Date') +
ylab('Wins') +
ggtitle(" NAME ")
# Disabled x-axis experiments, kept for reference:
#+
# scale_x_date(breaks = as.Date(c("2019-05-01", "2019-08-15")))
# scale_x_discrete(name, breaks, labels, limits)
In short what I would like to add four things:
-All dates represented on the x-axis. The weekends are skipped but I would not want them to have gaps in the plot rather treated as consecutive days.
-If it's possible to have the title incorporated somehow, that would be cool, but I am struggling to think how, since some days have multiple titles.
-A more distinguished way to see all line progress as opposed to the bad overlap that's happening here
-Points.
If there are any themes that are better suited for this type of problem I'm open for anything.
First of all, you are right that your code is "a little long winded". To take advantage of ggplot you should have your data in tidy ("tall") format, with one variable for "person" and another variable for the persons' score. That is easy to accomplish using gather() in the tidyr package:
# Reshape to long format: the analyst columns Brigid..Neil are stacked into an
# `Analyst` name column and a `Wins` score column; date and title are kept.
riddle_log2 <- tidyr::gather(
  riddle_log,
  key = "Analyst", value = "Wins",
  Brigid:Neil
)
Now that the data are in the preferred format for ggplot, we can plot them much more easily, like this:
# One thick line per analyst, coloured by analyst.
ggplot(
  data = riddle_log2,
  mapping = aes(x = date, y = Wins, colour = Analyst)
) +
  geom_line(size = 2)
However, a lot of the lines are on top of each other. We can try to make the plot better by plotting the first persons (which are plotted first and will end up behind the other lines) with thicker lines, for instance like this:
# Line width decreases from the first analyst to the last, so lines drawn
# earlier stay visible behind the ones drawn on top of them.
# One width per analyst, derived from the data rather than hard-coded; the
# argument is spelled out as `length.out` instead of relying on partial matching.
ggplot(riddle_log2, aes(x = date, y = Wins, color = Analyst)) +
  geom_line(aes(size = Analyst)) +
  scale_size_manual(
    values = seq(4, 1, length.out = length(unique(riddle_log2$Analyst)))
  )
Now, this is slightly better. Next, we can improve the colors. There are a huge amount of color palettes for R available. In cases such as this I often use the palettes of Paul Tol:
# Eleven-colour qualitative palette (Paul Tol), one colour per analyst.
tol_colors <- c("#332288", "#6699CC", "#88CCEE", "#44AA99", "#117733", "#999933",
                "#DDCC77", "#661100", "#CC6677", "#882255", "#AA4499")

# Same width-by-analyst plot, now with the custom palette. The number of
# widths follows the number of analysts in the data, and the argument is
# spelled out as `length.out` instead of relying on partial matching.
ggplot(riddle_log2) +
  geom_line(aes(x = date, y = Wins, color = Analyst, size = Analyst)) +
  scale_size_manual(
    values = seq(5, 1, length.out = length(unique(riddle_log2$Analyst)))
  ) +
  scale_color_manual(values = tol_colors)
Now, this isn't perfect, but it is an improvement. What you should consider is to split the plots in a bunch of subplots using facet_wrap():
# Small multiples: one panel per analyst, so lines no longer hide each other.
gg <- ggplot(
  data = riddle_log2,
  mapping = aes(x = date, y = Wins, colour = Analyst)
) +
  facet_wrap(~Analyst) +
  scale_color_manual(values = tol_colors) +
  geom_line(size = 2)
gg
This is a much better option in this case, I think.
Next, you also want the x axis to show all dates. There is too little space to label every single day, so here I will show labels for every second day:
# Label every second day on the x axis as "day. abbreviated month", with the
# labels rotated so they do not overlap. `date_breaks` is the documented
# argument for a break spacing given as a string.
gg +
  scale_x_datetime(date_breaks = "2 days", date_labels = "%d. %b") +
  theme(axis.text.x = element_text(hjust = 0, angle = -45))
As you can see, formatting labels isn't exactly straightforward, but it is very flexible. The codes for how to show the time/date are especially cryptic; in this case, %d indicates the day of the month and %b indicates the abbreviated month name. Other codes can be found by running ?strptime.
Finally, we're going to add the day's "title" for every time the "Wins" score increases. We start by adding a variable 'Wins_increase' for the increase in Wins:
# Add `Wins_increase`: the change in Wins since the analyst's previous row
# (NA for each analyst's first row, which has no predecessor).
riddle_log2 <- riddle_log2 %>%
  arrange(Analyst, date) %>%                   # Make sure the sorting is correct
  group_by(Analyst) %>%                        # Compute the lag within each analyst
  mutate(Wins_increase = Wins - lag(Wins)) %>% # Increase since the previous row
  ungroup()                                    # Do not leave the result grouped
Then we use geom_text() to add rotated labels:
# Faceted plot with the two-day date axis, plus the riddle title written
# vertically just above each point where an analyst's score went up.
gg +
  scale_x_datetime(date_breaks = "2 days", date_labels = "%d. %b") + # axis as before
  theme(axis.text.x = element_text(hjust = 0, angle = -45)) +        # as before
  geom_text(
    data = riddle_log2 %>% filter(Wins_increase > 0), # Only rows where Wins increased
    aes(y = Wins + 0.3, label = title),               # Lift the labels by 0.3
    hjust = 0, angle = 90, size = 2                   # Left-adjust and rotate labels
  )
The next thing to fix is the overlap between labels for Marcus (because he won twice in the same day). This can be fixed using ggrepel package.
Here's an example of converting to "long" data to make ggplot easier. I also added a geom_jitter layer to make it easier to see days with overlaps.
# Reshape to long format (every column except date and title becomes an
# Analyst/Wins pair), then draw one line per analyst with jittered points.
riddle_log %>%
  tidyr::pivot_longer(-c(date, title),
                      names_to = "Analyst", values_to = "Wins") %>%
  ggplot(aes(x = date, y = Wins, color = Analyst)) +
  geom_line() +
  geom_jitter(width = 0, shape = 21, alpha = 0.7) + # one way to show daily overlap
  scale_color_manual(name = "Analysts",
                     values = c("Brigid" = "blue", "Carly" = "red",
                                "Christian" = "orange", "Daniel" = "black",
                                "Jess" = "steelblue", "Luke" = "blue",
                                "Mara" = "blue", "Marcus" = "blue",
                                "Nassim" = "blue", "Nathalie" = "blue",
                                "Neil" = "blue"))
The first image is a hand drawn image and the second image is the same graph drawn using ggraph.
This is the code that generates the graph.
library(igraph)
library(tidyverse)
library(ggraph)
# Node table: x/y coordinates for seven nodes; the row names "1".."7" become
# the `name` column used as the vertex id.
V <- read.table(text = "x y
2 1
4 2
4 4
2 5
6 4
3 7
8 6",
                header = TRUE) %>%
  rownames_to_column("name")

# Edge list derived from a 7 x 7 adjacency matrix (row = from, column = to).
# NOTE(review): gather(..., 2:6) selects column positions 2-6, i.e. target
# nodes "1".."5" only. Columns "6" and "7" are not gathered, so the entries
# 4 -> 6 and 5 -> 7 produce no edge row; their reverses 6 -> 4 and 7 -> 5 do.
# Confirm this is intended.
E <- matrix(c(0, 1, 0, 0, 0, 0, 0,
              0, 0, 1, 0, 0, 0, 0,
              0, 0, 0, 1, 1, 0, 0,
              0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1,
              0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0), nrow = 7, byrow = TRUE) %>%
  data.frame() %>%
  rename_all(list(function(x) 1:7)) %>%
  rownames_to_column(var = "from") %>%
  gather(to, val, 2:6) %>%
  filter(val == 1) %>%
  select(from, to)

# Display labels, one per node in the row order of V.
nodeLables <- c(" I ", " N0", "F", "N1", "N2", "F1","F2")

# Undirected graph built from the edge list, with V supplying the vertices.
g <- graph_from_data_frame(E, vertices = V, directed = FALSE)
# Draw the graph with rounded text labels as nodes, on a flipped 0-9 grid.
ggraph(g) +
  geom_edge_link(edge_width = 1.3) +
  geom_node_label(aes(label = nodeLables), label.r = unit(0.75, "lines"),
                  label.size = 0.65, label.padding = unit(0.55, "lines"),
                  show.legend = FALSE) +
  ggtitle("My plot") +
  coord_flip() +
  expand_limits(x = 0, y = 0) +
  # Using scale_x_reverse and swapping the limits
  scale_x_reverse(expand = c(0, 0), limits = c(9, 0), breaks = c(0:9),
                  minor_breaks = NULL) +
  # switching y position to "right" (pre-flip)
  scale_y_continuous(expand = c(0, 0), limits = c(0, 9), breaks = c(0:9),
                     minor_breaks = NULL, position = "right") +
  theme_minimal()
In the second graph the nodes are not perfect circles and the curvature changes with the size of the label. I want to make the nodes perfect circles and assign colours for different types of nodes. For example
I - Blue
F - Red
Nx - Green
Fx - Orange
I've played around with geom_node_point and geom_node_text and got the below result. However, I can't increase the node size further. How can I increase the node size?
# Fill colour per node, in the same order as the node labels.
nodeColours <- c("blue", "green", "red","green", "green", "orange","orange")

# Draw nodes as coloured circles with the label text on top.
ggraph(g) +
  geom_edge_link(edge_width = 1.3) +
  # `size` is set outside aes() so the value is used as the point size
  # directly; inside aes() a constant is passed through the size scale and
  # changing it has no visible effect. Raise this number to enlarge the nodes.
  geom_node_point(size = 4, color = nodeColours) +
  geom_node_text(aes(label = nodeLables)) +
  ggtitle("My plot") +
  coord_flip() +
  expand_limits(x = 0, y = 0) +
  scale_x_reverse(expand = c(0, 0), limits = c(9, 0), breaks = c(0:9),
                  minor_breaks = NULL) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 9), breaks = c(0:9),
                     minor_breaks = NULL, position = "right") +
  theme_minimal()
I am using geom_area to plot a very simple dataset. When plotting with geom_line everything looks normal, but when I switch to geom_area higher values are plotted. I think looking at the graphs is the best way of showing my problem:
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script continue.
library(tidyverse)

# Two volume series (X15.DCIA and X100.DCIA) over time steps 0-40.
x <- structure(list(Time = 0:40, X15.DCIA = c(0, 1, 0.5, 0, 2, 2.5,
1, 0.5, 0, 1, 1.5, 1, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 3,
5, 7, 6.5, 5.5, 4, 3, 2, 1.5, 1, 0.25, 0, 0, 0, 0, 0, 0, 0),
X100.DCIA = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1.5, 7, 8, 7.5, 6.5, 5, 3.5, 2.25,
1.75, 1.1, 0.4, 0.1, 0, 0, 0, 0, 0, 0)),
class = "data.frame", row.names = c(NA,-41L))

# Line version: each series is drawn at its own values.
x %>% gather(prct.DCIA, Vol, -Time) %>% ggplot(aes(x = Time, y = Vol)) +
  geom_line(aes(color = prct.DCIA))

# Area version of the same data, using geom_area()'s default position.
x %>% gather(prct.DCIA, Vol, -Time) %>% ggplot(aes(x = Time, y = Vol)) +
  geom_area(aes(fill = prct.DCIA))
The geom_line is what I expected (a line plot of my data).
But then looking at the geom_area you see that 100DCIA has jumped up-to 15.
I am more interested in an explanation rather than a fix or workaround.
Note:
This can be a workaround:
# Workaround: draw each series as a semi-transparent filled polygon.
# alpha is a fixed parameter rather than an aesthetic, so it is applied as
# 0.5 opacity directly and creates no legend that would need suppressing.
x %>% gather(prct.DCIA, Vol, -Time) %>% ggplot(aes(x = Time, y = Vol)) +
  geom_polygon(aes(fill = prct.DCIA), alpha = 0.5)
Explanation:
Your plots are stacking on top of one another.
The values you see following the red line in the geom_area graph are the sum of the values for the red and blue lines in your geom_line graph.
You can see this clearly if you separate out prct.DCIA with facet_grid():
# Same area plot, with each series in its own panel.
ggplot(gather(x, prct.DCIA, Vol, -Time), aes(x = Time, y = Vol)) +
  geom_area(aes(fill = prct.DCIA)) +
  facet_grid(. ~ prct.DCIA)
This is simply because position = "stack" is the default argument in geom_area:
geom_area(mapping = NULL, data = NULL, stat = "identity",
position = "stack", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...)
One might presume this is because people use geom_area because they want to show the whole area on a diagram, rather than fill under some lines. Generally bars or area might represent a count of something, or the area filled in represents something, while points or lines may represent a point estimate and the area above or below the line or point isn't meaningful.
Cf. the default argument for geom_line is position = "identity".
geom_line(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...)
Fix:
If you use position = position_dodge() you can see they return to looking like the line graph, with the red area plotted behind the blue area:
# Area plot with the dodge position adjustment instead of the default stacking.
ggplot(gather(x, prct.DCIA, Vol, -Time), aes(x = Time, y = Vol)) +
  geom_area(aes(fill = prct.DCIA), position = "dodge")
You can even set alpha < 1 and see this clearly:
# Dodged area plot again, semi-transparent so the overlap is visible.
ggplot(gather(x, prct.DCIA, Vol, -Time), aes(x = Time, y = Vol)) +
  geom_area(aes(fill = prct.DCIA), alpha = 0.5, position = "dodge")