Time series connected scatter plot in R (as image attached) - r

I want to plot these data as a connected (temporal) scatter plot in R. Any hint or code would be greatly appreciated. I can do this for a single point in time, but not for the time series.

See if you can apply this to your data.
I've assumed that it is not important that the years are treated as 'time' data for the purposes of graphical representation.
library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggrepel)
# Example data, one row per city: the back-ticked year columns feed the
# `pop` series and the km2_* columns feed the `area` series (see the
# case_when() split below).
pop <- tibble(
  city = c("Delhi", "Madurai", "Cape Town", "Jo Burg"),
  `1990` = c(10, 12, 5, 6),
  `2000` = c(13, 17, 7, 8),
  `2015` = c(20, 24, 7.5, 9),
  km2_1990 = c(12, 15, 2, 4),
  km2_2000 = c(13, 14, 3, 4),
  km2_2015 = c(15, 16, 5, 6)
)

# Lookup table assigning each city to a continent.
cont <- tibble(
  city = c("Delhi", "Madurai", "Cape Town", "Jo Burg"),
  cont = c("Asia", "Asia", "Africa", "Africa")
)

# Reshape to one row per city/year with `area` and `pop` columns, then total
# both measures per continent and year.
tib <- pop %>%
  pivot_longer(cols = `1990`:km2_2015, names_to = "year", values_to = "val") %>%
  mutate(
    # Column names containing "km" are areas; everything else is population.
    type = case_when(str_detect(year, "km") ~ "area", TRUE ~ "pop"),
    year = as.numeric(str_remove(year, "km2_"))
  ) %>%
  pivot_wider(names_from = type, values_from = val) %>%
  # Name the join key explicitly instead of relying on the natural-join guess.
  left_join(cont, by = "city") %>%
  group_by(year, cont) %>%
  # Drop the leftover grouping explicitly; it is replaced on the next line.
  summarise(area = sum(area), pop = sum(pop), .groups = "drop") %>%
  group_by(cont) %>%
  # Only the row with the largest `pop` in each continent gets a label.
  mutate(lab_cont = case_when(pop == max(pop) ~ cont, TRUE ~ ""))

# One red line per continent, square markers, year labels on every point and
# a larger continent label on the row selected above.
ggplot(tib, aes(area, pop, label = lab_cont)) +
  geom_line(aes(group = cont), colour = "red") +
  geom_point(fill = "red", colour = "red", shape = 22, size = 4) +
  geom_text_repel(aes(label = year)) +
  geom_text_repel(nudge_x = 1, size = 5)
This gives you the following graph, you can adjust arguments as required to pretty up the appearance to suit your purposes:

Transform your data like this
Then, plot using ggplot2 package.
# Connected scatter plot: one coloured line per city, every point labelled
# with its year. `mydata` is the long-format data prepared beforehand.
mydata %>%
  ggplot(aes(x = bsurf, y = pop, group = city, color = city, label = year)) +
  geom_point(size = 2) +
  geom_line(size = 1) +
  # The label comes from the `label` aesthetic above rather than mydata$year.
  geom_text(vjust = 1.2, nudge_y = 0.5) +
  ggtitle("Dummy Title", subtitle = "Dummy") +
  # x carries bsurf and y carries pop, so the axis titles follow that order.
  xlab("Total Built-up Surface") +
  ylab("Population")

Related

Plot multiple lines (data series) with unique colors and custom x_axis in R

I'm trying to generate a plot in R which has multiple lines (each line represents a different category), each with unique colors. The x-axis is the time, which start at 17:00 and end at 9:00 the next day. The y-axis is the frequency (i.e. number of counts) of each category at certain time. Please have a look at the csv file that I use to plot this:
Time,-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3
0,0,0,0,0,0,0,288,224,148,78,37,23,19
1,0,0,0,0,0,0,321,208,128,74,55,20,11
2,0,0,0,0,0,0,326,212,128,80,46,20,5
3,0,0,0,0,0,0,345,209,131,73,36,17,6
4,0,0,0,0,0,0,364,201,117,77,38,15,5
5,0,0,0,0,0,0,390,205,100,73,36,10,3
6,0,0,0,0,0,0,406,196,121,57,24,8,5
7,0,0,0,0,0,1,560,161,62,25,5,3,0
8,0,0,0,0,0,18,772,22,5,0,0,0,0
9,0,0,0,0,18,130,667,1,0,1,0,0,0
10,0,0,0,2,55,256,503,1,0,0,0,0,0
11,1,0,0,7,106,349,354,0,0,0,0,0,0
12,1,1,0,12,184,368,251,0,0,0,0,0,0
13,0,0,0,32,228,357,200,0,0,0,0,0,0
14,0,0,0,51,245,314,208,0,0,0,0,0,0
15,0,0,0,51,232,317,218,0,0,0,0,0,0
16,0,0,0,37,224,338,218,1,0,0,0,0,0
17,0,0,0,21,156,350,290,1,0,0,0,0,0
18,0,0,0,2,72,351,392,1,0,0,0,0,0
19,0,0,0,0,15,207,587,9,0,0,0,0,0
20,0,0,0,0,1,33,748,34,2,0,0,0,0
21,0,0,0,0,0,3,609,137,51,12,4,1,1
22,0,0,0,0,0,0,325,241,133,71,31,11,6
23,0,0,0,0,0,0,272,227,149,82,50,21,17
Aside from the Time column, each column represents a category (i.e. -3, -2.5, -2, etc.). At time 0, category -3 appears 0 times while category 3 appears 19 times, and so on. I want my lines to represent the categories and show the frequency of each category over time on the graph (similar to this question, but instead of just crimeFreq I have multiple categories here).
Another 2 things are:
My x-axis needs to start from 17 (meaning 17:00, or 5pm) and run to 9 (meaning 9:00, or 9am).
I only need the categories that are ">= 0" (i.e. 0, 0.5, 1, etc...)
I have tried the solution above and the solution from this question, but was not able to succeed. Some of my attempts are:
Attempt 1:
df <- read.csv("data_summary.csv")
# Take the rows in the order of time that I want (i.e. from 17:00 to 9:00).
row_to_take <- c(18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
# Try to plot it with the x-axis in the desired order.
# NOTE: x still holds the raw hour values (17..23 followed by 0..9), so the
# points are positioned by numeric hour, not by the row order chosen above.
matplot(
  x = df$Time[row_to_take],
  y = df[row_to_take, 9:14],
  ylab = "Frequency",
  xlab = "Hour",
  type = c("b"),
  pch = 3,
  col = 1:7,
  xaxt = "n"
)
axis(1, at = c(17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9), las = 0)
legend("topleft", legend = 1:7, col = 1:7, pch = 3)
However, this attempt does not generate a correct graph (the x-axis is incorrect and the category represented by the black colour gets drawn twice).
My second attempt:
# Second attempt: one geom_line() layer per category, all sharing Time on x.
# NOTE(review): each aes(y = ...) below is a numeric constant (0, 0.5, 1, ...),
# not a column of df, so every layer is drawn at that fixed height.
ggplot(df, aes(Time)) + geom_line(aes(y = 0, colour = "0")) +
geom_line(aes(y = 0.5, colour = "0.5")) + geom_line(aes(y = 1, colour = "1"))+
geom_line(aes(y = 1.5, colour = "1.5"))+ geom_line(aes(y = 2, colour = "2"))+
geom_line(aes(y = 2.5, colour = "2.5"))+ geom_line(aes(y = 3, colour = "3"))
This attempt has the same problem as the first attempt. Also, I don't know how to change the legend's name for each color or the axis names (xlab and ylab don't work?).
Please suggest a simple solution. I'm very new to R and don't know much about advanced functions/packages. Thank you all in advance :)
You could use the lovely package ggplot2. First, you should make your dataframe in a longer format using pivot_longer and then you can assign each category as a line with color. You can filter the categories like this:
# Inline copy of the question's CSV, parsed from a text literal.
# header = TRUE takes the first line as column names, and check.names = FALSE
# keeps those headers (-3, -2.5, ..., 3) exactly as written.
df <- read.table(text = "Time,-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3
0,0,0,0,0,0,0,288,224,148,78,37,23,19
1,0,0,0,0,0,0,321,208,128,74,55,20,11
2,0,0,0,0,0,0,326,212,128,80,46,20,5
3,0,0,0,0,0,0,345,209,131,73,36,17,6
4,0,0,0,0,0,0,364,201,117,77,38,15,5
5,0,0,0,0,0,0,390,205,100,73,36,10,3
6,0,0,0,0,0,0,406,196,121,57,24,8,5
7,0,0,0,0,0,1,560,161,62,25,5,3,0
8,0,0,0,0,0,18,772,22,5,0,0,0,0
9,0,0,0,0,18,130,667,1,0,1,0,0,0
10,0,0,0,2,55,256,503,1,0,0,0,0,0
11,1,0,0,7,106,349,354,0,0,0,0,0,0
12,1,1,0,12,184,368,251,0,0,0,0,0,0
13,0,0,0,32,228,357,200,0,0,0,0,0,0
14,0,0,0,51,245,314,208,0,0,0,0,0,0
15,0,0,0,51,232,317,218,0,0,0,0,0,0
16,0,0,0,37,224,338,218,1,0,0,0,0,0
17,0,0,0,21,156,350,290,1,0,0,0,0,0
18,0,0,0,2,72,351,392,1,0,0,0,0,0
19,0,0,0,0,15,207,587,9,0,0,0,0,0
20,0,0,0,0,1,33,748,34,2,0,0,0,0
21,0,0,0,0,0,3,609,137,51,12,4,1,1
22,0,0,0,0,0,0,325,241,133,71,31,11,6
23,0,0,0,0,0,0,272,227,149,82,50,21,17", header = TRUE, sep = ",", check.names = FALSE)
library(dplyr)
library(ggplot2)
library(tidyr)
# Reshape to long format (one row per Time/category), keep the categories
# that are numerically >= 0 and draw one coloured line per category.
df %>%
  pivot_longer(cols = -Time) %>%
  # `name` holds the column headers as text; convert before comparing so the
  # filter is numeric rather than a locale-dependent string comparison.
  filter(as.numeric(name) >= 0) %>%
  ggplot(aes(x = Time, y = value, colour = name)) +
  geom_line() +
  labs(x = "Time", y = "Value", colour = "Category")
Created on 2022-08-25 with reprex v2.0.2
Reverse x-axis values
You could use scale_x_continuous with "reverse":
library(dplyr)
library(ggplot2)
library(tidyr)
# Same long-format line plot, restricted to a window of hours and drawn with
# a reversed x-axis so the larger hour appears on the left.
df %>%
  pivot_longer(cols = -Time) %>%
  # `name` holds the column headers as text; convert before comparing so the
  # filter is numeric rather than a locale-dependent string comparison.
  filter(as.numeric(name) >= 0) %>%
  # NOTE(review): this keeps hours 9 through 17 of the same day; confirm that
  # is the intended window rather than 17:00 through 9:00 the next day.
  filter(Time >= 9 & Time <= 17) %>%
  ggplot(aes(x = Time, y = value, colour = name)) +
  geom_line() +
  scale_x_continuous(trans = "reverse") +
  labs(x = "Time", y = "Value", colour = "Category")
Created on 2022-08-25 with reprex v2.0.2
I combined the answer above from @Quinten with this answer: Setting limits with scale_x_time, and was able to come up with this:
# Read the summary table; check.names = FALSE keeps the category headers
# exactly as they appear in the file.
df <- read.csv("data_summary.csv", check.names = FALSE, header = TRUE)
# NOTE(review): this assumes the Time column is stored in a form that
# as.POSIXct() can parse directly; confirm against the actual file.
df$Time <- as.POSIXct(df$Time)
df %>%
  pivot_longer(cols = -Time) %>%
  # Compare numerically: `name` is text, and comparing text with 0 would fall
  # back to locale-dependent string ordering. The strict `>` means the "0"
  # category itself is not plotted.
  filter(as.numeric(name) > 0) %>%
  ggplot(aes(x = Time, y = value, color = name)) +
  geom_line() +
  labs(x = "Time", y = "Frequency", title = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      angle = 90, hjust = 1, family = "Times", face = "bold", size = 12
    ),
    axis.text.y = element_text(family = "Times", face = "bold", size = 12),
    strip.text = element_text(size = 6, face = "bold")
  ) +
  # No padding below the data, 10% padding above it.
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  # NOTE(review): the limits are given in UTC; confirm they line up with the
  # time zone that as.POSIXct() assigned to df$Time.
  scale_x_datetime(
    date_labels = "%T",
    limits = c(
      as.POSIXct("2022-08-24 22:00:00", tz = "UTC"),
      as.POSIXct("2022-08-25 14:00:00", tz = "UTC")
    ),
    breaks = "1 hours"
  )
The code yields this graph:
which is what I need. Note that I have to change my "Time" column into the format: 0:00, 1:00,etc... so that I can use as.POSIXct(df$Time) on it.

How to plot a grouped geom_bar plot having a dataframe?

I have a dataframe like this one:
and want to plot in R something like this:
But for some reason I am really struggling with the grouped geom_bar code...
Can you help me please?
We may use barplot from base R
# Grouped bars in base graphics: after transposing, each df1 row becomes one
# cluster, and beside = TRUE places that cluster's bars next to each other
# instead of stacking them.
barplot(
  height = t(df1),
  col = c("blue", "orange", "grey", "yellow", "lightblue"),
  beside = TRUE
)
-output
Or if we need a ggplot/plotly
library(ggplot2)
library(dplyr)
library(tidyr)
library(plotly)
library(tibble)
# Build the dodged column chart from df1 in long form: the row names move
# into column `rn`, and every other column becomes a name/value pair.
p <- ggplot(
  data = pivot_longer(rownames_to_column(df1, "rn"), cols = -rn),
  mapping = aes(x = rn, y = value, fill = name)
) +
  geom_col(position = "dodge") +
  theme_bw()

# Hand the ggplot object to plotly.
ggplotly(p)
-output
data
# Example data: five numeric columns (A to E) with row names AAA to EEE.
df1 <- data.frame(
  A = c(65, 9, 7, 70, 9),
  B = c(23, 4, 5, 53, 2),
  C = c(42, 5, 2, 17, 7),
  D = c(51, 7, 5, 57, 5),
  E = c(14, 2, 2, 13, 4),
  row.names = c("AAA", "BBB", "CCC", "DDD", "EEE")
)
Here's a solution using the tidyverse package that contains ggplot2 and tidyr packages. Additionally, this answer includes plotting the numbers as text on top of the bars.
library(tidyverse)
# Grouped column chart of df1 with each bar's value printed above it.
ggplot(
  # Long format: row names go to `rowname`, column names go to `letter`.
  data = df1 %>%
    rownames_to_column() %>%
    pivot_longer(cols = -rowname, names_to = "letter"),
  mapping = aes(x = rowname, y = value, fill = letter)
) +
  # "dodge" puts the bars of one row side by side, all starting at 0.
  geom_col(position = "dodge", width = 0.9) +
  # Labels sit 2 units above the bar top and use the same dodge width as the
  # columns so they line up with their bars.
  geom_text(
    aes(y = value + 2, label = value),
    position = position_dodge(width = 0.9)
  ) +
  theme_bw()

ggplot2 + ggrepel adding legend with label colours changes the colours itself

Assume the following data:
library(tidyverse)
library(ggrepel)
# Three names (a, b, c) observed on three days.
df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, rank of that total within each day (1 = highest),
# and a hex colour picked by rank position (NA for any rank above 3).
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(name_colour = c("#336600", "#339900", "#66ff33")[rank])
I now want to draw the following plot, i.e. give the names/labels the colour specified in the name_colour column:
# First attempt: the label colours are passed outside aes() as a fixed vector
# (df2$name_colour), so they are not mapped through a colour scale.
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
geom_text_repel(direction = "y", size = 10, colour = df2$name_colour) +
theme_minimal()
However, this plot is missing a legend for these colours, i.e. I want to add a legend that has the ranks next to the according colour.
I'm not sure how I could manually add such a legend here. I tried to change my code above to the version below (the only change is in the second-to-last line), but this completely changes the colours of the labels:
# Second attempt: name_colour is now mapped inside aes(). No colour scale is
# specified here, so the hex strings go through ggplot's default discrete
# colour scale instead of being used as the colours themselves.
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
geom_text_repel(direction = "y", size = 10, aes(colour = name_colour)) +
theme_minimal()
Any ideas?
If you want to use the color codes from your dataframe then make use of scale_color_identity. By default this will not give you a legend so you have to add guide = guide_legend():
library(tidyverse)
library(ggrepel)
# Three names (a, b, c) observed on three days.
df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, rank of that total within each day (1 = highest),
# and the hex colour assigned to each rank.
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(name_colour = case_when(
    rank == 1 ~ "#336600",
    rank == 2 ~ "#339900",
    rank == 3 ~ "#66ff33"
  ))

df2 %>%
  ggplot(aes(
    x = time,
    y = points_sum,
    group = name,
    label = name
  )) +
  geom_point() +
  geom_text_repel(aes(color = name_colour), direction = "y", size = 10) +
  # The identity scale uses the hex codes in name_colour as the colours
  # themselves. Breaks are listed explicitly so each legend label is tied to
  # its colour (and therefore its rank) instead of depending on the default
  # break order; guide_legend() is needed because identity scales draw no
  # legend by default.
  scale_color_identity(
    name = "rank",
    breaks = c("#336600", "#339900", "#66ff33"),
    labels = c("1", "2", "3"),
    guide = guide_legend()
  ) +
  theme_minimal()
In general, I think a more typical ggplot approach would be to specify the colours in scale_colour_manual or equivalent, rather than coding them into the data frame itself. For example:
library(ggplot2)
library(dplyr)
library(ggrepel)
# Same chart with rank kept as a factor in the data and the rank-to-colour
# assignment declared once on the manual colour scale.
data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
) %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  # Rank within each day, stored as a factor so it maps to a discrete scale.
  mutate(rank = factor(rank(desc(points_sum), ties.method = "min"))) %>%
  ungroup() %>%
  ggplot(aes(x = time, y = points_sum, group = name, label = name)) +
  geom_point() +
  geom_text_repel(aes(colour = rank), direction = "y", size = 10) +
  scale_colour_manual(
    values = c("1" = "#336600", "2" = "#339900", "3" = "#66ff33")
  ) +
  theme_minimal()

Fail to change the legend title and label with ggplot2 in R

I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
library(ggplot2)
library(tidyr)
# Inputs: the values of n to evaluate, the eight group means (four at 150
# plus the four "infected" values) and the three candidate sigmas.
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)

# Mean of the group means, and the sum of squared deviations from it.
mu <- mean(all)
tau2 <- sum((all - mu)^2)

# Gamma for each sigma; each is a vector with one entry per n.
gamma.1 <- n * tau2 / sigma[1]^2
gamma.2 <- n * tau2 / sigma[2]^2
gamma.3 <- n * tau2 / sigma[3]^2

# Power: upper-tail probability of the F distribution with noncentrality
# gamma, evaluated at the 95% quantile of the central F distribution.
# NOTE(review): the degrees of freedom (7, 16) are fixed while n varies;
# confirm that the second one should not depend on n.
power.1 <- 1 - pf(qf(.95, 7, 16), 7, 16, gamma.1)
power.2 <- 1 - pf(qf(.95, 7, 16), 7, 16, gamma.2)
power.3 <- 1 - pf(qf(.95, 7, 16), 7, 16, gamma.3)

# Wide table: one row per n, one power column per sigma.
data <- data.frame(n, power.1, power.2, power.3)
# Reshape the power.* columns to long format (group, power) and plot power
# against n, with colour mapped to group in both layers.
# NOTE(review): the last line configures the *fill* scale, while both layers
# map the *color* aesthetic, so its name and labels do not apply to the
# legend produced by those layers.
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_fill_discrete(name = expression(sigma), labels = c("35","40","45"))
Try this in the final part of your code. One lesson you can learn is that fill and color are different aesthetics. So, if you map color, you must use a scale_color_*() function (here scale_color_discrete()) rather than a scale_fill_*() one. Here is the code:
#Code
# Power against n, one coloured line and point series per power.* column.
# The legend title and labels are set on the colour scale, which is the
# aesthetic both layers map.
ggplot(
  data = pivot_longer(
    data,
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ),
  mapping = aes(n, power)
) +
  geom_line(aes(color = group)) +
  geom_point(aes(color = group), size = 4) +
  scale_color_discrete(
    name = expression(sigma),
    labels = c("35", "40", "45")
  )
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
#Code 2
# Variant: the labels are set on the colour scale and the legend title is
# set separately through guides().
ggplot(
  data = pivot_longer(
    data,
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ),
  mapping = aes(n, power)
) +
  geom_line(aes(color = group)) +
  geom_point(aes(color = group), size = 4) +
  scale_color_discrete(labels = c("35", "40", "45")) +
  guides(color = guide_legend(title = expression(sigma)))
You should use:
# Colour-scale layer to add to the plot: sets the legend title to the sigma
# symbol and the legend labels to 35, 40 and 45.
scale_colour_discrete(name = expression(sigma), labels = c("35","40","45"))

How can I create this chart in R using ggplot2?

I am using R in RStudio and I have the following data frame.
# Six companies (A to F) with one value for each of Q2_2018 and Q2_2019.
df1 <- data.frame(
  comp = LETTERS[1:6],
  Q2_2018 = c(27, 10, 6, 4, 3, 2),
  Q2_2019 = c(31, 12, 8, 6, 5, 4)
)
I would like to create a chart (from the above data) like the one shown below (excluding the Amazon logo).
I am mostly stuck at drawing the circles with the % changes.
So far,
library(ggplot2)
library(reshape2)
library(magrittr)
# Melt df1 to long format (comp, variable, value) and draw one dodged bar per
# quarter for each company.
ggplot(
  data = melt(df1, id.vars = "comp"),
  mapping = aes(x = comp, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge")
Can it be done with ggplot2?
Most of the way:
library(tidyverse)
# Dodged columns per company and quarter, with a circle above each company
# showing the relative change between the two quarters.
df1 %>%
  # Long format: one row per company and quarter. Within each company the
  # rows keep the column order (Q2_2018 before Q2_2019), which lag() relies on.
  pivot_longer(cols = -comp, names_to = "year", values_to = "val") %>%
  group_by(comp) %>%
  # Relative change versus the previous quarter; NA for the first quarter.
  mutate(change = val / lag(val) - 1) %>%
  mutate(change_lab = if_else(
    !is.na(change),
    # Only positive values need an explicit sign; negative values already
    # carry their own minus sign, so no "-" prefix is added.
    scales::percent(
      change,
      accuracy = 1,
      prefix = if_else(change > 0, "+", "")
    ),
    NA_character_
  )) %>%
  ungroup() %>%
  ggplot(aes(comp, val, fill = year, label = val)) +
  geom_col(position = position_dodge()) +
  geom_text(position = position_dodge(width = 1), vjust = -0.5) +
  # Circle and percent label are drawn 5 units above the bar value; rows with
  # an NA change (the first quarter) produce no circle or label.
  geom_point(aes(comp, val + 5, size = change), color = "lightgreen") +
  geom_text(aes(comp, val + 5, label = change_lab)) +
  scale_size_area(max_size = 30) +
  # Hide the size legend.
  guides(size = "none") +
  theme_classic()

Resources