Screenshot after knitting
---
title: "R Notebook"
output:
html_document:
df_print: paged
---
```{r}
library(tidyverse)
library(ggplot2)
library(scales)
library(ggthemes)
library(magrittr)
library(rio)
```
```{r, echo=FALSE}
df = read.csv('https://raw.githubusercontent.com/alyssatn/RWorkshopHW/main/Report_Card_Enrollment_from_2014-15_to_Current_Year.csv')
```
```{r, echo=FALSE}
df6=df[df$Gradelevel=="6th Grade",]
df6$All.Students=gsub(pattern = ',', replacement = "",df6$All.Students)
df6$All.Students=as.numeric(df6$All.Students)
df6$gap = df6$All.Students-78603
df6$gap_lead = 100 * (df6$All.Students - lead(df6$All.Students))/lead(df6$All.Students)
df6$PositiveGap=ifelse(df6$gap_lead > 0,"Yes","No")
df6$PositiveGap=as.character(df6$PositiveGap) #I don't know if this matters but I have it here anyway
df6[8,38] = "No" #manually updating the data for the 2014-2015 year
df6[8,36] = 0 #manually updating the data for the 2014-2015 year
df6$xlabel = c(7.8,6.8,5.3,4.3,3.3,2.3,1.3,2)#X coordinates for the line segment labels
df6$ylabel = c(5273 - ((5273 - 2135)/2), 9606 - (9606 - 5273)/2, 8643 - (8643 - 9606)/2, 8643 - (8643 - 5632)/2,5632 - (5632-2707)/2, 2707 - (2702-1415)/2,(1415/2), 600)#Y coordinates for the line segment labels
df6$label = c(2135-5273, 5273 - 9096, 9096 - 8643, 8643 - 5632, 5632 - 2707, 2707 - 1415, 1415, 0)
```
```{r, echo=FALSE}
base=ggplot(data=df6,
aes(x=SchoolYear,
y=gap))
plot1=base + geom_bar(fill = "black",
stat = 'identity') +
labs(title="Fewer 6th graders were enrolled in Washington State public schools after the Covid-19 Pandemic",
x ="School Year",
y = "Change in number of 6th graders since 2014",
caption = "Source: Washington State Department of Education") +
theme(plot.caption = element_text(hjust = 0, size = 8),
plot.title = element_text(hjust = 0.5, size = 9.5), axis.title = element_text(size = 10))
plot2 = plot1 + geom_segment(aes(x = 1, y = 0, xend=2, yend = 1415), color = "green") +
geom_segment(aes(x = 2, y = 1415, xend=3, yend = 2707), color = "green") +
geom_segment(aes(x = 3, y = 2707, xend=4, yend = 5632), color = "green") +
geom_segment(aes(x = 4, y = 5632, xend=5, yend = 8643), color = "green") +
geom_segment(aes(x = 5, y = 8643, xend=6, yend = 9606), color = "green") +
geom_segment(aes(x = 6, y = 9606, xend=7, yend = 5273), color = "red") +
geom_segment(aes(x = 7, y = 5273, xend=8, yend = 2135), color = "red")
plot3 = plot2 + geom_text(size = 3,#fontface = "bold",
aes(x = xlabel, y = ylabel, label = paste0(round(label,0))))
plot4 = plot3 + geom_vline(xintercept = 6.5, #where
size=1.5, #thickness
alpha=0.8,#transparency
color = "blue") +
annotate(geom = 'text',
label = "<- Start of Covid-19 pandemic",
size = 2.9,
y = 9001,
x=7.5,
angle=0)
```
```{r, echo=FALSE}
plot4
```
I was able to hide the source code using r,echo+false, but that only works in preview after I knit the rmd file it changes into the attached pic. Otherwise, before I do that you cannot see the source code.
Related
I have this data frame :
Raw.Score = c(0,1,2,3,4,5,6,7,8)
Severity = c(-3.56553994,-2.70296933,-1.63969850,-0.81321707,-0.04629182,
0.73721320,1.61278518,2.76647043,3.94804472)
x = data.frame(Raw.Score = Raw.Score, Severity = Severity)
Raw.score are raw numbers from 0 to 8 (let's consider them as the labels of the severity numbers)
Severity are relative numbres that represent the locations of the scores in the diagram
I want to graphically present the results as in the following example using ggplot (the example includes different numbers but I want something similar)
As a fun exercise in ggplot-ing here is one approach to achieve or come close to your desired result.
Raw.Score = c(0,1,2,3,4,5,6,7,8)
Severity = c(-3.56553994,-2.70296933,-1.63969850,-0.81321707,-0.04629182,
0.73721320,1.61278518,2.76647043,3.94804472)
dat <- data.frame(Raw.Score, Severity)
library(ggplot2)
dat_tile <- data.frame(
Severity = seq(-4.1, 4.1, .05)
)
dat_axis <- data.frame(
Severity = seq(-4, 4, 2)
)
tile_height = .15
ymax <- .5
ggplot(dat, aes(y = 0, x = Severity, fill = Severity)) +
# Axis line
geom_hline(yintercept = -tile_height / 2) +
# Colorbar
geom_tile(data = dat_tile, aes(color = Severity), height = tile_height) +
# Sgements connecting top and bottom labels
geom_segment(aes(xend = Severity, yend = -ymax, y = ymax), color = "orange") +
# Axis ticks aka dots
geom_point(data = dat_axis,
y = -tile_height / 2, shape = 21, stroke = 1, fill = "white") +
# ... and labels
geom_text(data = dat_axis, aes(label = Severity),
y = -tile_height / 2 - .1, vjust = 1, fontface = "bold") +
# Bottom labels
geom_label(aes(y = -ymax, label = scales::number(Severity, accuracy = .01))) +
# Top labels
geom_point(aes(y = ymax, color = Severity), size = 8) +
geom_text(aes(y = ymax, label = Raw.Score), fontface = "bold") +
# Colorbar annotations
annotate(geom = "text", fontface = "bold", label = "MILD", color = "black", x = -3.75, y = 0) +
annotate(geom = "text", fontface = "bold", label = "SEVERE", color = "white", x = 3.75, y = 0) +
# Fixing the scales
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(limits = c(-ymax, ymax)) +
# Color gradient
scale_fill_gradient(low = "orange", high = "red", guide = "none") +
scale_color_gradient(low = "orange", high = "red", guide = "none") +
# Get rid of all non-data ink
theme_void() +
# Add some plot margin
theme(plot.margin = rep(unit(10, "pt"), 4)) +
coord_cartesian(clip = "off")
I have dataframe which represents sales by model within 2 different years. 'change' column stands for absolute change by models from 2020 to 2021 while 'chng.percent' measures this change in percentages.
However, I am struggling to apply the given Code of slope plot to my data.
df <- data.frame (model = c("A", "A", "B","B"),
year = c(2020,2021,2020,2021),
sale =c(105,190,110,180),
chang = c(85,NA,70,NA),
chng.percent = c(80.9,NA, 63.6,NA))
Expected outcome (Like this)
Here's a way to do it all within ggplot using your existing data:
ggplot(df, aes(year, sale, color = model)) +
geom_line(arrow = arrow(type = "closed", angle = 20),
key_glyph = draw_key_point) +
geom_vline(aes(xintercept = year)) +
geom_text(aes(label = sale, hjust = ifelse(year == 2020, 1.3, -0.3)),
color = "black",
size = 6) +
geom_text(aes(x = min(df$year) + 0.25, y = 105,
label = paste0("+", chang[1], "; ", chng.percent[1], "%"),
color = "A"), size = 5) +
geom_text(aes(x = max(df$year) - 0.25, y = 150,
label = paste0("+", chang[3], "; ", chng.percent[3], "%"),
color = "B"), size = 5) +
theme_void(base_size = 16) +
coord_cartesian(clip = "off") +
scale_x_continuous(breaks = c(2020, 2021)) +
guides(color = guide_legend(override.aes = list(size = 5))) +
scale_color_brewer(palette = "Set1") +
theme(plot.margin = margin(30, 30, 30, 30),
aspect.ratio = 1.5,
axis.text.x = element_text(size = 20))
you can try something like this :
df <- data.frame(model = c("A", "B"),
sale_2020 =c(105,110),
sale_2021 =c(190,180),
chang = c(85,70),
chng.percent = c(80.9, 63.6))
df %>%
ggplot() +
geom_segment(aes(x = 1, xend = 2,
y = sale_2020,
yend = sale_2021,
group = model,
col = model),
size = 1.2) +
# set the colors
scale_color_manual(values = c("#468189", "#9DBEBB"), guide = "none") +
# remove all axis stuff
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()) +
geom_text(aes(x = x, y = y, label = label),
data = data.frame(x = 1:2,
y = 10 + max(df$sale_2021),
label = c("2020", "2021")),
col = "grey30",
size = 6) +
# add vertical lines that act as axis for 2020
geom_segment(x = 1, xend = 1,
y = min(df$sale_2020) -10,
yend = max(df$sale_2020) + 81,
col = "grey70", size = 1.5) +
# add vertical lines that act as axis for 2021
geom_segment(x = 2, xend = 2,
y = min(df$sale_2021) - 80,
yend = max(df$sale_2021) + 1,
col = "grey70", size = 1.5) +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 2 + 0.08,
y = sale_2021,
label = paste0(round(sale_2021, 1))),
col = "grey30") +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 1 - 0.08,
y = sale_2020,
label = paste0(round(sale_2020, 1))),
col = "grey30") +
# add the success rate next to each point on 2020 axis
geom_text(aes(x = 2 - 0.5,
y = c(156, 135),
label = paste0(round(chng.percent, 1), "%")),
col = "grey30")
I was able to replicate another good answers here to create a basic radial plot, but can anyone give me any clue of others functions/parameters/ideas on how to convert the basic one to something similar to this :
You could get pretty close like this:
df <- data.frame(x = c(10, 12.5, 15), y = c(1:3),
col = c("#fcfbfc", "#fbc3a0", "#ec6f4a"))
library(ggplot2)
ggplot(df, aes(x = 0, xend = x, y = y, yend = y, color = col)) +
geom_hline(yintercept = c(1:3), size = 14, color = "#dfdfdf") +
geom_hline(yintercept = c(1:3), size = 13, color = "#f7f7f7") +
geom_segment(color = "#bf2c23", size = 14, lineend = 'round') +
geom_segment(size = 13, lineend = 'round') +
scale_color_identity() +
geom_point(aes(x = x - 0.03 * y), size = 5, color = "#bf2c23",
shape = 21, fill = 'white') +
geom_point(aes(x = x - 0.03 * y), size = 2, color = "#bf2c23",
shape = 21, fill = 'white') +
scale_y_continuous(limits = c(0, 4)) +
scale_x_continuous(limits = c(0, 20)) +
coord_polar() +
theme_void()
Here's a start. Are there particular aspects you're trying to replicate? This is a fairly customized format.
df <- data.frame(type = c("on", "ia", "n"),
radius = c(2,3,4),
value = c(10,21,22))
library(ggplot2); library(ggforce)
ggplot(df) +
geom_link(aes(x = radius, xend = radius,
y = 0, yend = value),
size = 17, lineend = "round", color = "#bb353c") +
geom_link(aes(x = radius, xend = radius,
y = 0, yend = value, color = type),
size = 16, lineend = "round") +
geom_label(aes(radius, y = 30,
label = paste(type, ": ", value)), hjust = 1.8) +
scale_x_continuous(limits = c(0,4)) +
scale_y_continuous(limits = c(0, 30)) +
scale_color_manual(values = c("on" = "#fff7f2",
"ia" = "#f8b68f",
"n" = "#e4593a")) +
guides(color = "none") +
coord_polar(theta = "y") +
theme_void()
I have created a plot with geom_area(), geom_line() in it. Now I would like to add a country map background in the plot and for same I am trying to use: map_data() & geom_ploygon() but it's giving error, probably because one's xaxis is on date scale & other's is longitude.
Error:
Error: Invalid input: date_trans works with objects of class Date only
Here is my code & plot without map:
library(tidyverse)
library(glue)
library(scales)
library(tidytext)
data:
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/ts_all_long3.csv"
ts_all_long <- read.csv(url(file_url))
Step 1:
confirm_col = "#32a4ba"
death_col = "#f08080"
Country_selected = c("India")
scaleFactor = max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Confirmed_daily)) /
max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Death_daily))
ts_all_long %>%
filter(Country.Region %in% c("India") ) %>%
ggplot(aes(x = date)) +
geom_area(aes(y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")
Step 2: Code & image for map:
ggplot() +
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white")
Step 3: When I try to combine code for above 2 steps I get an error:
confirm_col = "#32a4ba"
death_col = "#f08080"
Country_selected = c("India")
scaleFactor = max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Confirmed_daily)) /
max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Death_daily))
ts_all_long %>%
filter(Country.Region %in% c("India") ) %>%
ggplot() +
# added country map here from step2
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white") +
# usual plot of step1
geom_area(aes(x = date, y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(x = date, y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")
I would suggest to add the map as a background image to your plot which could be done via e.g. the ggimage package like so:
library(ggimage)
map <- ggplot() +
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white") +
theme_void()
ggsave("map.png")
#> Saving 7 x 5 in image
ggbackground(p, "map.png")
p:
d <- ts_all_long %>%
filter(Country.Region %in% c("India")) %>%
mutate(date = as.Date(date))
p <- ggplot(d, aes(x = date)) +
geom_area(aes(y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")
I am attempting to recreate some plots from a research article in R and am running into an issue with applying a log scale to y axis. The visualization I'm attempting to recreate is this:
reference plot with y log scale
I currently have a working version without the logarithmic scale applied to the y-axis:
Proportion_Mean_Plot <- ggplot(proportions, aes(days2,
proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20)) +
scale_y_continuous(breaks = seq(0,6,.5)) +
theme_tufte() +
geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
geom_segment(aes(x = 0, y = mean, xend = end, yend = mean),
data = proportion_intervention1_data) +
geom_segment(aes(x = start, y = mean, xend = end, yend = mean),
data = proportion_intervention2_data, linetype = 4)
This produces a decent representation of the original:
normally scaled y-axis plot
I would like to try to apply that logarithmic scaling to more closely match it. Any help is appreciated.
As per Richard's suggestion, here is a quick example how you can use scale_y_log10:
suppressPackageStartupMessages(library(tidyverse))
set.seed(123)
# generate some data
proportions <- tibble(interv_1 = pmax(0.4, rnorm(160, mean = 1.3, sd = 0.2)),
interv_2 = pmax(0.01, rnorm(160, mean = 1.6, sd = 0.5)))
proportions <- proportions %>%
gather(key = observation, value = proportion_mean) %>%
mutate(days2 = 1:320)
# create the plot
ggplot(proportions, aes(days2, proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20), expand = c(0, 0)) +
scale_y_log10(breaks = c( 0.1, 0.5, 1, 2, 3, 4, 5), limits = c(0.1, 5)) +
# theme_tufte() +
# geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
# plugged the values for the means of the two distributions
geom_segment(aes(x = 0, y = 1.3, xend = 164.5, yend = 1.3)) +
geom_segment(aes(x = 164.5, y = 1.6, xend = 320, yend = 1.6), linetype = 4)