100% stacked area using ggplot - r

I have two data frames: LF and HF
head(LF)
Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 386.18 1164.3966 4586 12.30089 5285 14.23955 6707 18.17906
2 2010 268.72 884.9963 4354 13.37728 4927 15.20045 6078 18.81523
3 2011 347.61 746.7686 6924 12.25466 7917 13.84788 9302 16.93291
4 2012 170.68 1218.6758 2471 16.39350 3006 19.60066 3670 24.18561
head(HF)
Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 184.44 4055.367 535 11.53037 621 13.50632 1175 25.82282
2 2010 118.08 2726.272 737 14.44196 868 16.92781 1236 24.56522
3 2011 119.90 2208.308 663 10.19803 742 11.42253 1086 17.36818
4 2012 554.07 11913.003 2413 45.44719 2781 52.90863 4290 85.87746
5 2013 165.32 5926.628 424 15.93962 461 17.16547 873 31.70556
The following relationship holds for the data frames above: LF$SS + HF$SS = total load
I want to plot the proportion (%) contributed by LF and HF for each column variable, using the two data frames, as shown below.
Your help would be appreciated

Here is an approach:
library(tidyverse)
# Stack lf and hf (tagging every row with its origin), reshape the measure
# columns to long format, compute each origin's share per variable and year,
# and draw the shares as stacked areas with one panel per variable.
ggplot(
  data = bind_rows(
    mutate(lf, col = "lf"), # rows from lf, labelled with their source
    mutate(hf, col = "hf")  # rows from hf, labelled with their source
  ) %>%
    gather(key, value, 2:9) %>%         # columns 2-9 to long format
    group_by(key, Year) %>%             # one group per variable and year
    mutate(ratio = value / sum(value)), # share of the group total
  mapping = aes(x = Year, y = ratio, fill = col)
) +
  geom_area() +
  facet_wrap(~key) +
  scale_y_continuous(labels = scales::percent)
data:
# Example data. The header row has one field fewer than the data rows, so
# read.table() uses the first field of each data row as the row name.
# header = TRUE is spelled out because T is an ordinary, reassignable variable.
lf <- read.table(text = "Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 386.18 1164.3966 4586 12.30089 5285 14.23955 6707 18.17906
2 2010 268.72 884.9963 4354 13.37728 4927 15.20045 6078 18.81523
3 2011 347.61 746.7686 6924 12.25466 7917 13.84788 9302 16.93291
4 2012 170.68 1218.6758 2471 16.39350 3006 19.60066 3670 24.18561", header = TRUE)
hf <- read.table(text = "Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 184.44 4055.367 535 11.53037 621 13.50632 1175 25.82282
2 2010 118.08 2726.272 737 14.44196 868 16.92781 1236 24.56522
3 2011 119.90 2208.308 663 10.19803 742 11.42253 1086 17.36818
4 2012 554.07 11913.003 2413 45.44719 2781 52.90863 4290 85.87746", header = TRUE)
I have removed the last row from hf so it matches the number of rows in lf

My answer doesn't differ much from @missuse's, except that it skips the need to calculate proportions.
For ggplot, you generally want data in long shape, so after binding the two data frames and marking which data frame observations come from (creating the type column in mutate), you should gather the data. In geom_area, using position = position_fill() calculates proportions within each facet, rather than you needing to do this manually.
library(tidyverse)
# Example data. The header row has one field fewer than the data rows, so
# read.table() uses the first field of each data row as the row name.
# header = TRUE is spelled out because T is an ordinary, reassignable variable.
lf <- read.table(text = "Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 386.18 1164.3966 4586 12.30089 5285 14.23955 6707 18.17906
2 2010 268.72 884.9963 4354 13.37728 4927 15.20045 6078 18.81523
3 2011 347.61 746.7686 6924 12.25466 7917 13.84788 9302 16.93291
4 2012 170.68 1218.6758 2471 16.39350 3006 19.60066 3670 24.18561", header = TRUE)
hf <- read.table(text = "Year SS SS_CQT SRP SRP_CQT TDP TDP_CQT TP TP_CQT
1 2009 184.44 4055.367 535 11.53037 621 13.50632 1175 25.82282
2 2010 118.08 2726.272 737 14.44196 868 16.92781 1236 24.56522
3 2011 119.90 2208.308 663 10.19803 742 11.42253 1086 17.36818
4 2012 554.07 11913.003 2413 45.44719 2781 52.90863 4290 85.87746", header = TRUE)
# Label each data frame with its source, stack them, and reshape every
# measure column (everything except Year and type) into long format.
df <- gather(
  bind_rows(
    mutate(lf, type = "LF"),
    mutate(hf, type = "HF")
  ),
  key = measure, value = value, -Year, -type
)

# position_fill() turns the stacked areas into proportions, so no ratio has
# to be computed by hand.
ggplot(data = df, mapping = aes(x = Year, y = value, fill = type)) +
  geom_area(position = position_fill()) +
  facet_wrap(~measure) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c(HF = "darkorange", LF = "slateblue"))
Created on 2018-05-20 by the reprex package (v0.2.0).

Related

Add Group Subheader and Subtotal Rows to data.frame or table in R

Objective
I wish to add subheader and subtotal/margin rows within a table. Ultimately, I am looking for a structure shown below, which I will export to Excel with openxlsx and writeData.
2019
2020
2021
A
A1
1001
1157
911
A2
1005
803
1110
A3
1125
897
1190
Total A
3131
2857
3211
B
B1
806
982
1098
B2
1106
945
1080
B3
1057
1123
867
Total B
2969
3050
3045
C
C1
847
1087
1140
C2
1146
966
1176
C3
1071
915
892
Total C
3064
2968
3208
Total All
9164
8875
9464
I suspect the subheaders and subtotals are completely different questions, but I am asking both here in case there is a common method related to each.
Reproducible Code So Far
Create the Sample Data (long format):
# Long-format sample data: 3 sectors x 3 subsectors x 3 years, each row with
# a random value drawn without replacement from 800..1200.
d <- data.frame(
  year = rep(c(2019, 2020, 2021), times = 9),
  sector = rep(LETTERS[1:3], each = 9),
  subsector = paste0(
    rep(LETTERS[1:3], each = 9),
    rep(rep(1:3, each = 3), times = 3)
  ),
  value = sample(800:1200, size = 27)
)
Output:
head(d)
#> year sector subsector value
#> 1 2019 A A1 1001
#> 2 2020 A A1 1157
#> 3 2021 A A1 911
#> 4 2019 A A2 1005
#> 5 2020 A A2 803
#> 6 2021 A A2 1110
Format wide and add a margin (total) row:
library(janitor)
#[snip]warnings[/snip]
library(tidyverse)
#[snip]warnings[/snip]
# Total the values per year/sector/subsector, spread the years into columns,
# and append a single totals row at the bottom of the table.
janitor::adorn_totals(
  pivot_wider(
    summarise(
      group_by(d, year, sector, subsector),
      sales = sum(value, na.rm = TRUE)
    ),
    names_from = year,
    values_from = sales
  ),
  where = "row"
)
Output:
#> `summarise()` has grouped output by 'year', 'sector'. You can override using the `.groups` argument.
#> sector subsector 2019 2020 2021
#> A A1 1001 1157 911
#> A A2 1005 803 1110
#> A A3 1125 897 1190
#> B B1 806 982 1098
#> B B2 1106 945 1080
#> B B3 1057 1123 867
#> C C1 847 1087 1140
#> C C2 1146 966 1176
#> C C3 1071 915 892
#> Total - 9164 8875 9464
Created on 2022-03-02 by the reprex package (v2.0.1)
The janitor package's adorn_totals() function works well for adding a margin row or column for the entire set. And Sam Firke's response here hints at a solution using tidyr::gather but my data is in a different format. I don't want to "gather" the columns. Others in the same thread show solutions but they place all the totals at the end of the table.
I can imagine a solution where I loop through the sector factors and assemble and combine tables for each sector, but I suspect I am overthinking this and there is a simpler solution.
Is there an existing solution for this objective, or ideas on accomplishing this efficiently/universally?
Please Note: the number of subsectors per sector will vary in the actual data (i.e., some may have only one subsector, others may have several), and there is no naming convention relating the subsector to the sector (i.e., the parent sector will not be part of the child subsectors name: rather than Sector: "A", Subsector: "A1", it might be Sector: "Manufacturing", Subsector: "Cars").
@akrun -- Solution!
Your answer got me 90% of the way there, and your subsequent comments led me to the rest of the solution.
gt has a function as_raw_html() whose output can be passed through xml2::read_html() and rvest::html_table() to convert the gt() object to a tibble while keeping the subheaders.
library(dplyr)
library(tidyr)
library(purrr)
library(gt)
library(xml2)
library(rvest)
# Sample data: 3 sectors x 3 subsectors x 3 years with random values.
d <- data.frame(
  year = rep(c(2019, 2020, 2021), times = 9),
  sector = rep(c("A", "B", "C"), each = 9),
  subsector = paste0(rep(c("A", "B", "C"), each = 9), rep(c("1", "2", "3"), each = 3)),
  value = sample(800:1200, 27, replace = FALSE)
)

# Build the wide table with a "Total" row per sector, render it with gt, and
# read the rendered HTML back in as a tibble.
d %>%
  group_by(year, sector, subsector) %>%
  summarise(sales = sum(value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = year, values_from = sales) %>%
  group_by(sector) %>%
  # janitor is not attached by this snippet's library() calls, so the call is
  # namespace-qualified to keep the example self-contained.
  group_modify(~ janitor::adorn_totals(.x, where = "row")) %>%
  gt() %>%
  gt::as_raw_html() %>%
  xml2::read_html() %>%
  rvest::html_table()
#> [[1]]
#> # A tibble: 15 x 4
#> subsector `2019` `2020` `2021`
#> <chr> <chr> <chr> <chr>
#> 1 A A A A
#> 2 A1 932 1117 800
#> 3 A2 925 1078 1090
#> 4 A3 816 1058 1146
#> 5 Total 2673 3253 3036
#> 6 B B B B
#> 7 B1 862 1181 947
#> 8 B2 1083 812 912
#> 9 B3 1079 1130 1097
#> 10 Total 3024 3123 2956
#> 11 C C C C
#> 12 C1 966 895 944
#> 13 C2 970 1147 1166
#> 14 C3 1043 1116 826
#> 15 Total 2979 3158 2936
Created on 2022-03-02 by the reprex package (v2.0.1)
The subheader rows repeat the sector name in all columns; other than that, it looks good.
Interestingly, rvest also has a read_html function that might even reference the xml2::read_html() function, but it did not work in this context.
Instead of applying adorn_totals on the entire summary, use group_modify and then convert to gt
library(dplyr)
library(tidyr)
library(purrr)
library(janitor)
library(gt)
# Summarise to one row per sector/subsector with a column per year, add a
# totals row inside every sector group, then hand the grouped table to gt().
d %>%
  group_by(year, sector, subsector) %>%
  summarise(sales = sum(value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = year, values_from = sales) %>%
  group_by(sector) %>%
  group_modify(function(rows, key) adorn_totals(rows, where = "row")) %>%
  gt()
-output
An option is also to split the column with expss
library(expss)
library(openxlsx)
# Per-sector table with a totals row in each sector group; the grouping is
# dropped before expss::split_columns() is applied to column 1.
out <- d %>%
  group_by(year, sector, subsector) %>%
  summarise(sales = sum(value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = year, values_from = sales) %>%
  group_by(sector) %>%
  group_modify(function(rows, key) adorn_totals(rows, where = "row")) %>%
  ungroup() %>%
  split_columns(columns = 1)

# Write the table to a "Tables" sheet and save the workbook under the current
# working directory, replacing any existing file.
wb <- createWorkbook()
sh <- addWorksheet(wb, "Tables")
xl_write(out, wb, sh)
saveWorkbook(
  wb,
  file.path(getwd(), "Documents/table1.xlsx"),
  overwrite = TRUE
)
-output

How to plot a specific row in a dataframe using ggplot [duplicate]

This question already has answers here:
Plotting each value of columns for a specific row
(2 answers)
Closed 1 year ago.
I have a dataframe that shows the number of car sales in each country for years 2000 to 2020. I wish to plot a line graph to show how the number of car sales have changed over time for only a specific country/row, with year on the x axis and sales on the y axis. How would I do this using ggplot?
You perhaps want this
#toy_data
sales
#> Country 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
#> 2 A 1002 976 746 147 1207 627 157 1481 1885 1908 392
#> 3 B 846 723 1935 176 1083 636 1540 1692 899 607 1446
#> 4 C 1858 139 1250 121 1520 199 864 238 1109 1029 937
#> 5 D 534 1203 1759 553 1765 1784 1410 420 606 467 1391
library(tidyverse)
# for all countries: reshape the year columns to long format and draw one
# line per country
sales %>%
  pivot_longer(cols = -Country, names_to = "year", values_to = "sales") %>%
  mutate(year = as.numeric(year)) %>%
  ggplot(mapping = aes(x = year, y = sales, colour = Country)) +
  geom_line()

# for one country: keep only that country's row, then reshape and plot
sales %>%
  filter(Country == "A") %>%
  pivot_longer(cols = -Country, names_to = "year", values_to = "sales") %>%
  mutate(year = as.numeric(year)) %>%
  ggplot(mapping = aes(x = year, y = sales)) +
  geom_line()
Created on 2021-06-07 by the reprex package (v2.0.0)
Suppose you have a data frame that looks like this:
# make dummy df: 63 distinct values from 1..100 arranged as a 3 x 21 matrix,
# with the countries as row names and the years 2000-2020 as column names
df <- matrix(
  sample(1:100, 63),
  nrow = 3,
  ncol = 21,
  dimnames = list(c("UK", "US", "UAE"), as.character(2000:2020))
)
Here I generated some random data for 21 years between 2000 and 2020, and for three countries. To get a line plot with ggplot for UK, I did:
# Pull the "UK" row of the matrix into a two-column data frame; the years
# come from the column names, so `year` holds strings.
data_uk <- data.frame(
  year = colnames(df),
  sales = df["UK", ],
  row.names = NULL
)

# group = 1 puts every observation in a single group so one line is drawn
# through all the points.
ggplot(data_uk, aes(year, sales, group = 1)) +
  geom_point() +
  geom_line()
Example plot

Sum variables by unique variable names, for different metrics - requires finding unique names before/after a prefix in variable name

Is there a way to sum variables (e.g. sales and units) for all unique variable names (brands like Coke and Pepsi) within a data frame?
To help, here is some example data.
# Reproducible example data: seven daily observations of units and sales for
# every brand/type combination.
set.seed(123)
period <- seq(as.Date("2021/01/01"), as.Date("2021/01/07"), by = "day")
Coke_Regular_Units <- sample(1000:2000, 7, replace = TRUE)
Coke_Diet_Units <- sample(1000:2000, 7, replace = TRUE)
Coke_Regular_Sales <- sample(500:1000, 7, replace = TRUE)
Coke_Diet_Sales <- sample(500:1000, 7, replace = TRUE)
Pepsi_Regular_Units <- sample(1000:2000, 7, replace = TRUE)
Pepsi_Diet_Units <- sample(1000:2000, 7, replace = TRUE)
Pepsi_Regular_Sales <- sample(500:1000, 7, replace = TRUE)
Pepsi_Diet_Sales <- sample(500:1000, 7, replace = TRUE)
# `period` has to be a column of df: the printed head(df) shows it, and
# reshaping code that uses "period" as the id variable needs it to exist.
df <- data.frame(
  period,
  Coke_Regular_Units, Coke_Diet_Units, Coke_Regular_Sales, Coke_Diet_Sales,
  Pepsi_Regular_Units, Pepsi_Diet_Units, Pepsi_Regular_Sales, Pepsi_Diet_Sales
)
> head(df)
period Coke_Regular_Units Coke_Diet_Units Coke_Regular_Sales Coke_Diet_Sales Pepsi_Regular_Units
1 2021-01-01 1414 1117 589 847 1425
2 2021-01-02 1462 1298 590 636 1648
3 2021-01-03 1178 1228 755 976 1765
4 2021-01-04 1525 1243 696 854 1210
5 2021-01-05 1194 1013 998 827 1931
6 2021-01-06 1937 1373 590 525 1589
Pepsi_Diet_Units Pepsi_Regular_Sales Pepsi_Diet_Sales
1 1554 608 943
2 1870 762 808
3 1372 892 634
4 1843 924 808
5 1142 829 910
6 1543 522 723
I would like code that automatically calculates Coke_Sales, Coke_Units, Pepsi_Sales, Pepsi_Units, Regular_Sales and Diet_Units.
I am currently doing it like this for each variable
library(dplyr)
# Row-wise totals over the numeric columns whose names contain both "Coke"
# and the metric name, stored as new columns of df.
df$Coke_Sales <- df %>%
  select(matches("Coke") & matches("Sales")) %>%
  Filter(f = is.numeric) %>%
  rowSums()
df$Coke_Units <- df %>%
  select(matches("Coke") & matches("Units")) %>%
  Filter(f = is.numeric) %>%
  rowSums()
This is ok for a small number of variables, but I need to do this for 100s of variables. Is there any function that enables this? It would need to automatically find the unique variable names like Coke, Pepsi, Diet and Regular. The metric is the last part of the variable name, so doesn't necessarily need to auto-find this but would be great. If it makes it any easier, it would be ok to specify the metrics as there are only 3 metrics at most, but there are hundreds of brands.
If it can't be automated, is there a way it can be simplified, where I specify the variables required? Not perfect, but still an improvement. For example, including these lines of code to specify the variables to sum and the metrics required:
VarsToSum <- c("Coke", "Pepsi", "Diet", "Regular")
Metrics <- c("Sales", "Units")
If it can't be accomplished that way either, maybe I need to break into smaller steps, any tips would be great. Trying to think how to do it, should I try to find unique name before a prefix "_", then calculate "Sales" and "Units" for those unique names. Would this be the best way to do it? Or should I reshape the data? Are there any other routes to get there?
Any help, or directions how to achieve this would be greatly appreciated. Thanks
Here is a data.table approach...
library( data.table )
setDT(df) # convert df to a data.table (setDT modifies df itself rather than returning a copy)
# melt to long format: one row per period and original column; the original
# column name goes into `variable` (kept as character, not factor) and the
# number into `value`
ans <- melt( df, id.vars = "period", variable.factor = FALSE )
# split `variable` on "_" into 3 new columns (brand, type, what), added to ans by `:=`
ans[, c("brand", "type", "what") := tstrsplit( variable, "_" ) ]
# > head(ans)
# period variable value brand type what
# 1: 2021-01-01 Coke_Regular_Units 1414 Coke Regular Units
# 2: 2021-01-02 Coke_Regular_Units 1462 Coke Regular Units
# 3: 2021-01-03 Coke_Regular_Units 1178 Coke Regular Units
# 4: 2021-01-04 Coke_Regular_Units 1525 Coke Regular Units
# 5: 2021-01-05 Coke_Regular_Units 1194 Coke Regular Units
# 6: 2021-01-06 Coke_Regular_Units 1937 Coke Regular Units
# summarise however you like: here, the total of `value` for every
# brand/type/what combination (change `by` to aggregate at another level)
ans[, .(total = sum(value) ), by = .(brand, type, what)]
# brand type what total
# 1: Coke Regular Units 10527
# 2: Coke Diet Units 8936
# 3: Coke Regular Sales 5158
# 4: Coke Diet Sales 5171
# 5: Pepsi Regular Units 11160
# 6: Pepsi Diet Units 10813
# 7: Pepsi Regular Sales 5447
# 8: Pepsi Diet Sales 5491
Using outer to paste the name fragments into regex patterns, and grep to find the matching columns.
# outer() pastes every brand/metric pair into a pattern such as "Coke.*Sales";
# for each pattern, the columns of df whose names match are summed row-wise.
sapply(
  outer(c("Coke", "Pepsi"), c("Sales", "Units"), paste, sep = ".*"),
  function(pattern) {
    matching <- grepl(pattern, names(df))
    rowSums(df[matching])
  }
)
# Coke.*Sales Pepsi.*Sales Coke.*Units Pepsi.*Units
# [1,] 1436 1551 2531 2979
# [2,] 1226 1570 2760 3518
# [3,] 1731 1526 2406 3137
# [4,] 1550 1732 2768 3053
# [5,] 1825 1739 2207 3073
# [6,] 1115 1245 3310 3132
# [7,] 1446 1575 3481 3081
Here's a solution similar in spirit to that of @Wimpel, but with the tidyverse:
library(tidyverse)
# Reshape so that every row is one brand/type observation with a Sales and a
# Units column, total both metrics per brand, then spread the result back
# into a single wide row named "<brand>_<metric>".
summary_df <- df %>%
  pivot_longer(
    cols = ends_with(c("Sales", "Units")),
    names_to = c("brand", "type", ".value"),
    names_pattern = "(.*)_(.*)_(.*)"
  ) %>%
  group_by(brand) %>%
  summarise(Sales = sum(Sales), Units = sum(Units)) %>%
  pivot_wider(
    names_from = brand,
    values_from = c(Sales, Units),
    names_glue = "{brand}_{.value}"
  )
summary_df
# # A tibble: 1 x 4
# Coke_Sales Pepsi_Sales Coke_Units Pepsi_Units
# <int> <int> <int> <int>
# 1 10329 10938 19463 21973

Plotting a boxplot using R

I am trying to create boxplot using R script from the following type of tab delimited file "New.txt" where the number of rows and columns will be variable
Chr Start End Name 18NGS31 18MPD168 18NGS21 18NGS29 18NGS33 18NGS38
chr9 1234 1234 ABL1 1431 1 1112 1082 1809 1647
chr9 2345 2345 ASXL1 3885 37 3578 1974 2921 3559
chr9 3456 3456 ETV6 3235 188 2911 1578 2344 2673
chr9 4567 4567 MYD88 3198 187 2860 1547 2289 2621
After skipping the first four columns, I create a box plot in R from the 5th column onwards using the following commands:
# Read the tab-delimited coverage file and draw one box per sample column.
# NOTE(review): the surrounding text calls the file "New.txt" while this path
# is lower-case; confirm the spelling on case-sensitive file systems.
file <- "new.txt"
# skip = 1 drops the header line, so the columns get the default names V1, V2, ...
x=read.table(file,skip=1)
# NOTE(review): V8 is not plotted and V11 is referenced although the sample
# shown has only ten columns; confirm which columns are intended.
boxplot(x$V5,x$V6,x$V7,x$V9,x$V10,x$V11,col=rainbow(54),xlab="abc",ylab="Coverage",main="Coverage Metrics")
And I am getting the following box plot:
[![R plot][1]][1]
I want to modify this command such that I can incorporate any number of columns that will be present in the tab delimited file and label each box plot as per its column head.
I recommend reshaping from wide to long.
Here is a minimal example using ggplot2
# Sample data: 100 ids plus ten columns (X1..X10) of standard-normal draws
df <- data.frame(
  id = sprintf("id%d", 1:100),
  matrix(rnorm(1000), nrow = 100, ncol = 10)
)
library(dplyr)
library(tidyr)
library(ggplot2)
# Reshape every column except id to long format (column name in `key`, number
# in `value`), fix the box order to X1..X10, and draw one box per column.
df %>%
  gather(key = key, value = value, -id) %>%
  mutate(key = factor(key, levels = paste0("X", seq_len(10)))) %>%
  ggplot(mapping = aes(x = key, y = value)) +
  geom_boxplot()
Explanation: Reshaping from wide to long stores the column names in a new column key and its values in value; we can then simply map key to x. This works for an arbitrary number of columns.
Update
Using your sample data
# Sample data from the question. check.names = FALSE keeps the sample headers
# exactly as written (e.g. "18NGS31"); with the default they would be turned
# into syntactic names ("X18NGS31") and the plot labels would no longer match
# the column heads. header = TRUE is spelled out because T is reassignable.
df <- read.table(text =
"Chr Start End Name 18NGS31 18MPD168 18NGS21 18NGS29 18NGS33 18NGS38
chr9 1234 1234 ABL1 1431 1 1112 1082 1809 1647
chr9 2345 2345 ASXL1 3885 37 3578 1974 2921 3559
chr9 3456 3456 ETV6 3235 188 2911 1578 2344 2673
chr9 4567 4567 MYD88 3198 187 2860 1547 2289 2621", header = TRUE, check.names = FALSE)
# Reshape the sample columns (everything except the four annotation columns)
# to long format and draw one box per sample, filled by sample.
df %>%
  gather(key = key, value = value, -c(Chr, Start, End, Name)) %>%
  ggplot(mapping = aes(x = key, y = value, fill = key)) +
  geom_boxplot()

Diagonals to rows in data.frame

I have a matrix-like data frame with an additional column denoting time. It contains information on the number of enrolled students in a given school, from grade 5 (column A) to grade 9 (column E).
time A B C D E
1 13 1842 1844 1689 1776 1716
2 14 1898 1785 1807 1617 1679
3 15 2065 1865 1748 1731 1590
4 16 2215 1994 1811 1708 1703
5 17 2174 2122 1903 1765 1699
I need to trace the size of the cohort over time, meaning that I need row-wise information on how many fifth graders from each starting year remained in the school from grades 6 through 9. For example, for the cohort that has begun fifth grade in 2013, I want information on how many remained in sixth grade in 2014, and so on.
Expected output
This is what I would like to end up with:
start.time point.A point.B point.C point.D point.E
1 13 1842 1785 1748 1708 1699
2 14 1898 1865 1811 1765 NA
3 15 2065 1994 1903 NA NA
4 16 2215 2122 NA NA NA
5 17 2174 NA NA NA NA
I have looked at diag() from base R, but I could only get the data from the main diagonal. Ideally, I'd like to accomplish this using dplyr syntax and the pipe.
Data
# Example data as a structure() literal (looks like dput() output): columns
# time and A-E, five rows, with class "grouped_df" and grouping variable "time".
structure(list(time = 13:17, A = c(1842, 1898, 2065, 2215, 2174), B = c(1844, 1785, 1865, 1994, 2122), C = c(1689, 1807, 1748, 1811, 1903), D = c(1776, 1617, 1731, 1708, 1765), E = c(1716, 1679, 1590, 1703, 1699)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), vars = "time", drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L), group_sizes = c(1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
time = 13:17), class = "data.frame", row.names = c(NA, -5L), vars = "time", drop = TRUE, .Names = "time"), .Names = c("time", "A", "B", "C", "D", "E"))
Convert the input DF except for the first column to a matrix mat. Then since row(mat) - col(mat) is constant on diagonals split with respect to that creating a list of ts class series in L. We used ts class since we can later cbind them even if they are of different lengths. The diagonals for which row(mat) - col(mat) >= 0 are the only ones we want so pick off those, cbind them together and transpose the result. Then replace all columns in DF except the first with that. No packages are used.
# Everything except the first column of DF, as a plain matrix.
mat <- as.matrix(DF[-1])
# row(mat) - col(mat) is constant along each diagonal, so splitting on it gives
# one vector per diagonal; each is wrapped in ts() so that series of different
# lengths can be cbind-ed later.
L <- lapply(split(mat, row(mat) - col(mat)), ts)
# Keep the diagonals whose offset is >= 0, bind them as columns, transpose so
# that each diagonal becomes a row, and substitute the result for all columns
# of DF except the first. The result is returned (printed), not assigned.
replace(DF, -1, t(do.call("cbind", L[as.numeric(names(L)) >= 0])))
giving:
time A B C D E
1 13 1842 1785 1748 1708 1699
2 14 1898 1865 1811 1765 NA
3 15 2065 1994 1903 NA NA
4 16 2215 2122 NA NA NA
5 17 2174 NA NA NA NA
Since you mentioned dplyr in your question, you could use dplyr::lead to shift the values of columns B to E by 1, 2 etc. respectively, and then bind the result with columns time and A from your original data as follows
library(tidyverse)
# Shift the k-th column after the second one up by k rows with dplyr::lead()
# (so B by 1, C by 2, ...), then re-attach the untouched first two columns.
bind_cols(
  df[, 1:2],
  map2_df(
    df[, 3:ncol(df)],
    seq_along(df[, 3:ncol(df)]),
    ~ dplyr::lead(.x, n = .y)
  )
)
# A tibble: 5 x 6
# Groups: time [5]
# time A B C D E
# <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 13 1842 1785 1748 1708 1699
#2 14 1898 1865 1811 1765 NA
#3 15 2065 1994 1903 NA NA
#4 16 2215 2122 NA NA NA
#5 17 2174 NA NA NA NA
Note that your data is grouped by time the way you provided it.
With some grouping and arranging and row_number(), we can do this with dplyr and tidyr, and we don't lose values.
Looks a bit messy, but here I create a 2-dimensional index where the second dimension is inverted. When these index positions are summed, we get a matching value for diagonal rows.
# Number the rows, go to long format, and within each row count the classes
# from E down to A; row index plus that count (minus 1) gives an identifier
# that is shared by the cells of one diagonal, which is then used as the
# colour of the lines and points.
data %>%
  ungroup() %>%
  mutate(row = row_number()) %>%
  gather(key = class, value = stud, A:E) %>%
  arrange(row, desc(class)) %>%
  group_by(row) %>%
  mutate(time_left = row_number()) %>%
  ungroup() %>%
  transmute(time, class, stud, start_year = time_left + row - 1) %>%
  ggplot(mapping = aes(x = time, y = stud, colour = factor(start_year))) +
  geom_line() +
  geom_point()
Replace the mirrored upper triangle of "d" with the values from the lower triangle.
# Matrix of every column of d except the first.
m <- as.matrix(d[-1])
# Blank out those columns; the cells that should keep a value are refilled below.
d[-1] <- NA
# upper.tri(m, diag = TRUE) with its columns reversed is TRUE in the upper-left
# triangle. Those cells are filled, column by column, with the lower triangle
# of m (diagonal included); all other cells stay NA.
d[-1][upper.tri(m, diag = TRUE)[ , ncol(m):1]] <- m[lower.tri(m, diag = TRUE)]
# time A B C D E
# 1 13 1842 1785 1748 1708 1699
# 2 14 1898 1865 1811 1765 NA
# 3 15 2065 1994 1903 NA NA
# 4 16 2215 2122 NA NA NA
# 5 17 2174 NA NA NA NA

Resources