gt table truncated when output as png - r

I am attempting to output a fairly long gt table using gtsave. It keeps getting truncated, where the last few columns are missing. The scrollbar also shows up in the outputted image.
Here is the outputted table:
I want it to look like my 5% variation table, which is below:
Here is my code for both tables (including R Markdown headers):
3% Table
{r hotspot 3% 100 coverage reads table, fig.dim = c(12, 6)}
# Switch to the output directory so the relative PNG path below is written there.
setwd(output_hotspot)
# Build the 3% variation table. The input is a grouped tibble, so the grouping
# is dropped before it is handed to gt().
hotspot_3pct_table <- hotspot_3pct_longer %>%
ungroup() %>%
gt() %>%
tab_header(title = "Variation Across Lots at Select Base Pairs", subtitle = paste0("At least 100 coverage reads with at least 3% variation")) %>%
cols_label(
lot = "Lot"
) %>%
# Columns 2:24 are every column after `lot`; show them with two decimals.
fmt_number(columns = 2:24, decimals = 2) %>%
# NA cells are displayed as "--" (explained by the source note below).
fmt_missing(columns = everything(), missing_text = "--") %>%
tab_source_note(source_note = paste0("Dash indicates no variation present in lot")) %>%
tab_options(
table.width = pct(100)
)
# Saved without any extra arguments; this is the call whose PNG is reported
# as truncated.
gtsave(hotspot_3pct_table, "hotspot_3pct.png")
5% Table
{r hotspot 5% 100 coverage reads table, fig.dim = c(12, 6)}
# Switch to the output directory so the relative PNG path below is written there.
setwd(output_hotspot)
# Build the 5% variation table. The input is a grouped tibble, so the grouping
# is dropped before it is handed to gt().
hotspot_5pct_table <- hotspot_5pct_longer %>%
ungroup() %>%
gt() %>%
tab_header(title = "Variation Across Lots at Select Base Pairs", subtitle = paste0("At least 100 coverage reads with at least 5% variation")) %>%
cols_label(
lot = "Lot"
) %>%
# Columns 2:18 are every column after `lot`; show them with two decimals.
fmt_number(columns = 2:18, decimals = 2) %>%
# NA cells are displayed as "--" (explained by the source note below).
fmt_missing(columns = everything(), missing_text = "--") %>%
tab_source_note(source_note = paste0("Dash indicates no variation present in lot"))
# Unlike the 3% table, no table.width option is set and `expand = 10` is passed.
# NOTE(review): presumably `expand` pads the captured area -- confirm in the
# gtsave()/webshot documentation.
gtsave(hotspot_5pct_table, "hotspot_5pct.png", expand = 10)
I have tried different fig.dim settings and different expand settings. I haven't encountered this issue before, so I am not sure how to approach this.
Reprex of each dataset:
3% Dataset
hotspot_3pct_longer = structure(list(lot = c("ABL GMP1", "MVS", "Tox Lot", "MVB1",
"MVB2", "CTM2", "CTM1", "Fuji 30k", "Fuji 7.5k"), `12` = c(0.0382775119617225,
0.0390625, 0.034883720930233, NA, NA, NA, NA, NA, NA), `13` = c(0.0588235294117647,
NA, NA, 0.048076923076924, 0.0714285714285714, 0.0417789757412399,
NA, NA, NA), `253` = c(NA, NA, 0.03360709902766, NA, NA, NA,
NA, NA, NA), `1266` = c(0.0646451454923886, NA, NA, NA, NA, NA,
NA, NA, NA), `1820` = c(1, NA, NA, NA, NA, NA, NA, NA, NA), `1821` = c(1,
NA, NA, NA, NA, NA, NA, NA, NA), `2861` = c(0.0434994715017482,
NA, NA, NA, NA, NA, NA, NA, NA), `3031` = c(0.183159188690842,
NA, NA, NA, NA, NA, NA, NA, NA), `3252` = c(0.0521527362955475,
NA, NA, NA, NA, NA, NA, NA, NA), `3368` = c(0.107515576323988,
NA, NA, NA, NA, NA, NA, NA, NA), `3512` = c(0.345980014097939,
NA, 0.064333937531195, NA, NA, 0.0822086320821032, 0.078818748712571,
0.089279658964298, NA), `3527` = c(0.17209788747124, NA, 0.0377838832455329,
NA, NA, 0.0471288691223414, 0.044333490343853, 0.059236465044716,
NA), `3554` = c(0.250983372072233, NA, 0.05112660944206, NA,
NA, 0.0639663737103554, 0.055374526495866, 0.0861535232698471,
0.031875819851334), `4752` = c(NA, NA, 0.04827943749595, NA,
NA, 0.0498005129666572, 0.049766115231033, 0.052495800335974,
0.0519236625723281), `4761` = c(NA, NA, 0.038136808232708, NA,
NA, 0.0317014863319821, 0.036080058906219, 0.034794423440454,
0.033717392388648), `7078` = c(NA, NA, 0.032269021739131, NA,
NA, NA, NA, NA, NA), `7299` = c(0.0830269157229004, 0.083128195417535,
0.361278273500727, 0.375946173254836, 0.216166788588149, 0.110078513058805,
0.10393717387867, 0.355137204850032, 0.310679611650486), `7300` = c(0.0525369400359628,
0.050222762251924, 0.245149911816579, 0.232037691401649, 0.148067737733391,
0.0629358437935844, 0.063008245663919, 0.236435818262021, 0.20123839009288
), `7301` = c(NA, NA, 0.0519736842105269, 0.038054968287527,
NA, NA, NA, 0.037803780378038, 0.034240150093809), `7315` = c(NA,
NA, NA, 0.037735849056604, 0.0406386066763426, NA, NA, NA, 0.036363636363637
), `7318` = c(0.0474754244861484, 0.07482430756511, NA, NA, 0.0369206598586017,
0.0493811726465808, 0.046463780540078, NA, NA), `7319` = c(0.0623240852432649,
0.083063994828701, NA, 0.0326086956521739, 0.058765915768854,
0.0560072267389341, 0.0604447228311939, 0.053601340033501, 0.039495798319328
), `7320` = c(0.0808298755186722, 0.10897808803568, NA, 0.045643153526971,
0.0581113801452785, 0.0764283011729096, 0.081006685017696, 0.04,
0.031458531935177)), row.names = c(NA, -9L), groups = structure(list(
lot = c("ABL GMP1", "CTM1", "CTM2", "Fuji 30k", "Fuji 7.5k",
"MVB1", "MVB2", "MVS", "Tox Lot"), .rows = structure(list(
1L, 7L, 6L, 8L, 9L, 4L, 5L, 2L, 3L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
5% Dataset
hotspot_5pct_longer = structure(list(lot = c("MVB2", "ABL GMP1", "CTM1", "CTM2", "Tox Lot",
"Fuji 30k", "Fuji 7.5k", "MVB1", "MVS"), `13` = c(0.0714285714285714,
0.0588235294117647, NA, NA, NA, NA, NA, NA, NA), `1266` = c(NA,
0.0646451454923886, NA, NA, NA, NA, NA, NA, NA), `1820` = c(NA,
1, NA, NA, NA, NA, NA, NA, NA), `1821` = c(NA, 1, NA, NA, NA,
NA, NA, NA, NA), `3031` = c(NA, 0.183159188690842, NA, NA, NA,
NA, NA, NA, NA), `3252` = c(NA, 0.0521527362955475, NA, NA, NA,
NA, NA, NA, NA), `3368` = c(NA, 0.107515576323988, NA, NA, NA,
NA, NA, NA, NA), `3512` = c(NA, 0.345980014097939, 0.078818748712571,
0.0822086320821032, 0.064333937531195, 0.089279658964298, NA,
NA, NA), `3527` = c(NA, 0.17209788747124, NA, NA, NA, 0.059236465044716,
NA, NA, NA), `3554` = c(NA, 0.250983372072233, 0.055374526495866,
0.0639663737103554, 0.05112660944206, 0.0861535232698471, NA,
NA, NA), `4752` = c(NA, NA, NA, NA, NA, 0.052495800335974, 0.0519236625723281,
NA, NA), `7299` = c(0.216166788588149, 0.0830269157229004, 0.10393717387867,
0.110078513058805, 0.361278273500727, 0.355137204850032, 0.310679611650486,
0.375946173254836, 0.083128195417535), `7300` = c(0.148067737733391,
0.0525369400359628, 0.063008245663919, 0.0629358437935844, 0.245149911816579,
0.236435818262021, 0.20123839009288, 0.232037691401649, 0.050222762251924
), `7301` = c(NA, NA, NA, NA, 0.0519736842105269, NA, NA, NA,
NA), `7318` = c(NA, NA, NA, NA, NA, NA, NA, NA, 0.07482430756511
), `7319` = c(0.058765915768854, 0.0623240852432649, 0.0604447228311939,
0.0560072267389341, NA, 0.053601340033501, NA, NA, 0.083063994828701
), `7320` = c(0.0581113801452785, 0.0808298755186722, 0.081006685017696,
0.0764283011729096, NA, NA, NA, NA, 0.10897808803568)), row.names = c(NA,
-9L), groups = structure(list(lot = c("ABL GMP1", "CTM1", "CTM2",
"Fuji 30k", "Fuji 7.5k", "MVB1", "MVB2", "MVS", "Tox Lot"), .rows = structure(list(
2L, 3L, 4L, 6L, 7L, 8L, 1L, 9L, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

Pass the vwidth and vheight options to gtsave(); they are forwarded to webshot(). Refer to the documentation:
https://www.rdocumentation.org/packages/webshot/versions/0.5.2/topics/webshot
# vwidth / vheight are the webshot() options being passed through gtsave().
# NOTE(review): presumably the viewport width and height in pixels -- confirm
# in the webshot documentation linked above.
gtsave(hotspot_3pct_table, "hotspot_3pct.png", vwidth = 1500, vheight = 1000)

Related

How to change the order of values in a plot [duplicate]

This question already has an answer here:
How to force specific order of the variables on the X axis?
(1 answer)
Closed 6 months ago.
This might be simple or it might not be: I need to change the order of the displayed data from "DBG DBN POST PRE" to "PRE DBG DBN POST", both in the plot and in the legend.
Here is my code:
# Stack the activity columns (Standing through Sitting) into name/value pairs,
# then draw one box per activity, filled by Condition.
Actigraph %>%
  pivot_longer(cols = Standing:Sitting) %>%
  ggplot(
    aes(
      x = name,
      y = value,
      fill = Condition
    )
  ) +
  geom_boxplot()
and here is the output
EDIT**
dput(head(Actigraph))
structure(list(Participant.Code = c("AE1_25", "AE1_25", "AE1_25",
"AE1_25", "AE1_25", "AE4_23"), Condition = c("DBG", "DBG", "DBG",
"DBG", "DBG", "DBG"), Day.within.condition = c(1L, 2L, 3L, 4L,
5L, 1L), Standing = c(34L, 631L, 10517L, 8467L, 33L, 892L), Stepping = c(76L,
598L, 788L, 598L, 59L, 234L), Cycling = c(220L, 56L, 4266L, 91L,
2920L, 144L), Sitting = c(9392L, 23589L, 12144L, 17205L, 17396L,
2550L), X = c(NA, NA, NA, NA, NA, NA), X.1 = c(NA, NA, NA, NA,
NA, NA), X.2 = c(NA, NA, NA, NA, NA, NA), X.3 = c(NA, NA, NA,
NA, NA, NA), X.4 = c(NA, NA, NA, NA, NA, NA), X.5 = c(NA, NA,
NA, NA, NA, NA), X.6 = c(NA, NA, NA, NA, NA, NA), X.7 = c(NA,
NA, NA, NA, NA, NA), X.8 = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA,
6L), class = "data.frame")
You could use forcats::lvls_reorder(), like this:
# Same boxplot, but with the fill levels reordered so that the 4th level comes
# first, followed by levels 1 to 3. With the levels listed in the question
# (DBG, DBN, POST, PRE) this yields PRE, DBG, DBN, POST.
# labs() restores a readable legend title, since the fill mapping is now an
# expression rather than the bare column name.
Actigraph %>%
  pivot_longer(cols = Standing:Sitting) %>%
  ggplot(
    aes(
      x = name,
      y = value,
      fill = forcats::lvls_reorder(Condition, c(4, 1:3))
    )
  ) +
  geom_boxplot() +
  labs(fill = "Condition")

How to separate data into morning and afternoon

I have a column "Start time" that contains both the date and the time. I would like to separate all the data into before noon and after noon (and save each part to a different .csv file).
> dput(head(DQ))
structure(list(ID = 1:6, Date = c("2022-02-15", "2022-02-25",
"2022-02-25", "2022-02-28", "2022-03-01", "2022-03-01"), Start.time = c("2-15-22 11:15:43",
"2-25-22 8:52:33", "2-25-22 8:55:08", "2-28-22 23:19:36", "3-1-22 8:58:31",
"3-1-22 21:04:49"), Completion.time = c("2-15-22 11:16:59", "2-25-22 8:55:02",
"2-25-22 16:16:37", "2-28-22 23:21:52", "3-1-22 9:00:02", "3-1-22 21:06:31"
), Email = c("anonymous", "anonymous", "anonymous", "anonymous",
"anonymous", "anonymous"), Name = c(NA, NA, NA, NA, NA, NA),
Total.points = c(NA, NA, NA, NA, NA, NA), Quiz.feedback = c(NA,
NA, NA, NA, NA, NA), Participant.Code = c("AE1_04", "AE1_02",
"AE1_02", "AE1_02", "AE1_02", "AE1_02"), Points...Participant.Code...Code.du.participant...Code.de.la.participante = c(NA,
NA, NA, NA, NA, NA), Feedback...Participant.Code...Code.du.participant...Code.de.la.participante = c(NA,
NA, NA, NA, NA, NA), Stiffness...Raideur = c(10L, 3L, 2L,
6L, 7L, 6L), Points...Stiffness...Raideur = c(NA, NA, NA,
NA, NA, NA), Feedback...Stiffness...Raideur = c(NA, NA, NA,
NA, NA, NA), Fatigue...Fatigue = c(10L, 6L, 4L, 6L, 7L, 5L
), Points...Fatigue...Fatigue = c(NA, NA, NA, NA, NA, NA),
Feedback...Fatigue...Fatigue = c(NA, NA, NA, NA, NA, NA),
Discomfort...Inconfort = c(7L, 7L, 5L, 6L, 7L, 7L), Points...Discomfort...Inconfort = c(NA,
NA, NA, NA, NA, NA), Feedback...Discomfort...Inconfort = c(NA,
NA, NA, NA, NA, NA), Happiness...Joie = c(1L, 8L, 7L, 8L,
7L, 8L), Points...Happiness...Joie = c(NA, NA, NA, NA, NA,
NA), Feedback...Happiness...Joie = c(NA, NA, NA, NA, NA,
NA), Productivity...Productivité = c(10L, 6L, 7L, 8L, 5L,
8L), Points...Productivity...Productivité = c(NA, NA, NA,
NA, NA, NA), Feedback...Productivity...Productivité = c(NA,
NA, NA, NA, NA, NA), Ability.to.concentrate...Capacité.de.se.concentrer = c(7L,
8L, 6L, 8L, 6L, 8L), Points...Ability.to.concentrate...Capacité.de.se.concentrer = c(NA,
NA, NA, NA, NA, NA), Feedback...Ability.to.concentrate...Capacité.de.se.concentrer = c(NA,
NA, NA, NA, NA, NA), Alertness...Vigilance = c(7L, 5L, 4L,
8L, 6L, 7L), Points...Alertness...Vigilance = c(NA, NA, NA,
NA, NA, NA), Feedback...Alertness...Vigilance = c(NA, NA,
NA, NA, NA, NA), Stress...Stress = c(10L, 9L, 7L, 8L, 7L,
7L), Points...Stress...Stress = c(NA, NA, NA, NA, NA, NA),
Feedback...Stress...Stress = c(NA, NA, NA, NA, NA, NA), Back.Pain...Mal.de.dos = c(9L,
6L, 6L, 7L, 7L, 7L), Points...Back.Pain...Mal.de.dos = c(NA,
NA, NA, NA, NA, NA), Feedback...Back.Pain...Mal.de.dos = c(NA,
NA, NA, NA, NA, NA), Neck.Pain...Douleur.au.cou = c(9L, 4L,
3L, 6L, 4L, 5L), Points...Neck.Pain...Douleur.au.cou = c(NA,
NA, NA, NA, NA, NA), Feedback...Neck.Pain...Douleur.au.cou = c(NA,
NA, NA, NA, NA, NA), Head.Pain...Mal.de.tête = c(7L, 1L,
1L, 2L, 1L, 1L), Points...Head.Pain...Mal.de.tête = c(NA,
NA, NA, NA, NA, NA), Feedback...Head.Pain...Mal.de.tête = c(NA,
NA, NA, NA, NA, NA), Eye.Pain...Douleur.oculaire = c(10L,
8L, 1L, 1L, 1L, 1L), Points...Eye.Pain...Douleur.oculaire = c(NA,
NA, NA, NA, NA, NA), Feedback...Eye.Pain...Douleur.oculaire = c(NA,
NA, NA, NA, NA, NA), In.the.past.24.hours..have.you.done.any.light.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.légère. = c("No / Non",
"No / Non", "Yes / Oui", "No / Non", "Yes / Oui", "Yes / Oui"
), Points...In.the.past.24.hours..have.you.done.any.light.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.légère. = c(NA,
NA, NA, NA, NA, NA), Feedback...In.the.past.24.hours..have.you.done.any.light.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.légère. = c(NA,
NA, NA, NA, NA, NA), If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c("",
"", "brisk 10 min walk", "", "stretching", "Stretching"),
Points...If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c(NA,
NA, NA, NA, NA, NA), Feedback...If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c(NA,
NA, NA, NA, NA, NA), In.the.past.24.hours..have.you.done.any.moderate.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.modérée. = c("No / Non",
"Yes / Oui", "No / Non", "Yes / Oui", "No / Non", "No / Non"
), Points...In.the.past.24.hours..have.you.done.any.moderate.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.modérée. = c(NA,
NA, NA, NA, NA, NA), Feedback...In.the.past.24.hours..have.you.done.any.moderate.physical.activity...Au.cours.des.dernières.24.heures..avez.vous.fait.une.activité.physique.modérée. = c(NA,
NA, NA, NA, NA, NA), If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps.2 = c("",
"30 min walk at lunch", "", "Magasiner et un petit strength training et cardio",
"", ""), Points...If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps.2 = c(NA,
NA, NA, NA, NA, NA), Feedback...If.yes..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps.2 = c(NA,
NA, NA, NA, NA, NA), In.the.past.24.hours..have.you.done.any.vigorous.physical.activity.....Au.cours.des.dernières.24.heures..avez.vous.pratiqué.une.activité.physique.intense. = c("No / Non",
"No / Non", "No / Non", "No / Non", "No / Non", "No / Non"
), Points...In.the.past.24.hours..have.you.done.any.vigorous.physical.activity.....Au.cours.des.dernières.24.heures..avez.vous.pratiqué.une.activité.physique.intense. = c(NA,
NA, NA, NA, NA, NA), Feedback...In.the.past.24.hours..have.you.done.any.vigorous.physical.activity.....Au.cours.des.dernières.24.heures..avez.vous.pratiqué.une.activité.physique.intense. = c(NA,
NA, NA, NA, NA, NA), If.so..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c("",
"", "", "", "", ""), Points...If.so..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c(NA,
NA, NA, NA, NA, NA), Feedback...If.so..what.did.you.do.and.for.how.long....Si.oui..qu.avez.vous.fait.et.pendant.combien.de.temps. = c(NA,
NA, NA, NA, NA, NA), Please.add.any.additional.comments.you.have...Veuillez.ajouter.tout.commentaire.supplémentaire.que.vous.avez. = c(NA,
NA, NA, NA, NA, NA), Points...Please.add.any.additional.comments.you.have...Veuillez.ajouter.tout.commentaire.supplémentaire.que.vous.avez. = c(NA,
NA, NA, NA, NA, NA), Feedback...Please.add.any.additional.comments.you.have...Veuillez.ajouter.tout.commentaire.supplémentaire.que.vous.avez. = c(NA,
NA, NA, NA, NA, NA)), row.names = c(NA, 6L), class = "data.frame")
I'm open to different ways of doing this, either sorting/filtering all of the morning and afternoons OR adding a column that states if it is morning and afternoon OR having it sorted and filtered in R and then having it saved to separate .csv files
please let me know if you need more details
First split:
library(dplyr)
# Flag each row by whether Start.time falls before noon, then split the data
# into a list of tibbles, one per value of the flag.
# The hour is converted to an integer before the comparison so that `< 12` is
# a numeric test rather than a string comparison that only works because "%H"
# is zero-padded.
DQ <-
  DQ |>
  mutate(
    morning = as.integer(
      format(as.POSIXct(Start.time, format = "%m-%d-%y %H:%M:%S"), "%H")
    ) < 12
  ) |>
  group_by(morning) |>
  group_split()
Output:
[[1]]
# A tibble: 2 × 6
ID Date Start.time Completion.time Email morning
<int> <chr> <chr> <chr> <chr> <lgl>
1 4 2022-02-28 2-28-22 23:19:36 2-28-22 23:21:52 anonymous FALSE
2 6 2022-03-01 3-1-22 21:04:49 3-1-22 21:06:31 anonymous FALSE
[[2]]
# A tibble: 4 × 6
ID Date Start.time Completion.time Email morning
<int> <chr> <chr> <chr> <chr> <lgl>
1 1 2022-02-15 2-15-22 11:15:43 2-15-22 11:16:59 anonymous TRUE
2 2 2022-02-25 2-25-22 8:52:33 2-25-22 8:55:02 anonymous TRUE
3 3 2022-02-25 2-25-22 8:55:08 2-25-22 16:16:37 anonymous TRUE
4 5 2022-03-01 3-1-22 8:58:31 3-1-22 9:00:02 anonymous TRUE
Then save:
library(readr)
# In the printed output, DQ[[1]] holds the morning == FALSE rows and DQ[[2]]
# the morning == TRUE rows. This positional indexing assumes both groups are
# present in the data.
write_csv(DQ[[1]], "afternoon.csv")
write_csv(DQ[[2]], "morning.csv")
You can use hms from the lubridate package for this and filter it or create a new column based on your need.
library(tidyverse)
library(lubridate)
df <- structure(list(ID = 1:6,
Date = c("2022-02-15", "2022-02-25",
"2022-02-25", "2022-02-28", "2022-03-01", "2022-03-01"),
Start.time = c("2-15-22 11:15:43", "2-25-22 8:52:33", "2-25-22 8:55:08",
"2-28-22 23:19:36", "3-1-22 8:58:31","3-1-22 21:04:49")),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
# Take the text after the first space in Start.time (the clock time), parse it
# as a period, and label each row "fn" when it is before 12:00:00, else "an".
df %>%
  mutate(
    time = hms(str_trim(str_extract(Start.time, " .*"))),
    day = ifelse(
      time >= hms("00:00:00") & time < hms("12:00:00"),
      "fn",
      "an"
    )
  )
# A tibble: 6 × 5
ID Date Start.time time day
<int> <chr> <chr> <Period> <chr>
1 1 2022-02-15 2-15-22 11:15:43 11H 15M 43S fn
2 2 2022-02-25 2-25-22 8:52:33 8H 52M 33S fn
3 3 2022-02-25 2-25-22 8:55:08 8H 55M 8S fn
4 4 2022-02-28 2-28-22 23:19:36 23H 19M 36S an
5 5 2022-03-01 3-1-22 8:58:31 8H 58M 31S fn
6 6 2022-03-01 3-1-22 21:04:49 21H 4M 49S an
This should do the job:
# Extract the hour of day from the start and completion timestamps of the
# question's data frame (DQ) and label each row by its start hour.
# hour() is called with its lubridate namespace so the snippet does not depend
# on lubridate being attached; dplyr must be loaded for %>% and mutate().
dt2 <- DQ %>%
  mutate(
    start_hour = lubridate::hour(
      as.POSIXct(Start.time, format = "%m-%d-%y %H:%M:%S")
    ),
    completion_hour = lubridate::hour(
      as.POSIXct(Completion.time, format = "%m-%d-%y %H:%M:%S")
    )
  ) %>%
  mutate(part_of_day = ifelse(start_hour < 12, "morning", "afternoon"))
Note that this classifies a start time of exactly noon as afternoon and exactly midnight as morning.

Is there a method to check total counts of a species matches the respective number of length measurements taken within a survey site?

I have a table that has for each survey site and survey date, a total of the number organisms counted, and measurements for each organism found. I would like to make sure that the data is correct by making sure the total organism counted match the total number of measurements taken.
I initially tried to gather the table, changed the values to 1 or 0 if a measurement was taken, and then group_by and summarise. This method didn't work, and I am sure there is a nicer method, so any help would be appreciated.
Ideally I would like a table that has site, survey date, total counts and a count column derived from summing the number of measurements taken. The idea would be that the two count columns should have the same values, and hence not be missing data.
Sample data -
structure(list(Date.of.Survey = c("12/04/2022", "16/04/2022",
"12/04/2022", "13/04/2022", "14/04/2022", "15/04/2022"), Location = c("Wandle - Merton Abbey Mills",
"Wandle - Merton Abbey Mills", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir"), Was.the.trap.working.when.you.checked.it. = c("Yes",
"Yes", "Yes", "Yes", "Yes", "Yes"), Number.of.eels = c(0L, 1L,
0L, 0L, 0L, 20L), X1..Length..mm. = c("", "180", "", "", "",
"72"), X2..Length..mm. = c("", "", "", "", "", "69"), X3..Length..mm. = c("",
"", "", "", "", "76"), X4..Length..mm. = c("", "", "", "", "",
"72"), X5..Length..mm. = c("", "", "", "", "", "72"), X6..Length..mm. = c("",
"", "", "", "", "73"), X7..Length..mm. = c(NA, NA, NA, NA, NA,
77L), X8..Length..mm. = c(NA, NA, NA, NA, NA, 78L), X9..Length..mm. = c(NA,
NA, NA, NA, NA, 75L), X10..Length..mm. = c(NA, NA, NA, NA, NA,
72L), X11..Length..mm. = c(NA, NA, NA, NA, NA, 75L), X12..Length..mm. = c(NA,
NA, NA, NA, NA, 78L), X13..Length..mm. = c(NA, NA, NA, NA, NA,
74L), X14..Length..mm. = c(NA, NA, NA, NA, NA, 70L), X15..Length..mm. = c(NA,
NA, NA, NA, NA, 75L), X16..Length..mm. = c(NA, NA, NA, NA, NA,
75L), X17..Length..mm. = c(NA, NA, NA, NA, NA, 73L), X18..Length..mm. = c(NA,
NA, NA, NA, NA, 72L), X19..Length..mm. = c(NA, NA, NA, NA, NA,
75L), X20..Length..mm. = c(NA, NA, NA, NA, NA, 71L), X21..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X22..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X23..Length..mm. = c(NA, NA, NA, NA, NA, NA), X24..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X25..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X26..Length..mm. = c(NA, NA, NA, NA, NA, NA), X27..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X28..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X29..Length..mm. = c(NA, NA, NA, NA, NA, NA), X30..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X31..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X32..Length..mm. = c(NA, NA, NA, NA, NA, NA), X33..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X34..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X35..Length..mm. = c(NA, NA, NA, NA, NA, NA), X36..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X37..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X38..Length..mm. = c(NA, NA, NA, NA, NA, NA), X39..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X40..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X41..Length..mm. = c(NA, NA, NA, NA, NA, NA), X42..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X43..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X44..Length..mm. = c(NA, NA, NA, NA, NA, NA), X45..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X46..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X47..Length..mm. = c(NA, NA, NA, NA, NA, NA), X48..Length..mm. = c(NA,
NA, NA, NA, NA, NA), X49..Length..mm. = c(NA, NA, NA, NA, NA,
NA), X50..Length..mm. = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Thanks in advance
You want to first make sure that your blanks are NAs. Then you can use rowSums to count the number of non-NA columns, and finally use case_when to create a variable to identify whether the count matches the number of measurements. I also recommend using janitor's clean_names function to make it a little easier to work with your variable names.
library(dplyr)
library(janitor)
# Check that the recorded eel count equals the number of length measurements.
#  1. clean_names() turns e.g. `Number.of.eels` / `X1..Length..mm.` into
#     `number_of_eels` / `x1_length_mm`, which the later steps rely on.
#  2. Blank strings become NA in the character columns only; the integer
#     length columns already use NA.
#  3. `count` is the number of non-NA length columns in each row. The columns
#     are selected by name so that every length column is included.
#  4. `count_match` is 1 when the two counts agree and 0 otherwise.
df <- df %>%
  clean_names() %>%
  mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
  mutate(count = rowSums(!is.na(select(., matches("_length_mm$"))))) %>%
  mutate(count_match = case_when(number_of_eels == count ~ 1,
                                 TRUE ~ 0))

Apply for loop or sapply on multiple columns in R

I have data like the following, in which the column names combine a place (A*) with a time-like code (01:01), and the row names such as 1C or 9D are the individual IDs.
structure(list(V1 = c("1C", "9D", "9F", "9H", "9S", "9T", "9Y"
), `A*01:01` = c(NA, NA, "1", NA, NA, NA, NA), `A*02:01` = c(NA,
NA, "1", NA, NA, NA, NA), `A*02:02` = c(NA, NA, "1", NA, NA,
NA, NA), `A*02:03` = c(NA, NA, "1", NA, NA, NA, NA), `A*02:05` = c(NA,
NA, NA, NA, NA, NA, "1"), `A*02:06` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*03:01` = c(NA, NA, "1", NA, NA, NA, NA), `A*11:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `A*11:02` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*23:01` = c(NA, NA, NA, NA, "1", NA, NA), `A*23:02` = c(NA,
NA, NA, NA, "1", NA, NA), `A*24:02` = c(NA, NA, NA, NA, "1",
NA, NA), `A*24:03` = c(NA, NA, NA, NA, "1", NA, NA), `A*25:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `A*26:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*29:01` = c(NA, NA, NA, NA, NA, "1", NA), `A*29:02` = c(NA,
NA, NA, NA, NA, "1", NA), `A*30:01` = c(NA, NA, NA, NA, "1",
NA, NA), `A*30:02` = c(NA, NA, NA, NA, "1", NA, NA), `A*31:01` = c(NA,
NA, NA, NA, NA, "1", NA), `A*32:01` = c(NA, NA, "1", NA, NA,
NA, NA), `A*33:01` = c(NA, NA, NA, NA, NA, "1", NA), `A*33:03` = c(NA,
NA, NA, NA, NA, "1", NA), `A*34:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*34:02` = c(NA, NA, NA, NA, NA, NA, "1"), `A*36:01` = c(NA,
NA, "1", NA, NA, NA, NA), `A*43:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*66:01` = c(NA, NA, NA, NA, NA, NA, "1"), `A*66:02` = c(NA,
NA, NA, NA, NA, NA, "1"), `A*68:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `A*68:02` = c(NA, NA, NA, NA, NA, NA, "1"), `A*69:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `A*74:01` = c(NA, NA, "1", NA, NA,
NA, NA), `A*80:01` = c(NA, NA, "1", NA, NA, NA, NA), `B*07:02` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*07:03` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*08:01` = c(NA, "1", NA, NA, NA, NA, NA), `B*13:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*13:02` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*14:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*14:02` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*14:05` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*14:06` = c(NA, NA, NA, NA, NA, NA, "1"), `B*15:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*15:02` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*15:03` = c(NA, NA, NA, NA, NA, NA, "1"), `B*15:10` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*15:11` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*15:12` = c(NA, NA, NA, NA, NA, NA, "1"), `B*15:13` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*15:16` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*15:18` = c(NA, NA, NA, NA, NA, NA, "1"), `B*18:01` = c(NA,
NA, NA, "1", NA, NA, NA), `B*27:03` = c(NA, NA, NA, "1", NA,
NA, NA), `B*27:05` = c(NA, NA, NA, "1", NA, NA, NA), `B*27:08` = c(NA,
NA, NA, "1", NA, NA, NA), `B*35:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*35:08` = c(NA, NA, NA, NA, NA, NA, "1"), `B*37:01` = c(NA,
NA, NA, "1", NA, NA, NA), `B*38:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*39:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*39:05` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*40:01` = c(NA, NA, NA, "1", NA,
NA, NA), `B*40:02` = c(NA, NA, NA, "1", NA, NA, NA), `B*40:05` = c(NA,
NA, NA, "1", NA, NA, NA), `B*40:06` = c(NA, NA, NA, "1", NA,
NA, NA), `B*41:01` = c(NA, NA, NA, "1", NA, NA, NA), `B*41:02` = c(NA,
NA, NA, "1", NA, NA, NA), `B*42:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*44:02` = c(NA, NA, NA, NA, NA, NA, "1"), `B*44:03` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*45:01` = c(NA, NA, NA, "1", NA,
NA, NA), `B*46:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*47:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*48:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*49:01` = c(NA, NA, NA, "1", NA, NA, NA), `B*50:01` = c(NA,
NA, NA, "1", NA, NA, NA), `B*51:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*51:02` = c(NA, NA, NA, NA, NA, NA, "1"), `B*52:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*53:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*54:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*55:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*56:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*57:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*57:03` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*58:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*59:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*67:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*73:01` = c(NA, NA, NA, "1", NA,
NA, NA), `B*78:01` = c(NA, NA, NA, NA, NA, NA, "1"), `B*81:01` = c(NA,
NA, NA, NA, NA, NA, "1"), `B*82:01` = c(NA, NA, NA, NA, NA, NA,
"1"), `B*82:02` = c(NA, NA, NA, NA, NA, NA, "1"), `C*01:02` = c("1",
NA, "1", NA, NA, NA, NA), `C*02:02` = c("1", NA, NA, NA, NA,
NA, "1"), `C*02:10` = c("1", NA, NA, NA, NA, NA, "1"), `C*03:02` = c(NA,
NA, NA, NA, NA, NA, "1"), `C*03:03` = c(NA, NA, NA, NA, NA, NA,
"1"), `C*03:04` = c(NA, NA, NA, NA, NA, NA, "1"), `C*04:01` = c(NA,
NA, NA, NA, "1", NA, NA), `C*04:03` = c(NA, NA, NA, NA, NA, NA,
"1"), `C*05:01` = c("1", NA, NA, NA, NA, NA, "1"), `C*06:02` = c("1",
"1", NA, NA, NA, NA, NA), `C*07:01` = c("1", "1", NA, NA, NA,
NA, NA), `C*07:02` = c("1", "1", NA, NA, NA, NA, NA), `C*07:04` = c("1",
"1", NA, NA, NA, NA, NA), `C*08:01` = c("1", NA, NA, NA, NA,
NA, "1"), `C*08:02` = c("1", NA, NA, NA, NA, NA, "1"), `C*12:02` = c("1",
NA, NA, NA, NA, NA, "1"), `C*12:03` = c("1", NA, NA, NA, NA,
NA, "1"), `C*14:02` = c("1", NA, NA, NA, "1", NA, NA), `C*15:02` = c("1",
NA, NA, NA, NA, NA, "1"), `C*16:01` = c("1", NA, NA, NA, NA,
NA, "1"), `C*17:01` = c(NA, NA, NA, NA, NA, NA, "1"), `C*18:01` = c("1",
"1", NA, NA, NA, NA, NA), `C*18:02` = c("1", "1", NA, NA, NA,
NA, NA)), row.names = c("1C", "9D", "9F", "9H", "9S", "9T", "9Y"
), class = "data.frame")
I am using the following code to process the data, which is working fine for column 2. But I have 16000 columns and 400 rows in my real data. I want to use "for loop" or "sapply" in R to perform processing. A quick solution is much appreciated.
# Process a single column (column 2) of `dat` and write the result to "2.csv".
# Keep only the rows where column 2 is not NA ...
LA <- dat[!is.na(dat[,2]),]
# ... and take their values from the first column (V1, the IDs).
LA<-LA[,1]
# res: the IDs with all digits removed; pos: the IDs with all letters removed.
res <- gsub("[[:digit:]]","",LA)
pos <- gsub("[[:alpha:]]","",LA)
LA_sep <- data.frame(res, pos)
# Append "&" to every element so the pieces are separated once concatenated.
LA_sep$res <- paste0(LA_sep$res, "&")
LA_sep$pos <- paste0(LA_sep$pos, "&")
# Transpose so that `res` and `pos` become the two rows.
LA_sep <- as.data.frame(t(LA_sep))
# Collapse each row into one string.
LA_sep <- apply(LA_sep[1:ncol(LA_sep)], 1, function(x) paste(na.omit(x), collapse = "")) ## nrow(ma) ## ncol(ma) ## NCOL(1:12) ## NROW(1:12)
# The output file name is hard-coded for column 2.
write.csv(LA_sep, "2.csv")
When I run the above code on my full data, I get the following output for the 2nd column (A*01:01). First, I want to extract the values of column V1 for the rows where column 2 (then 3, 4, and so on) has the value 1. Then I want to split the digits and letters and save them as "res" and "pos", separated by "&", for the rows having the value 1 in each column (A*01:01, A*02:01, A*02:02, ...).
res F&K&M&Q&E&R&A&R&N&A&N&N&H&N&M&H&S&A&N&T&N&N&G&T&G&T&T&TL&D&I&I&Y&F&R&D&M&I&K&K&R&A&H&H&A&H&A&R&R&R&G&D&G&P&I&E&L
pos 9&44&45&62&63&65&69&65&66&69&66&66&70&66&67&70&71&76&77&80&77&77&79&80&79&80&80&80&81&90&95&97&99&109&114&116&138&142&144&144&145&149&151&151&152&151&152&156&163&163&167&166&167&193&194&275&276
So it is very difficult to do this manually for each column, without a loop, on big data. Each output should be saved as a separate file named after its column.

Deleting a line if multiple columns are NA - R solution

I want to delete rows only when selected columns are NA.
Data here:
dput(df)
structure(list(record_id = c("BIV-1601-1250-E1", "BIV-1601-1250-E1",
"BIV-1601-1250-E1", "BIV-1601-1250-E1", "BIV-1601-1250-E1", "BIV-1601-1719-E1",
"BIV-1601-1719-E1", "BIV-1601-1719-E1", "BIV-1601-1719-E1", "BIV-1601-1719-E1",
"BIV-1402-1368-E1", "BIV-1402-1368-E1", "BIV-1402-1368-E1", "BIV-1402-1368-E1",
"BIV-1402-1368-E1", "BIV-1101-1038-E1", "BIV-1101-1038-E1", "BIV-1101-1038-E1",
"BIV-1101-1038-E1", "BIV-1101-1038-E1", "BIV-1701-1145-E1", "BIV-1701-1145-E1",
"BIV-1701-1145-E1", "BIV-1701-1145-E1", "BIV-1701-1145-E1", "BIV-1102-2040-E1",
"BIV-1102-2040-E1", "BIV-1102-2040-E1", "BIV-1102-2040-E1", "BIV-1102-2040-E1"
), DATE = structure(c(NA, 17478, 17480, 17479, NA, 18295, NA,
18296, 18296, NA, NA, 17912, 17914, 17934, NA, 17221, 17221,
17223, 17224, NA, NA, 17820, 17822, 17823, NA, NA, 18359, 18361,
18361, NA), class = "Date"), haemoglobin = structure(c(NA, 101,
NA, NA, NA, 100, NA, NA, NA, NA, NA, 97.6, NA, NA, NA, NA, 109,
NA, NA, NA, NA, 120, NA, NA, NA, NA, 205, NA, NA, NA), label = "g/L", class = c("labelled",
"numeric")), WBC = structure(c(NA, NA, "5", NA, NA, NA, "27.6",
NA, NA, NA, NA, NA, "8.8", NA, NA, NA, NA, "10.3", NA, NA, NA,
NA, "23.5", NA, NA, NA, NA, "11.81", NA, NA), label = "10^9/L", class = c("labelled",
"character")), CRP = c(NA, NA, "9", NA, NA, NA, "499", NA, NA,
NA, NA, NA, "7", NA, NA, NA, "43", "54.4", NA, NA, NA, NA, "37",
NA, NA, NA, NA, "<4.0", NA, NA), admission_day = c(NA, 0L, 2L,
1L, NA, 1L, NA, 2L, 2L, NA, NA, 1L, 3L, 23L, NA, 0L, 0L, 2L,
3L, NA, NA, 0L, 2L, 3L, NA, NA, 0L, 2L, 2L, NA)), row.names = c(NA,
-30L), groups = structure(list(record_id = c("BIV-1101-1038-E1",
"BIV-1102-2040-E1", "BIV-1402-1368-E1", "BIV-1601-1250-E1", "BIV-1601-1719-E1",
"BIV-1701-1145-E1"), .rows = structure(list(16:20, 26:30, 11:15,
1:5, 6:10, 21:25), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I only want to drop the lines when the following columns DATE, haemoglobin, CRP, WBC, and admission_day all equal NA. My thoughts were something like this:
library(dplyr)
# Names of the columns that must all be NA for a row to be dropped.
cols_to_drop <- c("DATE", "haemoglobin", "CRP", "WBC", "admission_day")
# The asker's attempt, reported below as not working.
# NOTE(review): is.na(cols_to_drop) tests the character vector of names itself,
# not the data-frame columns those names refer to.
df <- df %>% mutate(case_when(is.na(cols_to_drop) ~ drop_na(DATE)))
Obviously (as usual for me) this doesn't work... I think it's something to do with needing to make case_when equal to a particular variable... but I want it to apply across the whole dataframe.
If someone can help, I'd be grateful!
You can use if_all/if_any -
library(dplyr)
cols_to_drop <- c("DATE", "haemoglobin", "CRP", "WBC", "admission_day")
# Keep every row except those where all of the listed columns are NA.
# all_of() marks cols_to_drop as an external character vector of column names;
# passing the bare vector to a selection is deprecated in tidyselect.
df %>% filter(!if_all(all_of(cols_to_drop), is.na))
With if_any -
# Keep the rows where at least one of the listed columns is not NA.
# all_of() marks cols_to_drop as an external character vector of column names;
# passing the bare vector to a selection is deprecated in tidyselect.
df %>% filter(if_any(all_of(cols_to_drop), Negate(is.na)))

Resources