hello all i have this datasset :
> dput(test1)
structure(list(startdate = c("2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-01", "2019-11-05", "2019-11-15",
"2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
"2019-11-21", NA), id = c("POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL62", "POL63", "POL64", "POL65",
"POL66", "POL67", "POL68", "POL69", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL62", "POL63", "POL64", "POL65",
"POL66", "POL67", "POL68", NA), m0_9 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98,
33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), m10_19 = c(NA,
NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65,
3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), m20_29 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA,
NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA,
NA, NA, NA, NA, NA), m30_39 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), m40_49 = c(32, 34, NA, NA,
NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), m50_59 = c(NA,
NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA,
7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), m60_69 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9,
1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), m70 = c(NA, NA, NA, NA, NA, NA, 32,
34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), f0_9 = c(32, 34, NA,
NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), f10_19 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA, 55,
3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f20_29 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA), f30_39 = c(NA, NA, NA, 32, 34, NA, NA, NA,
NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), f40_49 = c(NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA), f50_59 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f60_69 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), f70 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
I would like to create a list called ageCat. This list should contain a number of lists. The number of lists is the amount of age categories. Then for each age category i would like to extract the following info startAge, endAge, maleCount,femaleCount, totalCount.
Additionaly, i want only to sum up only individuals that have the same id and start date. For now i have written this:
create list of age
createLists <- function(startdate, id){
testFiltered = test1[policyid == id & start == startdate]
ageGroup <- vector("list", length == 8)
names(ageGroup) <- as.character(seq_along(ageGroup))
for(ageCat in seq_along(ageGroup)){
ageGroup[[ageCat]] <- getAgeInfo(testFiltered, ageCat)
}
getAgeInfo <- function(testFiltered, ageCat){
start =
end =
nomales =
nofemales =
}
ageGroup <- list(startAge = start,
endAge = end ,
maleCount = nomales ,
femaleCount = nofemales)
}
I have hard coded the length of the vecor ageGroup. How can i do this without hard coding it, aka. to look up how many columns with age categories I have for each gender?
Secondly, how can i extract the information startAge, endAge, maleCount,femaleCount, totalCount
Instead of working with lists I suggest to convert your data.frame to long format, getting rid of missing values and extracting sex and age. A `tidyverse´ approach might look like this:
library(dplyr)
library(tidyr)
library(tibble)
df <- tibble(
startdate = c(
"2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06", "2019-11-06",
"2019-11-06", "2019-11-06", "2019-11-06", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27", "2019-11-27",
"2019-11-27", "2019-11-27", "2019-11-01", "2019-11-05", "2019-11-15",
"2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
"2019-11-21", NA
),
id = c(
"POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL62", "POL63", "POL64", "POL65",
"POL66", "POL67", "POL68", "POL69", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL55", "POL56", "POL57", "POL58",
"POL59", "POL60", "POL61", "POL62", "POL63", "POL64", "POL65",
"POL66", "POL67", "POL68", NA
),
m0_9 = c(
NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98,
33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
),
m10_19 = c(
NA,
NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65,
3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
),
m20_29 = c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA,
NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA,
NA, NA, NA, NA, NA
),
m30_39 = c(
NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA
),
m40_49 = c(
32, 34, NA, NA,
NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
),
m50_59 = c(
NA,
NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA,
7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), m60_69 = c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9,
1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA
), m70 = c(
NA, NA, NA, NA, NA, NA, 32,
34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f0_9 = c(
32, 34, NA,
NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f10_19 = c(
NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA, 55,
3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f20_29 = c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA
), f30_39 = c(
NA, NA, NA, 32, 34, NA, NA, NA,
NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA
), f40_49 = c(
NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA
), f50_59 = c(
NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), f60_69 = c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, 34, NA, NA, NA, NA,
55, 3, NA, NA, NA, 7, 9, 1, 65, 3, 98, 33, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA
), f70 = c(
NA, NA, NA, NA, NA, NA, NA, NA,
NA, 32, 34, NA, NA, NA, NA, 55, 3, NA, NA, NA, 7, 9, 1, 65, 3,
98, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA
)
)
# Convert to tidy data frame
df_age <- df %>%
gather(age_sex, count, -startdate, -id) %>%
filter(!is.na(count)) %>%
extract(age_sex, into = c("sex", "start_age", "end_age"), regex = "(m|f)(\\d+)_?(\\d+)?", remove = FALSE) %>%
mutate(ageg = paste0(start_age, "_", end_age))
df_age
#> # A tibble: 187 x 8
#> startdate id age_sex sex start_age end_age count ageg
#> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
#> 1 2019-11-27 POL55 m0_9 m 0 9 32 0_9
#> 2 2019-11-27 POL56 m0_9 m 0 9 34 0_9
#> 3 2019-11-27 POL61 m0_9 m 0 9 55 0_9
#> 4 2019-11-27 POL55 m0_9 m 0 9 3 0_9
#> 5 2019-11-27 POL59 m0_9 m 0 9 7 0_9
#> 6 2019-11-27 POL60 m0_9 m 0 9 9 0_9
#> 7 2019-11-27 POL61 m0_9 m 0 9 1 0_9
#> 8 2019-11-27 POL55 m0_9 m 0 9 65 0_9
#> 9 2019-11-27 POL56 m0_9 m 0 9 3 0_9
#> 10 2019-11-27 POL57 m0_9 m 0 9 98 0_9
#> # ... with 177 more rows
# df back to nested list by startdate and ageg
df_list <- df_age %>%
# Count by startdate, ageg, start_age, end_age, sex
count(startdate, ageg, start_age, end_age, sex, wt = count) %>%
# male and female counts back in columns
spread(sex, n, fill = 0) %>%
# split by startdate
split(.$startdate) %>%
# ... and split each startdate list by ageg
lapply(function(x) split(x, x$ageg))
Created on 2020-03-10 by the reprex package (v0.3.0)
Related
I would like to know how many NAs there are per ID. For example in the case of ID = 1 from Monday till Friday there are a total of 4 NA.
Is there any solution in R?
My df:
My output:
My sample data:
structure(list(id = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
start.end = c("Mo_start", "Mo_end", "Tue_start", "Tue_end",
"Wed_start", "Wed_end", "Thur_start", "Thur_end", "Fri_start",
"Fri_end", "Mo_start", "Mo_end", "Tue_start", "Tue_end",
"Wed_start", "Wed_end", "Thur_start", "Thur_end", "Fri_start",
"Fri_end", "Mo_start", "Mo_end", "Tue_start", "Tue_end",
"Wed_start", "Wed_end", "Thur_start", "Thur_end", "Fri_start",
"Fri_end", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), time = structure(c(NA, NA,
25200, 53100, 25200, 53100, 25200, 53100, NA, NA, NA, NA,
NA, NA, 32400, 56700, NA, NA, NA, NA, 35100, 53100, 23400,
53100, 23400, 31500, 23400, 31500, 23400, 53100, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), class = c("hms", "difftime"), units = "secs"),
X4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA)), class = c("spec_tbl_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -80L), spec = structure(list(
cols = list(id = structure(list(), class = c("collector_double",
"collector")), start.end = structure(list(), class = c("collector_character",
"collector")), time = structure(list(format = ""), class = c("collector_time",
"collector")), X4 = structure(list(), class = c("collector_logical",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
df %>%
group_by(id) %>%
summarise(Missing=sum(is.na(time)), .groups="drop")
Giving
# A tibble: 4 x 2
id Missing
<dbl> <int>
1 1 4
2 2 8
3 3 0
4 NA 50
Your sample data appears not to match your expected output.
library(data.table)
dataset <- data.table(dataset)
dataset[is.na(time),.N,by=id]
Update:
Using setNames for adequate column names:
setNames(aggregate(time ~ id, data=df, function(x) {sum(is.na(x))}, na.action = NULL), c("id", "NAs/day"))
Output:
id NAs/day
1 1 4
2 2 8
3 3 0
First answer:
We could use aggregate:
aggregate(time ~ id, data=df, function(x) {sum(is.na(x))}, na.action = NULL)
Output
id time
1 1 4
2 2 8
3 3 0
I use the igraph and sf packages.
I have an igraph object whose vertices have spatial coordinates geo_dist_graph.
The vertices names and coordinates look like this:
grid_grid <-
structure(list(coords.x1 = c(15.504078, 15.704078, 15.904078,
15.104078, 15.304078, 15.504078, 15.704078, 15.104078, 15.304078,
15.704078, 14.904078, 14.304078, 13.904078, 14.704078, 13.704078,
14.104078, 14.704078, 14.904078, 13.704078, 13.904078, 14.704078,
13.704078, 13.904078, 14.304078),
coords.x2 = c(43.835623, 43.835623,
43.835623, 44.035623, 44.035623, 44.035623, 44.035623, 44.235623,
44.235623, 44.235623, 44.435623, 44.635623, 44.835623, 44.835623,
45.035623, 45.035623, 45.035623, 45.035623, 45.235623, 45.235623,
45.235623, 45.435623, 45.435623, 45.435623),
g9.nodes = c(27,
28, 29, 40, 41, 42, 43, 55, 56, 58, 69, 81, 94, 98, 108, 110,
113, 114, 123, 124, 128, 138, 139, 141)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"
))
The graph is from a simple squared adjacency matrix:
geo_dist_graph <-
structure(c(NA, 1, 1, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, NA, NA, NA,
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA,
NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, 1, NA,
1, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1, 1, 1, NA, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, 1, 1, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, NA,
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, 1, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 1, 1,
NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1, 1, NA, NA, NA, NA, 1, 1, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA,
NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1, 1, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, 1, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, 1,
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA, 1, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1, NA, 1, 1, NA, NA, 1, NA, NA, 1, 1, 1, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 1, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1,
1, NA, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, 1, NA, NA, 1, NA),
.Dim = c(24L,
24L))
colnames(geo_dist_graph) <- grid_grid$g9.nodes
row.names(geo_dist_graph) <- grid_grid$g9.nodes
geo_dist_graph <- graph_from_adjacency_matrix(geo_dist_graph, mode = "upper", diag = F)
The spatial coordinates where attched this way:
V(geo_dist_graph)$x <-
grid_grid$coords.x1[match(V(geo_dist_graph)$name, grid_grid$g9.nodes)]
V(geo_dist_graph)$y <-
grid_grid$coords.x2[match(V(geo_dist_graph)$name, grid_grid$g9.nodes)]
The graph is correclty plotted in space when using the plot function. But when I try to add a basemap like this plot(map_crop_sp, add = T), the map doesn't show behind the graph, but there is no error message.
The map is vector map, don't know if it's important. Here is the code used to create it.
map <- st_read("ne_10m_coastline/ne_10m_coastline.shp")
map_crop <- st_crop(map, xmin = 13.304078, ymin = 43.635623, xmax = 16.503846, ymax = 45.60185)
map_crop_sp <- as(map_crop, Class = "Spatial")
Answer
Since the igraph should be on top of the map, I plot it second. I also added rescale = F:
plot(map_crop_sp)
plot(geo_dist_graph, add = T, rescale = F)
Rationale
I typed ?plot.igraph. From there, I found ?igraph.plotting. It seems that plotting an igraph object rescales it (plot(..., rescale = TRUE):
Logical constant, whether to rescale the coordinates to the [-1,1]x-1,1 interval. This parameter is not implemented for tkplot.
Defaults to TRUE, the layout will be rescaled.
Hello I imported a dataset from SPSS in R, the dataset has labels and I want to use value labels as string values. Is there a way to do it?
head(dataset$A7B1)
<Labelled double>: A7b1. Cantón de San José en que reside
[1] NA NA NA 2 8 NA 4 NA 5
Labels:
value label
1 SAN JOSÉ
2 ESCAZÚ
3 DESAMPARADOS
4 PURISCAL
5 TARRAZÚ
6 ASERRÍ
7 MORA
8 GOICOECHEA
9 SANTA ANA
10 ALAJUELITA
11 CORONADO
12 ACOSTA
13 TIBAS
14 MORAVIA
15 MONTES DE OCA
16 TURRUBARES
17 DOTA
18 CURRIDABAT
19 PÉREZ ZELEDÓN
20 LEÓN CORTÉS
I need that every double labelled value become a string value according to the value label.
glimpse(dataset)
Rows: 283
Columns: 9
$ A7A <dbl+lbl> 2, 8, 3, 3, 1, 2, 4, 4, 4, 2, 2, 4, 3, 4, 2, 3, 1, 2, 2, 6, 1, 1, 2, 2, 1, 2, 3, 1, 2, 1, 1, 4, 3, 1, 2, 2, 1, 1, 4, ...
$ A7B1 <dbl+lbl> NA, NA, NA, NA, 8, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3, NA, NA, NA, 1, 11, NA, NA, 8, NA, NA, 3, NA, 14, 1,...
$ A7B2 <dbl+lbl> 1, NA, NA, NA, NA, 1, NA, NA, NA, 1, 1, NA, NA, NA, 1, NA, NA, 6, 2, NA, NA, NA, 1, 10, NA, 1, NA, NA, 1, NA, NA, NA,...
$ A7B3 <dbl+lbl> NA, NA, 1, 7, NA, NA, NA, NA, NA, NA, NA, NA, 3, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA...
$ A7B4 <dbl+lbl> NA, NA, NA, NA, NA, NA, 2, 1, 1, NA, NA, 9, NA, 7, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ A7B5 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ A7B6 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ A7B7 <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ A7B8 <dbl+lbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA..
dput(head(dataset$A7A))
structure(c(2, 8, 3, 3, 1, 2), label = "A7a. Provincia de residencia", labels = c(`San Jose` = 1, Alajuela = 2, Cartago = 3, Heredia = 4, Guanacaste = 5, Puntarenas = 6,
Limon = 7, Extrenjero = 8), class = "haven_labelled")
I typically use haven when reading in SPSS data and have a helper function for this. Hope this helps--if it doesn't please provide more info in your question :)
library(haven)
swap_labels <- function(x, keep_original = TRUE) {
labels <- attr(x, "labels")
new_vec <- names(labels)[match(x, labels)]
if(keep_original) {
haven::labelled_spss(new_vec, setNames(names(labels), labels))
} else {
new_vec
}
}
# Reproducible example
test_vec <- labelled_spss(1:3, labels = setNames(1:3, letters[1:3]))
> test_vec
<labelled_spss<integer>[3]>
[1] 1 2 3
Labels:
value label
1 a
2 b
3 c
> swap_labels(test_vec)
<labelled_spss<character>[3]>
[1] a b c
Labels:
value label
a 1
b 2
c 3
I'm quite new to R and have been struggling with properly formatting a forest plot I've created.
When I click the "zoom" option in R to open the graph in a new window, it looks as such:
Forest Plot Currently
My main goal is to get the forest plot as compact as possible, i.e. publication quality/style. I currently have wayyyy too much white space in my plot. I think it has something to do with me messing around with the par() function, and now have no clue how to revert to defaults.
#Metafor library
library(metafor)
#ReadXL library to import excel sheet
library(readxl)
#Name the data sheet from the excel file
ACDF<- read_excel("outpatient_ACDF_meta_analysis.xlsx")
#View the data sheet with view(ACDF)
par(mar=c(20,1,1,1))
#This below measures with risk ratios. If you want to measure odds ratios, use argument measure=OR
returnop <- escalc(measure="OR", ai=op_return_OR, bi=op_no_return_OR, ci=ip_return_OR, di=ip_no_return_OR, data=ACDF)
#Generate a Random Effects Model
REmodel<-rma(yi=yi, vi=vi, data=returnop, slab=paste(Author, Year, sep=", "), method="REML")
#Generate a forest plot of the data
forest(REmodel, xlim=c(-17, 6),
ilab=cbind(ACDF$op_return_OR, ACDF$op_no_return_OR, ACDF$ip_return_OR, ACDF$ip_no_return_OR),
ilab.xpos=c(-9.5,-8,-6,-4.5), cex=.75, ylim=c(-1, 27),
psize=1)
### add column headings to the plot
text(c(-9.5,-8,-6,-4.5), 26, c("Return+", "Return-", "Return+", "Return-"))
text(c(-8.75,-5.25), 27, c("Outpatient", "Inpatient"))
text(-16, 26, "Study", pos=4)
text(6, 26, "Log Odds Ratio [95% CI]", pos=2)
I'm not 100% as to how to provide my data otherwise, but I used the dput function to provide as follows. Apologies for the N/As, still fleshing out the data for the future.
structure(list(Study = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), Author = c("Stieber", "Villavicencio",
"Lied", "Liu", "Garringer", "Joseffer", "Trahan", "Lied", "Sheperd",
"Talley", "Martin", "McGirt", "Adamson", "Fu", "Arshi", "Khanna",
"McClelland", "Purger", "McLellend2", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Year = c(2005, 2007,
2007, 2009, 2010, 2010, 2011, 2012, 2012, 2013, 2015, 2015, 2016,
2017, 2017, 2017, 2017, 2017, 2017, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), op_return_OR = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 1, 3, 2, 16, 257, 7, NA, 5, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), op_no_return_OR = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
596, 769, 992, 4581, 958, 1749, NA, 3120, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_return_OR = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, 9, 2, 257, 2034, 12, NA,
200, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), ip_no_return_OR = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 589, 641, 482, 16171, 8930, 1744, NA, 46312, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), op_death = c(NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 1, NA,
1, 0, NA, 2, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), op_no_death = c(NA, NA, NA, 45, NA,
NA, NA, NA, NA, NA, 596, NA, 993, 4597, NA, 1754, NA, 3125, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), ip_death = c(NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 0, NA,
0, 42, NA, 2, NA, 20, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), ip_no_death = c(NA, NA, NA, 64,
NA, NA, NA, NA, NA, NA, 597, NA, 484, 16386, NA, 1754, NA, 46492,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
2979.79797979798), op_thrombo = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, NA, NA, 8, 20, 4, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_thrombo = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 597, NA, NA, 4589, 1195,
1752, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), ip_thrombo = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 2, NA, NA, 67, 150, 4, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_thrombo = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 595, NA, NA, 16361, 10814,
1752, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), op_stroke = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, NA, NA, 2, 12, 0, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_stroke = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 597, NA, NA, 4595, 1203,
1756, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), ip_stroke = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 2, NA, NA, 14, 132, 0, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_stroke = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 595, NA, NA, 16414, 10832,
1756, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), op_dysphagia = c(NA, NA, NA, 0, NA, NA,
NA, NA, NA, NA, NA, NA, 11, NA, NA, NA, NA, 2, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_dysphagia = c(NA,
NA, NA, 45, NA, NA, NA, NA, NA, NA, NA, NA, 618, NA, NA, NA,
NA, 49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), ip_dysphagia = c(NA, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, NA, 59, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_dysphagia = c(NA,
NA, NA, 63, NA, NA, NA, NA, NA, NA, NA, NA, 273, NA, NA, NA,
NA, 2917, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), op_hematoma = c(NA, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, 1, 4, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_hematoma = c(NA,
NA, NA, 45, NA, NA, NA, NA, NA, NA, NA, NA, 629, NA, NA, NA,
2015, 47, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), ip_hematoma = c(NA, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, 273, 65, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_hematoma = c(NA,
NA, NA, 63, NA, NA, NA, NA, NA, NA, NA, NA, 273, NA, NA, NA,
7791, 1713, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), .Names = c("Study", "Author", "Year", "op_return_OR",
"op_no_return_OR", "ip_return_OR", "ip_no_return_OR", "op_death",
"op_no_death", "ip_death", "ip_no_death", "op_thrombo", "op_no_thrombo",
"ip_thrombo", "ip_no_thrombo", "op_stroke", "op_no_stroke", "ip_stroke",
"ip_no_stroke", "op_dysphagia", "op_no_dysphagia", "ip_dysphagia",
"ip_no_dysphagia", "op_hematoma", "op_no_hematoma", "ip_hematoma",
"ip_no_hematoma"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-35L))
The par option looks ok to me. I changed the ylim option and modified the y location and size of some of the header text as below:
#Generate a forest plot of the data
forest(REmodel, xlim=c(-17, 6),
ylim=c(-1, 10),
ilab=cbind(ACDF$op_return_OR, ACDF$op_no_return_OR, ACDF$ip_return_OR,
ACDF$ip_no_return_OR),
ilab.xpos=c(-9.5,-8,-6,-4.5), cex=.75,
psize=1)
### add column headings to the plot
text(c(-9.5,-8,-6,-4.5), 8.5, c("Return+", "Return-", "Return+", "Return-"),
cex = 0.65)
text(c(-8.75,-5.25), 9.5, c("Outpatient", "Inpatient"))
text(-17, 8.5, "Study", pos=4)
text(6, 8.5, "Log Odds Ratio [95% CI]", pos=2)
This gives the following plot:
I want to combine these columns based on the value in "Date" so that there are only unique values of date with the corresponding age groups conglomerated. This was a result from using spread() in tidyr. If u look the values for Date are repeated
dput(dataframe) reads ....
structure(list(Date = c("201740", "201740", "201740", "201740",
"201741", "201741", "201741", "201741", "201742", "201742", "201742",
"201742", "201743", "201743", "201743", "201743", "201743", "201743",
"201744", "201744", "201744", "201744", "201744", "201744", "201745",
"201745", "201745", "201745", "201745", "201745", "201746", "201746",
"201746", "201746", "201746", "201746", "201747", "201747", "201747",
"201747", "201747", "201747", "201748", "201748", "201748", "201748",
"201748", "201748", "201749", "201749", "201749", "201749", "201749",
"201749", "201750", "201750", "201750", "201750", "201750", "201750",
"201751", "201751", "201751", "201751", "201751", "201751", "201752",
"201752", "201752", "201752", "201752", "201752", "201801", "201801",
"201801", "201801", "201801", "201801", "201802", "201802", "201802",
"201802", "201802", "201802", "201803", "201803", "201803", "201803",
"201803", "201803", "201804", "201804", "201804", "201804", "201804",
"201804", "201805"), `0-4 yr` = c(NA, 0.1, NA, NA, NA, 0.2, NA,
NA, NA, 0.2, NA, NA, NA, NA, 0.3, NA, NA, NA, NA, NA, 0.6, NA,
NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA,
1.8, NA, NA, NA, NA, NA, 2.7, NA, NA, NA, NA, NA, 3.3, NA, NA,
NA, NA, NA, 5.2, NA, NA, NA, NA, NA, 7.9, NA, NA, NA, NA, NA,
13.7, NA, NA, NA, NA, NA, 18.3, NA, NA, NA, NA, NA, 23.3, NA,
NA, NA, NA, NA, 28.2, NA, NA, NA, NA, NA, 35.6, NA, NA, NA, 41.9
), `18-49 yr` = c(NA, 0.1, NA, NA, 0.1, NA, NA, NA, NA, 0.2,
NA, NA, NA, 0.2, NA, NA, NA, NA, NA, 0.4, NA, NA, NA, NA, NA,
0.5, NA, NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 1, NA, NA,
NA, NA, NA, 1.4, NA, NA, NA, NA, NA, 1.9, NA, NA, NA, NA, NA,
2.7, NA, NA, NA, NA, NA, 4.2, NA, NA, NA, NA, NA, 6.6, NA, NA,
NA, NA, NA, 9.3, NA, NA, NA, NA, NA, 12.5, NA, NA, NA, NA, NA,
15.2, NA, NA, NA, NA, NA, 17.7, NA, NA, NA, NA, NA), `5-17 yr` = c(0,
NA, NA, NA, 0.1, NA, NA, NA, 0.1, NA, NA, NA, 0.1, NA, NA, NA,
NA, NA, 0.2, NA, NA, NA, NA, NA, 0.3, NA, NA, NA, NA, NA, 0.5,
NA, NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 0.9, NA, NA, NA,
NA, NA, 1.2, NA, NA, NA, NA, NA, 1.7, NA, NA, NA, NA, NA, 2.5,
NA, NA, NA, NA, NA, 3.5, NA, NA, NA, NA, NA, 4.3, NA, NA, NA,
NA, NA, 5.9, NA, NA, NA, NA, NA, 7.3, NA, NA, NA, NA, NA, 9,
NA, NA, NA, NA, NA, NA), `50-64 yr` = c(NA, NA, 0.2, NA, NA,
NA, 0.3, NA, NA, NA, 0.5, NA, NA, NA, NA, NA, 0.8, NA, NA, NA,
NA, NA, 1.1, NA, NA, NA, NA, NA, 1.6, NA, NA, NA, NA, NA, 2.2,
NA, NA, NA, NA, NA, 3.1, NA, NA, NA, NA, 4.1, NA, NA, NA, NA,
NA, 5.4, NA, NA, NA, NA, NA, NA, 8.1, NA, NA, NA, NA, NA, 13.7,
NA, NA, NA, NA, 21.7, NA, NA, NA, NA, NA, NA, 32.6, NA, NA, NA,
NA, NA, 42.9, NA, NA, NA, NA, NA, 52, NA, NA, NA, NA, NA, 60.2,
NA, NA), `65+ yr` = c(NA, NA, NA, 0.5, NA, NA, NA, 1, NA, NA,
NA, 2.1, NA, NA, NA, NA, NA, 3, NA, NA, NA, NA, NA, 3.9, NA,
NA, NA, NA, NA, 5.1, NA, NA, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 9.2, NA, NA, NA, NA, NA, 14.3, NA, NA, NA, NA, NA, 20.5,
NA, NA, NA, NA, NA, 30.2, NA, NA, NA, NA, NA, 50.2, NA, NA, NA,
NA, NA, 90.1, NA, NA, NA, NA, NA, 137.9, NA, NA, NA, NA, NA,
179.5, NA, NA, NA, NA, NA, 217.4, NA, NA, NA, NA, NA, 251.8,
NA)), .Names = c("Date", "0-4 yr", "18-49 yr", "5-17 yr", "50-64 yr",
"65+ yr"), class = "data.frame", row.names = c(NA, 97L))
Could try aggregation, this could have been done before your spread. But after works as well
library(tidyverse)
dataframe %>%
group_by(Date) %>%
summarise_all(funs(sum(., na.rm = T)))
I've used sum() here because its not clear how you want to summarise.
A more suitable way might be:
dataframe %>%
gather("age_group", "value", -Date) %>%
filter(!is.na(value)) %>%
spread(age_group, value)
Where we gather the data back to may have what been your original input, this needs to be filtered and then just re-spread