Related
I have been given:
a named vector col:
col <- c(id = "CLUSTER", x = "LONGNUM", y = "LATNUM", n = "Severely.stunted.child.under.5.years..Total", pos = "Severely.stunted.child.under.5.years.Yes")
#a list of data frames (Results) that have the elements of col as columns and are named after a specific study area (see dput below). The list Results contains 19 different files (different years)
a list of shapefiles with 6 elements (the corresponding countries):
# Countries covered by the surveys; the same vector names the list of shapes.
study_area <- c("Ethiopia", "Liberia", "Malawi", "Rwanda", "Uganda", "Zimbabwe")
# Download each country outline and convert it from sf to an sp object.
Countries <- lapply(study_area, function(x) {
  shape <- gisco_get_countries(country = x, resolution = 60)
  # Column names corrected to match the attribute table shown in the dput of
  # Countries[[1]] (ISO3_CODE, CNTR_ID).
  # NOTE(review): IDs is documented as a vector of feature IDs rather than
  # column names - confirm it has any effect for sf input.
  as_Spatial(shape,
    cast = TRUE,
    IDs = c("CNTR_NAME", "ISO3_CODE", "CNTR_ID", "NAME_ENGL", "FID")
  )
})
names(Countries) <- study_area
I would like to perform the as.prevR function from the prevR library:
# Build a prevR object from the first survey and the first country outline.
as.prevR(Results[[1]], col, Countries[[1]])
But actually for every element in the lists where the names fit:
I tried something like: Map(function(x, y) { as.prevR(x, col, y)}, Results, Countries)
But there it does (obviously) not match by names of x and y
dput(Results[[1]][1:5,1:24])
structure(list(CLUSTER = c("", "1", "10", "100", "101"), Severely.stunted.child.under.5.years.No = c(3438,
8, 7, 9, 6), Severely.stunted.child.under.5.years.Yes = c(1047,
4, NA, 7, 1), Severely.stunted.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Stunted.child.under.5.years.No = c(2531, 2, 7,
7, 5), Stunted.child.under.5.years.Yes = c(1954, 10, NA, 9, 2
), Stunted.child.under.5.years..Total = c(4485, 12, 7, 16, 7),
Severely.wasted.child.under.5.years.No = c(4295, 11, 7, 16,
7), Severely.wasted.child.under.5.years.Yes = c(190, 1, NA,
NA, NA), Severely.wasted.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Wasted.child.under.5.years.No = c(3957, 10,
7, 16, 6), Wasted.child.under.5.years.Yes = c(528, 2, NA,
NA, 1), Wasted.child.under.5.years..Total = c(4485, 12, 7,
16, 7), Severely.underweight.child.under.5.years.No = c(4028,
10, 7, 12, 7), Severely.underweight.child.under.5.years.Yes = c(457,
2, NA, 4, NA), Severely.underweight.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Underweight.child.under.5.years.No = c(3185,
7, 7, 12, 5), Underweight.child.under.5.years.Yes = c(1300,
5, NA, 4, 2), Underweight.child.under.5.years..Total = c(4485,
12, 7, 16, 7), LATNUM = c(NA, 10.889096, 5.323272, 8.830199,
10.806748), LONGNUM = c(NA, 37.269565, 39.556812, 40.72964,
39.7703), SurveyId = c("ET2005DHS", "ET2005DHS", "ET2005DHS",
"ET2005DHS", "ET2005DHS"), DHSC = c("ET", "ET", "ET", "ET",
"ET"), Country = c("Ethiopia", "Ethiopia", "Ethiopia", "Ethiopia",
"Ethiopia")), row.names = c(NA, 5L), class = "data.frame")
and Countries
dput(Countries[[1]])
new("SpatialPolygonsDataFrame", data = structure(list(CNTR_NAME = "Federal Democratic Republic of Ethiopia",
ISO3_CODE = "ETH", CNTR_ID = "ET", NAME_ENGL = "Ethiopia",
FID = "ET"), class = "data.frame", row.names = 1L), polygons = list(
new("Polygons", Polygons = list(new("Polygon", labpt = c(39.6420582930584,
8.63562315843106), area = 93.13026982, hole = FALSE, ringDir = 1L,
coords = structure(c(41.6307, 42.4043, 41.816, 41.8348,
42.9681, 42.7628, 42.9804, 43.9589, 45.6126, 46.9411,
47.8524, 45.6126, 45.4747, 45.2923, 44.9162, 43.4741,
42.8138, 41.9101, 41.2328, 40.708, 39.9305, 39.5667,
38.9731, 38.1026, 36.9621, 35.9477, 35.8294, 35.3235,
35.0325, 34.9588, 34.5428, 33.7557, 33.0448, 33.2485,
33.8204, 34.0937, 34.1132, 34.4181, 34.8021, 35.2153,
35.6227, 36.1342, 36.5603, 37.2972, 37.5268, 37.9201,
38.5391, 39.0217, 40.0851, 40.8941, 41.6307, 13.3913,
12.4686, 11.6292, 11.0448, 10.9974, 10.7159, 10.0644,
9.0545, 8.4674, 8.0224, 7.9151, 5.5657, 5.4241, 5.2367,
4.9368, 4.7993, 4.301, 3.9823, 3.9616, 4.2326, 3.8858,
3.5224, 3.5158, 3.6459, 4.3833, 4.62, 5.2367, 5.413,
5.8494, 6.4537, 6.7418, 7.6074, 7.899, 8.381, 8.4168,
8.6026, 9.4986, 10.6735, 10.8052, 11.9187, 12.5064, 12.8315,
14.2577, 14.3876, 14.2588, 14.8128, 14.4413, 14.5899,
14.5456, 14.0891, 13.3913), dim = c(51L, 2L)))), plotOrder = 1L,
labpt = c(39.6420582930584, 8.63562315843106), ID = "1",
area = 93.13026982)), plotOrder = 1L, bbox = structure(c(33.0448,
3.5158, 47.8524, 14.8128), dim = c(2L, 2L), dimnames = list(c("x",
"y"), c("min", "max"))), proj4string = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs"))
If the Countries names are all in the Results names and if 'Results' have duplicates for names, then we can make the Countries to have the same length by replicating based on the names of the 'Results'
# Index Countries by the names of Results so that every data frame is paired
# with the shape of the same name (a country is repeated when several
# elements of Results carry its name).
Map(function(x, y) { as.prevR(x, col, y)}, Results, Countries[names(Results)])
I would like to plot the evolution of the number of workers per category ("A", "D", "F", "I"), from 2017 to 2021, with a stacked bar chart (with the labels in the middle of each bar, for each category), one bar per year. Yet my dataset isn't in the right way to do this, I think I need to use pivot_wider() or pivot_longer() from what I have seen here, but I don't really know how to manipulate these functions. Could anyone help ?
Here is the structure of my dataset, for reproducibility :
structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(23,
14, 29, 35, 16, 117), F = c(8, 7, 11, 6, 6, 38), I = c(449, 498,
415, 470, 531, 2363), annee = c("2017", "2018", "2019", "2020",
"2021", NA)), core = structure(list(A = c("10", "7", "8", "8",
"9"), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(449,
498, 415, 470, 531)), class = "data.frame", row.names = c(NA,
-5L)), tabyl_type = "two_way", totals = "row", row.names = c(NA,
6L), class = c("tabyl", "data.frame"))
library(tidyverse)
library(ggrepel)
df <- structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(
23,
14, 29, 35, 16, 117
), F = c(8, 7, 11, 6, 6, 38), I = c(
449, 498,
415, 470, 531, 2363
), annee = c(
"2017", "2018", "2019", "2020",
"2021", NA
)), core = structure(list(A = c(
"10", "7", "8", "8",
"9"
), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(
449,
498, 415, 470, 531
)), class = "data.frame", row.names = c(
NA,
-5L
)), tabyl_type = "two_way", totals = "row", row.names = c(
NA,
6L
), class = c("tabyl", "data.frame"))
# Drop the "Total" row (the only row without a year), make A numeric like the
# other categories, reshape to one row per year/category and draw stacked bars.
df |>
  filter(!is.na(annee)) |>
  mutate(A = as.double(A)) |>
  pivot_longer(-annee, names_to = "category") |>
  ggplot(aes(annee, value, fill = category, label = value)) +
  geom_col() +
  # vjust = 0.5 anchors each label at the middle of its bar segment instead of
  # at the segment's top edge, which is what the question asks for.
  geom_label_repel(position = position_stack(vjust = 0.5), max.overlaps = 20)
Created on 2022-08-08 by the reprex package (v2.0.1)
Once you remove the total row, and ensuring that A through I are numeric, you can pivot_longer and pass to ggplot() like this:
# Drop the totals row, convert the category columns A through I to numeric,
# reshape to long format (one row per year and group), then draw stacked bars.
data %>%
filter(A!="Total") %>%
mutate(across(A:I, as.numeric)) %>%
# Every column except annee becomes a (group, ct) pair.
pivot_longer(cols = -annee, names_to = "group", values_to = "ct") %>%
ggplot(aes(annee,ct,fill=group)) +
geom_col()
I did not add the category labels, since group I dominates each year; you might want to reconsider that visualization
I have panel data with years from 2005 to 2015 and sectors from 1 to 33 (excluding 2, 4 and 31). I would like to run some loops and save the output for each year-sector combination separately. This is my code:
# For each year i and sector s: subset the measures, build the
# reporter-by-reporter regulatory distance matrix and write it to a Stata .dta file.
for (i in 2005:2015){
# Keep the rows whose StartDate/EndDate interval covers year i.
ntm_data <-subset(ntm_data_wip, StartDate <=i & EndDate >i)
for(s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)){
# NOTE(review): ntm_data is overwritten here and reduced to three columns further
# down, so on the next pass of this loop the subset runs on data that no longer
# has an ISIC4 column.
ntm_data <-subset(ntm_data, ISIC4==s)
# Once the data is loaded, I exclude NTM codes which are missing.
# I only need the reporter, NTM code and product codes (HS 6-digit codes).
ntm_data <- ntm_data[!is.na(ntm_data$ntmcode)&ntm_data$ntmcode!="",]
ntm_data <- ntm_data[,c("reporter", "ntmcode", "hs6")]
# I group the data by reporter, NTM and product code (hs6) and count the number of combinations in a new variable called count.
ntm_data <- ntm_data %>% group_by(reporter, ntmcode, hs6) %>%
summarise(count = n())
# Inside a for loop the value of head() is not auto-printed; wrap it in print() to see it.
head(ntm_data)
# I prepare the regulatory matrix by creating a list of countries for which I want the regulatory distance. The
# regulatory matrix shows the distance between two countries and has as column and row names the ISO3 codes of the countries.
# As specified above, I am interested in having the analysis for all available countries.
avail_iso3s <- unique(ntm_data$reporter)
# I create an empty regulatory distance matrix. For column size I use the length of avail_iso3s and add 1 for the reporter column.
# I populate the column names with reporter and the ISO3 codes with the option dimnames.
regulatory_distance_matrix <- data.frame(matrix(vector(),0,length(avail_iso3s)+1,
dimnames = list(c(), c("reporter", avail_iso3s )
)),
stringsAsFactors=F)
#' Now I can move on to calculating the regulatory distance formula in page 3 of "DEEP REGIONAL INTEGRATION AND NON-TARIFF MEASURES:A METHODOLOGY FOR DATA ANALYSIS (2015)" .
#' As N is a constant, I start with calculating it outside of the loop
# N is the number of distinct (ntmcode, hs6) combinations in the current subset.
N <- ntm_data %>% group_by(ntmcode, hs6) %>% count()
N <- nrow(N)
# I now fill in the regulatory distance matrix with values
for (g in 1:length(avail_iso3s)){
country_a <- ntm_data[ntm_data$reporter==avail_iso3s[g],c("ntmcode", "hs6")]
country_a$country_a <- 1
regulatory_distance_matrix[g,"reporter"] <- avail_iso3s[g]
for (k in 1:length(avail_iso3s)){
# Skip this pair when the mirrored cell (row k, column of country g) is already filled.
if (!is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){next }
country_b <- ntm_data[ntm_data$reporter==avail_iso3s[k],c("ntmcode", "hs6")]
country_b$country_b <- 1
# Full outer join: combinations present for only one of the two countries get NA, then 0.
merged <- merge(country_a, country_b, by=c("ntmcode", "hs6"), all = TRUE)
merged[is.na(merged)] <- 0
merged$abs_diff <- abs(merged$country_a-merged$country_b)
# Share of all combinations on which the two countries differ.
rd <- sum(merged$abs_diff)/N
regulatory_distance_matrix[g,avail_iso3s[k]] <- rd
}
}
# Now I fill in the missing values and create a Stata dta.file.
for (g in 1:length(avail_iso3s)){
for (k in 1:length(avail_iso3s)){
# Copy the value from the mirrored cell for the pairs skipped above.
if (is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){
regulatory_distance_matrix[k,avail_iso3s[g]] <- regulatory_distance_matrix[g,avail_iso3s[k]]
}
}
}
# Tag the matrix with the current year and sector before writing it out.
regulatory_distance_matrix$year <-i
regulatory_distance_matrix$ISIC4 <-s
write.dta(regulatory_distance_matrix, paste0("C:/Users/Utente/Desktop/Master's thesis/Thesis analysis/- RD construction/Binary sectoral RD/regulatory_distance_matrix_",i,"_",s,".dta"))
}
}
However, after the first file (regulatory_distance_matrix_",i,"_",s,".dta") is correctly created, I get the following error during the creation of the second file:
Error in eval(e, x, parent.frame()) : oggetto "ISIC4" non trovato
Does someone know how to fix this issue?
Thanks in advance!
EDIT:
> dput(head(ntm_data_wip))
structure(list(reporter = c("TUR", "ARG", "BRA", "CHN", "USA",
"EUN"), Reporter_ISO_N = c("792", "032", "076", "156", "842",
"918"), hs6 = c("910610", "851679", "040221", "620449", "021012",
"284990"), ntmcode = c("B31", "A11", "B33", "B83", "A83", "B33"
), partner = c("TON", "WLD", "WLD", "IRN", "VAT", "WLD"), Partner_ISO_N = c("776",
"000", "000", "364", "336", "000"), nbr = c(1L, 1L, 1L, 1L, 2L,
1L), Year = c(2016L, 2014L, 2013L, 2016L, 2017L, 2011L), NTMNomenclature = c("M4",
"M4", "M4", "M4", "M4", "M4"), NomenCode = c("H4", "H4", "H4",
"H4", "H4", "H3"), Dataset_id = c(161L, 174L, 174L, 131L, 179L,
111L), ntm_1_digit = c("B", "A", "B", "B", "A", "B"), StartDate = c(2015L,
2006L, 2008L, 2011L, 1992L, 2009L), EndDate = c(9999L, 9999L,
9999L, 9999L, 9999L, 2011L), new_ISIC4 = c("32", "28", "10",
"13", "10", "19"), ISIC4 = c(32L, 28L, 10L, 13L, 10L, 19L)), datalabel = "", time.stamp = "31 Jul 2021 11:34", formats = c("%9s",
"%9s", "%9s", "%9s", "%9s", "%9s", "%9.0g", "%12.0g", "%9s",
"%9s", "%12.0g", "%9s", "%10.0g", "%10.0g", "%9s", "%10.0g"), types = c(3L,
3L, 6L, 4L, 3L, 3L, 65530L, 65529L, 2L, 3L, 65529L, 1L, 65529L,
65529L, 2L, 65530L), val.labels = structure(c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "")), var.labels = c("",
"", "", "", "", "", "Number of NTM, distinct codes", "", "",
"", "", "", "(min) StartDate", "(max) EndDate", "", ""), version = 118L, label.table = list(), expansion.fields = list(
c("ISIC4", "destring", "Characters removed were:"), c("ISIC4",
"destring_cmd", "destring new_ISIC4, gen(ISIC4)")), byteorder = "LSF", orig.dim = c(6953474L,
16L), row.names = c(NA, 6L), class = "data.frame")
This is too long for a comment.
The issue is that the inner loop, for (s in c(...)) {...}, subsets ntm_data and then overwrites it. In the first pass ntm_data$ISIC4 is still a column, but the later line that keeps only reporter, ntmcode and hs6 drops it, so the next pass cannot find ISIC4. Plus, since we are subsetting, we don’t want subsequent iterations to start from data that has already been filtered by the previous s.
Here's a snippet of the top of the code, with a new object created at the start of the outer loop:
library(dplyr)
for (i in 2005:2015) {
  ## CHANGED - keep the year subset in its own object so that every pass of the
  ## inner loop starts from data that still has the ISIC4 column
  ntm_data_years <- subset(ntm_data_wip, StartDate <= i & EndDate > i)
  for (s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)) {
    ntm_data <- subset(ntm_data_years, ISIC4 == s) ## CHANGED to subset(ntm_data_years, ...)
    ntm_data <- ntm_data[!is.na(ntm_data$ntmcode) & ntm_data$ntmcode != "", ]
    ## This is the line that removes ISIC4 from the data frame
    ntm_data <- ntm_data[, c("reporter", "ntmcode", "hs6")]
    ## ... the rest of the inner-loop body stays as in the question ...
  }
}
I merged nine plots together and I would like to group them based on different characteristics (A, B, C). Is there a simple way to add labels or annotations at the bottom of the plots? When using cowplot or gridExtra I receive the following error:
In as_grob.default(plot) :
Cannot convert object of class list into a grob.
Sample data
# Boxplot statistics for the nine plots: each element has the fields
# stats, n, conf, out, group and names.
list(
  list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(136.603333333333, 458.396666666667), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = ""),
  list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = ""),
  list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = ""),
  list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(676.246666666667, 725.753333333333), .Dim = 2:1),
       out = c(485, 464, 446), group = c(1, 1, 1), names = ""),
  list(stats = structure(c(104, 109.5, 113.5, 121, 125), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(110.471666666667, 116.528333333333), .Dim = 2:1),
       out = c(91, 91, 88, 84, 84, 79), group = c(1, 1, 1, 1, 1, 1), names = ""),
  list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = ""),
  # names was written as """" here, which does not parse; it is the empty
  # string like in every other element.
  list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(85.6466666666667, 119.353333333333), .Dim = 2:1),
       out = c(343, 318, 299, 257), group = c(1, 1, 1, 1), names = ""),
  list(stats = structure(c(7, 12, 22.5, 44, 72), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(14.0733333333333, 30.9266666666667), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = ""),
  list(stats = structure(c(5, 5, 6, 12.5, 21), .Dim = c(5L, 1L)),
       n = 36, conf = structure(c(4.025, 7.975), .Dim = 2:1),
       out = numeric(0), group = numeric(0), names = "")
)
Many thanks
I agree with the idea of using ggplot2 graphics with facets, but given your plot objects, you could do something like this (to get you started). I used ggplotify instead of cowplot because I ran into trouble with the figure margins, but you might be able to fix that by changing the null device (not tested).
Edit:
Added individual labels and y axis labels, as well as outer margins. You might have to adjust some of that depending on the output size of your composite plot. This may show you how you could adjust those settings for individual plots. Still, using ggplot2 to generate the plots would make things quite a bit easier.
library(grid)
library(gridExtra)
library(ggplotify)
sdt <- list(list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(136.603333333333, 458.396666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(676.246666666667, 725.753333333333), .Dim = 2:1),
out = c(485, 464, 446), group = c(1, 1, 1), names = ""),
list(stats = structure(c(104, 109.5, 113.5, 121, 125), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(110.471666666667, 116.528333333333), .Dim = 2:1),
out = c(91, 91, 88, 84, 84, 79), group = c(1, 1, 1, 1, 1, 1), names = ""),
list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(85.6466666666667, 119.353333333333), .Dim = 2:1),
out = c(343, 318, 299, 257), group = c(1,1, 1, 1), names = ""),
list(stats = structure(c(7, 12, 22.5, 44, 72), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(14.0733333333333, 30.9266666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(5, 5, 6, 12.5, 21), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(4.025, 7.975), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""))
# Panel titles A1..A3, B1..B3, C1..C3, one per boxplot.
sublabels <- paste0(rep(LETTERS[1:3], each = 3), 1:3)

# Turn each base-graphics boxplot into a grob so gridExtra can arrange it.
# The plotting function takes its data through a default argument because
# as.grob() is handed the function itself rather than a call with arguments.
gplts <- lapply(seq_along(sdt), function(x) {
  as.grob(function(y = sdt[[x]]) {
    par(oma = c(0, 3, 0, 3)) # outer margin on the left and right of each panel
    bxp(y, ylab = "values", main = sublabels[x])
  })
})

# One frame and caption per group, laid out side by side.
group_names <- c("Group A", "Group B", "Group C")
group_cols <- c("red", "green", "yellow")
# Approximate horizontal centre of each third of the page.
group_x <- c(0.33 / 2, 0.5, 1 - 0.33 / 2)

# Coloured frames first, on a fresh page.
grid.arrange(
  grobs = lapply(group_cols, function(cl) rectGrob(gp = gpar(col = cl))),
  nrow = 1, newpage = TRUE
)

# Caption for each frame, drawn in a viewport centred on that frame.
for (g in seq_along(group_names)) {
  vp <- viewport(group_x[g], 0.45, gp = gpar(col = group_cols[g]))
  grid.text(group_names[g],
    y = 0.1, just = c("center", "bottom"),
    gp = gpar(fontsize = 20), vp = vp
  )
}

# Draw the boxplots over the frames; newpage = FALSE keeps the current page.
grid.arrange(grobs = gplts, nrow = 1, newpage = FALSE)
Created on 2021-03-25 by the reprex package (v1.0.0)
I am using following Data frame.
# Stack every column whose name contains "Attendance" into Hospital (key) /
# Attendance (value) pairs.
df2<-final.data%>% gather(Hospital,Attendance,contains("Attendance"))
# Spread the pairs back out to one column per Hospital key; the result is
# printed, not stored.
df2 %>% spread(Hospital, Attendance)
> dput(final.data[0:2,])
structure(list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99,
97), RoyalPerth.Tri1 = c("8", "N/A"), RoyalPerth.Tri2 = c(33,
41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80),
RoyalPert
h.Tri5 = c("20", "14"), Fremantle.Attendance = c(155,
145), Fremantle.Admissions = c(70, 56), Fremantle.Tri1 = c("N/A",
"N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67,
51), Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24
), PrincessMargaret.Attendance = c(252, 219), PrincessMargaret.Admissions = c(59,
47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13",
"14"), PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159,
139), PrincessMargaret.Tri5 = c("4", "4"), KingEdward.Attendance = c(52,
43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A",
"N/A"), KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7",
"N/A"), KingEdward.Tri4 = c(20, 25), KingEdward.Tri5 = c("25",
"17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109,
112), SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42,
43), SirCharles.Tri3 = c(108, 73), SirCharles.Tri4 = c(47,
61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166,
175), Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A",
"N/A"), Armadale.Tri2 = c(16, 26), Armadale.Tri3 = c(62,
73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"
), Swan.Attendance = c(133, 129), Swan.Admissions = c(17,
25), Swan.Tri1 = c("N/A", "N/A"), Swan.Tri2 = c(29, 25),
Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43), Swan.Tri5 = c("N/A",
"4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10",
"24"), Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12,
26), Rockingham.Tri3 = c(51, 45), Rockingham.Tri4 = c(81,
65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267,
241), Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A",
"N/A"), Joondalup.Tri2 = c(27, 23), Joondalup.Tri3 = c(75,
78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12",
"7")), row.names = 1:2, class = "data.frame")
Error:
Warning message:
attributes are not identical across measure variables;
they will be dropped
I have tried below things:
hospital.dataset<-gather(hospital,triage,sum,Tri1:Tri5) to gather Triage
after using cbind on the data set.
I want to convert it into a long data set using gather.
dput(hospital.dataset[1:2,])
structure(list(Date = structure(c(-714598, -714597), class = "Date"), [enter image description here][1]
Attendance = c(235, 209), Admissions = c(99, 97), Hospital = structure(c(1L,
1L), .Label = c("RoyalPerth Hospital", "Fremantle Hospital",
"Princess Margaret Hospital", "KingEdward Hospital", "SirCharles Hospital",
"Armadale Hospital", "Swan Hospital", "Rockingham Hospital",
"Joondalup Hospital"), class = "factor"), triage = c("Tri1",
"Tri1"), sum = c(8, 0)), row.names = 1:2, class = "data.frame")
Like this.
Thanks in advance.
Expected Dataframe
Note: This solution feels like a lot of effort. So please consider there may be more elegant approaches available.
One issue with this data is that the values you want "wide" (Attendance, Admissions) are mixed in with the values you want "long" (Tri1, Tri2, etc).
This solution uses pivot_longer on the entire data frame (note: pivot_longer is the new gather syntax) , and then separate to pull out the hospital name from the specific data field.
Then it splits into two data frames, applies pivot_wider to the Attendance/Admissions columns, and rejoins after that.
library(tidyverse)
# Reshape everything to long form: one row per original row, hospital and field.
# All columns are cast to character first because the raw data mixes numeric
# and character columns (e.g. "N/A"), which cannot share one value column.
final_data_long <- final.data.raw %>%
  mutate(across(everything(), as.character)) %>%
  mutate(row_n = row_number()) %>%
  pivot_longer(-row_n, names_to = "field", values_to = "value") %>%
  # "RoyalPerth.Tri1" -> hospital = "RoyalPerth", category = "Tri1"
  separate(field, into = c("hospital", "category"))

# Fields that should end up as their own columns.
attend_admit <- final_data_long %>%
  filter(str_detect(category, "Attendance|Admissions"))

# Everything else is a triage count and stays long. The keys are spelled out so
# the join does not depend on (or report) an implicit natural join.
triage <- final_data_long %>%
  anti_join(attend_admit, by = c("row_n", "hospital", "category"))

# One row per original row and hospital, with Attendance and Admissions columns.
# No grouping is needed: id_cols already identifies the output rows.
attend_admit_long <- attend_admit %>%
  pivot_wider(
    id_cols = c(row_n, hospital), names_from = category,
    values_from = value
  )

# Attach Attendance/Admissions to every triage row of the same row and hospital.
triage %>%
  inner_join(attend_admit_long, by = c("row_n", "hospital")) %>%
  arrange(hospital) %>%
  select(-row_n)
Output
# A tibble: 90 x 5
hospital category value Attendance Admissions
<chr> <chr> <chr> <chr> <chr>
1 Armadale Tri1 N/A 166 19
2 Armadale Tri2 16 166 19
3 Armadale Tri3 62 166 19
4 Armadale Tri4 79 166 19
5 Armadale Tri5 9 166 19
6 Armadale Tri1 N/A 175 25
7 Armadale Tri2 26 175 25
8 Armadale Tri3 73 175 25
9 Armadale Tri4 55 175 25
10 Armadale Tri5 19 175 25
# … with 80 more rows
Data
*I couldn't get OP's dput to work, here's a version that can be copy/pasted:
final.data.raw <- structure(
list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99, 97), RoyalPerth.Tri1 = c("8", "N/A"),
RoyalPerth.Tri2 = c(33, 41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80),
RoyalPerth.Tri5 = c("20", "14"), Fremantle.Attendance = c(155, 145), Fremantle.Admissions = c(70, 56),
Fremantle.Tri1 = c("N/A", "N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67, 51),
Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24), PrincessMargaret.Attendance = c(252, 219),
PrincessMargaret.Admissions = c(59,47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13", "14"),
PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159, 139), PrincessMargaret.Tri5 = c("4", "4"),
KingEdward.Attendance = c(52, 43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A", "N/A"),
KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7", "N/A"), KingEdward.Tri4 = c(20, 25),
KingEdward.Tri5 = c("25", "17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109, 112),
SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42, 43), SirCharles.Tri3 = c(108, 73),
SirCharles.Tri4 = c(47, 61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166, 175),
Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A", "N/A"), Armadale.Tri2 = c(16, 26),
Armadale.Tri3 = c(62, 73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"),
Swan.Attendance = c(133, 129), Swan.Admissions = c(17, 25), Swan.Tri1 = c("N/A", "N/A"),
Swan.Tri2 = c(29, 25), Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43),
Swan.Tri5 = c("N/A", "4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10", "24"),
Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12, 26), Rockingham.Tri3 = c(51, 45),
Rockingham.Tri4 = c(81, 65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267, 241),
Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A", "N/A"), Joondalup.Tri2 = c(27, 23),
Joondalup.Tri3 = c(75, 78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12", "7")),
row.names = 1:2, class = "data.frame")