Why hover in plotly barchart does not work? - r

I've got data like this ...
# rok miesiac ile kwartal miesiac2 kwartal2 miesiac3 limit serwis typ ile2 ile_proc lp
# (dbl) (dbl) (dbl) (dbl) (chr) (fctr) (chr) (dbl) (chr) (chr) (dbl) (dbl) (dbl)
# 1 2017 1 31.5 1 1 Q1 2017 Styczeń 0 Sport wizyty 32.5 97 1
# 2 2017 2 1.0 1 2 Q1 2017 Luty 0 Sport wizyty 32.5 3 1
... and I try to draw this plot from plotly library ...
# Bar chart of `ile` against `lp`, coloured by `miesiac2`;
# the hover label is restricted to the `miesiac3` text.
plot_ly(
  data = tab,
  type = "bar",
  x = ~lp,
  y = ~ile,
  color = ~miesiac2,
  text = ~miesiac3,
  hoverinfo = "text"
)
... and everything is fine except the hover text: it does not show up, and I have no idea why. Curiously, when I use data in the same format but a bit 'longer', everything works.
I have no idea where the problem is. I hope you do!
Simple data:
structure(list(rok = c(2017, 2017), miesiac = c(1, 2), ile = c(31.5,
1), kwartal = c(1, 1), miesiac2 = c("1", "2"), kwartal2 = structure(c(1L,
1L), .Label = "Q1 2017", class = "factor"), miesiac3 = c("Styczeń",
"Luty"), limit = c(97, 97), serwis = c("Sport", "Sport"), typ = c("wizyty",
"wizyty"), ile2 = c(32.5, 32.5), ile_proc = c(97, 3), lp = c(1,
1)), class = "data.frame", .Names = c("rok", "miesiac", "ile",
"kwartal", "miesiac2", "kwartal2", "miesiac3", "limit", "serwis",
"typ", "ile2", "ile_proc", "lp"), row.names = c(NA, -2L))
'Longer' data:
structure(list(rok = c(2016, 2016, 2016, 2016, 2016, 2016, 2016,
2016, 2016, 2017, 2017), miesiac = c(4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2), ile = c(80.1, 87.5, 159, 104, 125.3, 74.2, 84.9, 74.4,
75.3, 81.8, 2.4), kwartal = c(2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1
), miesiac2 = c("1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2"), kwartal2 = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L), .Label = c("Q2 2016", "Q3 2016", "Q4 2016",
"Q1 2017"), class = "factor"), miesiac3 = c("Kwiecień", "Maj",
"Czerwiec", "Lipiec", "Sierpień", "Wrzesień", "Październik",
"Listopad", "Grudzień", "Styczeń", "Luty"), limit = c(308, 308,
308, 300, 300, 300, 245, 245, 245, 244, 244), serwis = c("Sport",
"Sport", "Sport", "Sport", "Sport", "Sport", "Sport", "Sport",
"Sport", "Sport", "Sport"), typ = c("odslony", "odslony", "odslony",
"odslony", "odslony", "odslony", "odslony", "odslony", "odslony",
"odslony", "odslony"), ile2 = c(326.6, 326.6, 326.6, 303.5, 303.5,
303.5, 234.6, 234.6, 234.6, 84.2, 84.2), ile_proc = c(25, 27,
49, 34, 41, 24, 36, 32, 32, 97, 3), lp = c(1, 1, 1, 2, 2, 2,
3, 3, 3, 4, 4)), class = "data.frame", .Names = c("rok", "miesiac",
"ile", "kwartal", "miesiac2", "kwartal2", "miesiac3", "limit",
"serwis", "typ", "ile2", "ile_proc", "lp"), row.names = c(NA,
-11L))

The plotting works for me, although I do get a warning
Warning message:
In RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
This is just a warning and can in this case be ignored. For the curious, it originates from RColorBrewer and can be avoided by manually specifying the colors.
library(RColorBrewer)
library(plotly)
# display.brewer.all() # see all the palettes
# brewer.pal() warns whenever n < 3 and then returns the 3-colour palette
# anyway, so ask for 3 colours directly and keep the first two: the colours
# are the same as before, but the warning is gone.
cols <- brewer.pal(n = 3, name = "Set2")[1:2]
plot_ly(
  xyshort,
  x = ~lp,
  y = ~ile,
  color = ~miesiac2,
  colors = cols, # explicitly name colors
  type = "bar",
  text = ~miesiac3,
  hoverinfo = "text"
)
R 3.3.2 on Windows 7 and plotly_4.5.6.

Related

Using Map on Function if name of df lists match

I have been given:
a named vector col:
col <- c(id = "CLUSTER", x = "LONGNUM", y = "LATNUM", n = "Severely.stunted.child.under.5.years..Total", pos = "Severely.stunted.child.under.5.years.Yes")
# a list of data frames, Results, whose columns include the elements of col and whose elements are named after a specific study area (see dput below). The list Results contains 19 different files (different years).
a list of shapefiles, Countries, with 6 elements (the corresponding countries):
# Download the boundary of every study country and convert it to an sp
# object; the resulting list is named by country so it can be indexed by name.
study_area <- c("Ethiopia", "Liberia", "Malawi", "Rwanda", "Uganda", "Zimbabwe")
Countries <- lapply(study_area, function(x) {
  gisco_get_countries(country = x, resolution = 60)
})
# Attribute names spelled as they appear in the converted object's data slot
# (ISO3_CODE, CNTR_ID).
# NOTE(review): confirm that `IDs` is meant to carry column names here.
Countries <- lapply(Countries, function(x) {
  as_Spatial(
    x,
    cast = TRUE,
    IDs = c("CNTR_NAME", "ISO3_CODE", "CNTR_ID", "NAME_ENGL", "FID")
  )
})
names(Countries) <- study_area
I would like to apply the as.prevR function from the prevR library:
# Convert the first survey data set, using the column mapping in `col` and
# the first country polygon as boundary.
as.prevR(Results[[1]], col, Countries[[1]])
But I actually want to do this for every pair of elements in the two lists whose names match.
I tried something like: Map(function(x, y) { as.prevR(x, col, y)}, Results, Countries)
But that (obviously) pairs the elements by position, not by the names of x and y.
dput(Results[[1]][1:5,1:24])
structure(list(CLUSTER = c("", "1", "10", "100", "101"), Severely.stunted.child.under.5.years.No = c(3438,
8, 7, 9, 6), Severely.stunted.child.under.5.years.Yes = c(1047,
4, NA, 7, 1), Severely.stunted.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Stunted.child.under.5.years.No = c(2531, 2, 7,
7, 5), Stunted.child.under.5.years.Yes = c(1954, 10, NA, 9, 2
), Stunted.child.under.5.years..Total = c(4485, 12, 7, 16, 7),
Severely.wasted.child.under.5.years.No = c(4295, 11, 7, 16,
7), Severely.wasted.child.under.5.years.Yes = c(190, 1, NA,
NA, NA), Severely.wasted.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Wasted.child.under.5.years.No = c(3957, 10,
7, 16, 6), Wasted.child.under.5.years.Yes = c(528, 2, NA,
NA, 1), Wasted.child.under.5.years..Total = c(4485, 12, 7,
16, 7), Severely.underweight.child.under.5.years.No = c(4028,
10, 7, 12, 7), Severely.underweight.child.under.5.years.Yes = c(457,
2, NA, 4, NA), Severely.underweight.child.under.5.years..Total = c(4485,
12, 7, 16, 7), Underweight.child.under.5.years.No = c(3185,
7, 7, 12, 5), Underweight.child.under.5.years.Yes = c(1300,
5, NA, 4, 2), Underweight.child.under.5.years..Total = c(4485,
12, 7, 16, 7), LATNUM = c(NA, 10.889096, 5.323272, 8.830199,
10.806748), LONGNUM = c(NA, 37.269565, 39.556812, 40.72964,
39.7703), SurveyId = c("ET2005DHS", "ET2005DHS", "ET2005DHS",
"ET2005DHS", "ET2005DHS"), DHSC = c("ET", "ET", "ET", "ET",
"ET"), Country = c("Ethiopia", "Ethiopia", "Ethiopia", "Ethiopia",
"Ethiopia")), row.names = c(NA, 5L), class = "data.frame")
and Countries
dput(Countries[[1]])
new("SpatialPolygonsDataFrame", data = structure(list(CNTR_NAME = "Federal Democratic Republic of Ethiopia",
ISO3_CODE = "ETH", CNTR_ID = "ET", NAME_ENGL = "Ethiopia",
FID = "ET"), class = "data.frame", row.names = 1L), polygons = list(
new("Polygons", Polygons = list(new("Polygon", labpt = c(39.6420582930584,
8.63562315843106), area = 93.13026982, hole = FALSE, ringDir = 1L,
coords = structure(c(41.6307, 42.4043, 41.816, 41.8348,
42.9681, 42.7628, 42.9804, 43.9589, 45.6126, 46.9411,
47.8524, 45.6126, 45.4747, 45.2923, 44.9162, 43.4741,
42.8138, 41.9101, 41.2328, 40.708, 39.9305, 39.5667,
38.9731, 38.1026, 36.9621, 35.9477, 35.8294, 35.3235,
35.0325, 34.9588, 34.5428, 33.7557, 33.0448, 33.2485,
33.8204, 34.0937, 34.1132, 34.4181, 34.8021, 35.2153,
35.6227, 36.1342, 36.5603, 37.2972, 37.5268, 37.9201,
38.5391, 39.0217, 40.0851, 40.8941, 41.6307, 13.3913,
12.4686, 11.6292, 11.0448, 10.9974, 10.7159, 10.0644,
9.0545, 8.4674, 8.0224, 7.9151, 5.5657, 5.4241, 5.2367,
4.9368, 4.7993, 4.301, 3.9823, 3.9616, 4.2326, 3.8858,
3.5224, 3.5158, 3.6459, 4.3833, 4.62, 5.2367, 5.413,
5.8494, 6.4537, 6.7418, 7.6074, 7.899, 8.381, 8.4168,
8.6026, 9.4986, 10.6735, 10.8052, 11.9187, 12.5064, 12.8315,
14.2577, 14.3876, 14.2588, 14.8128, 14.4413, 14.5899,
14.5456, 14.0891, 13.3913), dim = c(51L, 2L)))), plotOrder = 1L,
labpt = c(39.6420582930584, 8.63562315843106), ID = "1",
area = 93.13026982)), plotOrder = 1L, bbox = structure(c(33.0448,
3.5158, 47.8524, 14.8128), dim = c(2L, 2L), dimnames = list(c("x",
"y"), c("min", "max"))), proj4string = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs"))
If every name in 'Results' is also a name in 'Countries' (even when 'Results' contains duplicated names), we can index 'Countries' by the names of 'Results'. This repeats and reorders the countries so that both lists have the same length and line up element by element:
# Pair each survey data set with the country polygon of the same name
# (Countries is indexed by the names of Results) and convert every pair.
Map(
  function(survey, boundary) as.prevR(survey, col, boundary),
  Results,
  Countries[names(Results)]
)

use of pivot_wider to plot the evolution of variables in R

I would like to plot the evolution of the number of workers per category ("A", "D", "F", "I"), from 2017 to 2021, with a stacked bar chart (with the labels in the middle of each bar, for each category), one bar per year. Yet my dataset isn't in the right way to do this, I think I need to use pivot_wider() or pivot_longer() from what I have seen here, but I don't really know how to manipulate these functions. Could anyone help ?
Here is the structure of my dataset, for reproducibility :
structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(23,
14, 29, 35, 16, 117), F = c(8, 7, 11, 6, 6, 38), I = c(449, 498,
415, 470, 531, 2363), annee = c("2017", "2018", "2019", "2020",
"2021", NA)), core = structure(list(A = c("10", "7", "8", "8",
"9"), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(449,
498, 415, 470, 531)), class = "data.frame", row.names = c(NA,
-5L)), tabyl_type = "two_way", totals = "row", row.names = c(NA,
6L), class = c("tabyl", "data.frame"))
library(tidyverse)
library(ggrepel)
df <- structure(list(A = c("10", "7", "8", "8", "9", "Total"), D = c(
23,
14, 29, 35, 16, 117
), F = c(8, 7, 11, 6, 6, 38), I = c(
449, 498,
415, 470, 531, 2363
), annee = c(
"2017", "2018", "2019", "2020",
"2021", NA
)), core = structure(list(A = c(
"10", "7", "8", "8",
"9"
), D = c(23, 14, 29, 35, 16), F = c(8, 7, 11, 6, 6), I = c(
449,
498, 415, 470, 531
)), class = "data.frame", row.names = c(
NA,
-5L
)), tabyl_type = "two_way", totals = "row", row.names = c(
NA,
6L
), class = c("tabyl", "data.frame"))
# Drop the totals row (the only one whose `annee` is NA), make column A
# numeric like the other count columns, reshape to one row per
# year/category, and draw one stacked bar per year with a label per segment.
df |>
  filter(!is.na(annee)) |>
  mutate(A = as.double(A)) |>
  pivot_longer(-annee, names_to = "category") |>
  ggplot(aes(annee, value, fill = category, label = value)) +
  geom_col() +
  # vjust = 0.5 places each label at the middle of its bar segment rather
  # than at the segment's top edge.
  geom_label_repel(
    position = position_stack(vjust = 0.5),
    max.overlaps = 20
  )
Created on 2022-08-08 by the reprex package (v2.0.1)
Once you remove the total row, and ensuring that A through I are numeric, you can pivot_longer and pass to ggplot() like this:
# Remove the "Total" row, coerce columns A through I to numeric, reshape to
# long form and draw one stacked bar per year (no labels).
data |>
  filter(A != "Total") |>
  mutate(across(A:I, as.numeric)) |>
  pivot_longer(cols = -annee, names_to = "group", values_to = "ct") |>
  ggplot(mapping = aes(x = annee, y = ct, fill = group)) +
  geom_col()
I did not add the category labels, since group I dominates each year; you might want to reconsider that visualization

Error with loops in R: Error "object not found"

I have panel data with years from 2005 to 2015 and sectors from 1 to 33 (excluding 2, 4 and 31). I would like to run some loops and save the output for each year-sector combination separately. This is my code:
# For every year i (2005-2015) and every sector s, build a reporter-by-reporter
# regulatory distance matrix from ntm_data_wip and write it to its own .dta file.
for (i in 2005:2015){
# Keep the rows whose StartDate is <= i and whose EndDate is > i.
ntm_data <-subset(ntm_data_wip, StartDate <=i & EndDate >i)
for(s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)){
# NOTE(review): ntm_data is overwritten here and, a few lines below, reduced to
# the columns reporter/ntmcode/hs6. From the second value of s onwards it
# therefore has no ISIC4 column left (and only holds the previous sector's rows).
ntm_data <-subset(ntm_data, ISIC4==s)
# Once the data is loaded, I exclude NTM codes which are missing.
# I only need the reporter, NTM code and product codes (HS 6-digit codes).
ntm_data <- ntm_data[!is.na(ntm_data$ntmcode)&ntm_data$ntmcode!="",]
ntm_data <- ntm_data[,c("reporter", "ntmcode", "hs6")]
# I group the data by reporter, NTM and product code (hs6) and count the number of combinations in a new variable called count.
ntm_data <- ntm_data %>% group_by(reporter, ntmcode, hs6) %>%
summarise(count = n())
# Inside a for loop this value is not auto-printed.
head(ntm_data)
# I prepare the regulatory matrix by creating a list of countries for which I want the regulatory distance. The
# regulatory matrix shows the distance between two countries and has as column and row names the ISO3 codes of the countries.
# As specified above, I am interested in having the analysis for all available countries.
avail_iso3s <- unique(ntm_data$reporter)
# I create an empty regulatory distance matrix. For column size I use the length of avail_iso3s and add 1 for the reporter column.
# I populate the column names with reporter and the ISO3 codes with the option dimnames.
regulatory_distance_matrix <- data.frame(matrix(vector(),0,length(avail_iso3s)+1,
dimnames = list(c(), c("reporter", avail_iso3s )
)),
stringsAsFactors=F)
#' Now I can move on to calculating the regulatory distance formula in page 3 of "DEEP REGIONAL INTEGRATION AND NON-TARIFF MEASURES:A METHODOLOGY FOR DATA ANALYSIS (2015)" .
#' As N is a constant, I start with calculating it outside of the loop
# N ends up as the number of distinct (ntmcode, hs6) combinations in ntm_data.
N <- ntm_data %>% group_by(ntmcode, hs6) %>% count()
N <- nrow(N)
# I now fill in the regulatory distance matrix with values
for (g in 1:length(avail_iso3s)){
# Country g's (ntmcode, hs6) combinations, flagged with 1.
country_a <- ntm_data[ntm_data$reporter==avail_iso3s[g],c("ntmcode", "hs6")]
country_a$country_a <- 1
regulatory_distance_matrix[g,"reporter"] <- avail_iso3s[g]
for (k in 1:length(avail_iso3s)){
# Skip the pair when the mirrored cell [k, g] already holds a value.
if (!is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){next }
country_b <- ntm_data[ntm_data$reporter==avail_iso3s[k],c("ntmcode", "hs6")]
country_b$country_b <- 1
# Full outer join: a combination present for only one country gets NA, then 0,
# for the other, so abs_diff is 1 exactly where the two countries differ.
merged <- merge(country_a, country_b, by=c("ntmcode", "hs6"), all = TRUE)
merged[is.na(merged)] <- 0
merged$abs_diff <- abs(merged$country_a-merged$country_b)
rd <- sum(merged$abs_diff)/N
regulatory_distance_matrix[g,avail_iso3s[k]] <- rd
}
}
# Now I fill in the missing values and create a Stata dta.file.
# Each cell that is still NA is copied from its mirrored cell [g, k].
for (g in 1:length(avail_iso3s)){
for (k in 1:length(avail_iso3s)){
if (is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){
regulatory_distance_matrix[k,avail_iso3s[g]] <- regulatory_distance_matrix[g,avail_iso3s[k]]
}
}
}
# Tag the matrix with the current year and sector; both also go into the file name.
regulatory_distance_matrix$year <-i
regulatory_distance_matrix$ISIC4 <-s
write.dta(regulatory_distance_matrix, paste0("C:/Users/Utente/Desktop/Master's thesis/Thesis analysis/- RD construction/Binary sectoral RD/regulatory_distance_matrix_",i,"_",s,".dta"))
}
}
However, after the first file (regulatory_distance_matrix_",i,"_",s,".dta") is correctly created, I get the following error during the creation of the second file:
Error in eval(e, x, parent.frame()) : oggetto "ISIC4" non trovato
Does someone know how to fix this issue?
Thanks in advance!
EDIT:
> dput(head(ntm_data_wip))
structure(list(reporter = c("TUR", "ARG", "BRA", "CHN", "USA",
"EUN"), Reporter_ISO_N = c("792", "032", "076", "156", "842",
"918"), hs6 = c("910610", "851679", "040221", "620449", "021012",
"284990"), ntmcode = c("B31", "A11", "B33", "B83", "A83", "B33"
), partner = c("TON", "WLD", "WLD", "IRN", "VAT", "WLD"), Partner_ISO_N = c("776",
"000", "000", "364", "336", "000"), nbr = c(1L, 1L, 1L, 1L, 2L,
1L), Year = c(2016L, 2014L, 2013L, 2016L, 2017L, 2011L), NTMNomenclature = c("M4",
"M4", "M4", "M4", "M4", "M4"), NomenCode = c("H4", "H4", "H4",
"H4", "H4", "H3"), Dataset_id = c(161L, 174L, 174L, 131L, 179L,
111L), ntm_1_digit = c("B", "A", "B", "B", "A", "B"), StartDate = c(2015L,
2006L, 2008L, 2011L, 1992L, 2009L), EndDate = c(9999L, 9999L,
9999L, 9999L, 9999L, 2011L), new_ISIC4 = c("32", "28", "10",
"13", "10", "19"), ISIC4 = c(32L, 28L, 10L, 13L, 10L, 19L)), datalabel = "", time.stamp = "31 Jul 2021 11:34", formats = c("%9s",
"%9s", "%9s", "%9s", "%9s", "%9s", "%9.0g", "%12.0g", "%9s",
"%9s", "%12.0g", "%9s", "%10.0g", "%10.0g", "%9s", "%10.0g"), types = c(3L,
3L, 6L, 4L, 3L, 3L, 65530L, 65529L, 2L, 3L, 65529L, 1L, 65529L,
65529L, 2L, 65530L), val.labels = structure(c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "")), var.labels = c("",
"", "", "", "", "", "Number of NTM, distinct codes", "", "",
"", "", "", "(min) StartDate", "(max) EndDate", "", ""), version = 118L, label.table = list(), expansion.fields = list(
c("ISIC4", "destring", "Characters removed were:"), c("ISIC4",
"destring_cmd", "destring new_ISIC4, gen(ISIC4)")), byteorder = "LSF", orig.dim = c(6953474L,
16L), row.names = c(NA, 6L), class = "data.frame")
This is too long for a comment.
The issue is that the inner loop, for (s in c(...)) {...}, overwrites ntm_data itself. In the first pass ntm_data$ISIC4 is still a column, but the line that keeps only reporter, ntmcode and hs6 drops it, so in the second pass subset(ntm_data, ISIC4 == s) can no longer find ISIC4. Plus, since we are subsetting, we don't want subsequent iterations to start from data that was already filtered by the previous s.
Here's a snippet of the beginning of the code, with a new object created at the start of the outer loop:
library(dplyr)
for (i in 2005:2015) {
  ## CHANGED - make a different object for the inner loop to subset from,
  ## so the per-year data keeps all its columns (including ISIC4)
  ntm_data_years <- subset(ntm_data_wip, StartDate <= i & EndDate > i)
  for (s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)) {
    ## CHANGED to subset(ntm_data_years, ...): every sector starts from the
    ## untouched per-year data instead of the previous sector's result
    ntm_data <- subset(ntm_data_years, ISIC4 == s)
    ntm_data <- ntm_data[!is.na(ntm_data$ntmcode) & ntm_data$ntmcode != "", ]
    ## This is the line that removes ISIC4 from the data frame
    ntm_data <- ntm_data[, c("reporter", "ntmcode", "hs6")]
    # ... rest of the inner loop body unchanged ...
  }
}

Label group of plots

I merged nine plots together and I would like to group them based on different characteristics (A, B, C). Is there a simple way to add labels or annotations at the bottom of the plots? When using cowplot or gridExtra I receive the following error:
In as_grob.default(plot) :
Cannot convert object of class list into a grob.
Sample data
# Nine precomputed boxplot statistics lists (stats, n, conf, out, group,
# names), one per plot.
list(list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L,
1L)), n = 36, conf = structure(c(136.603333333333, 458.396666666667
), .Dim = 2:1), out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L,
1L)), n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L,
1L)), n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667
), .Dim = 2:1), out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L,
1L)), n = 36, conf = structure(c(676.246666666667, 725.753333333333
), .Dim = 2:1), out = c(485, 464, 446), group = c(1, 1, 1
), names = ""), list(stats = structure(c(104,
109.5, 113.5, 121, 125), .Dim = c(5L, 1L)), n = 36, conf = structure(c(110.471666666667,
116.528333333333), .Dim = 2:1), out = c(91, 91, 88, 84, 84,
79), group = c(1, 1, 1, 1, 1, 1), names = ""),
list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L,
1L)), n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L,
1L)), n = 36, conf = structure(c(85.6466666666667, 119.353333333333
), .Dim = 2:1), out = c(343, 318, 299, 257), group = c(1,
1, 1, 1), names = ""), list(stats = structure(c(7,
12, 22.5, 44, 72), .Dim = c(5L, 1L)), n = 36, conf = structure(c(14.0733333333333,
30.9266666666667), .Dim = 2:1), out = numeric(0), group = numeric(0),
names = ""), list(stats = structure(c(5,
5, 6, 12.5, 21), .Dim = c(5L, 1L)), n = 36, conf = structure(c(4.025,
7.975), .Dim = 2:1), out = numeric(0), group = numeric(0),
names = ""))
Many thanks
I agree with the idea of using ggplot2 graphics with facets, but given your plot objects, you could do something like this (to get you started). I used ggplotify instead of cowplot because I ran into trouble with the figure margins, but you might be able to fix that by changing the null device (not tested).
Edit:
Added individual labels and y axis labels, as well as outer margins. You might have to adjust some of that depending on the output size of your composite plot. This may show you how you could adjust those settings for individual plots. Still, using ggplot2 to generate the plots would make things quite a bit easier.
library(grid)
library(gridExtra)
library(ggplotify)
sdt <- list(list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(136.603333333333, 458.396666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(676.246666666667, 725.753333333333), .Dim = 2:1),
out = c(485, 464, 446), group = c(1, 1, 1), names = ""),
list(stats = structure(c(104, 109.5, 113.5, 121, 125), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(110.471666666667, 116.528333333333), .Dim = 2:1),
out = c(91, 91, 88, 84, 84, 79), group = c(1, 1, 1, 1, 1, 1), names = ""),
list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(85.6466666666667, 119.353333333333), .Dim = 2:1),
out = c(343, 318, 299, 257), group = c(1,1, 1, 1), names = ""),
list(stats = structure(c(7, 12, 22.5, 44, 72), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(14.0733333333333, 30.9266666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(5, 5, 6, 12.5, 21), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(4.025, 7.975), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""))
# Panel titles A1..A3, B1..B3, C1..C3, one per boxplot.
sublabels <- paste0(rep(LETTERS[1:3], each = 3), 1:3)

# Turn every base-graphics boxplot into a grob. The plotting code is wrapped
# in a function so as.grob() can evaluate and capture it; the default
# argument picks the statistics for panel x.
gplts <- lapply(seq_along(sublabels), function(x) {
  as.grob(function(y = sdt[[x]]) {
    par(oma = c(0, 3, 0, 3))
    bxp(y, ylab = "values", main = sublabels[x])
  })
})

# One frame colour, label and horizontal centre per group (A, B, C).
group_cols <- c("red", "green", "yellow")
group_names <- paste("Group", LETTERS[1:3])
group_x <- c(.33 / 2, .5, 1 - (.33 / 2))

# Start a new page with three coloured frames side by side.
# TRUE/FALSE are spelled out because T and F can be reassigned.
grid.arrange(
  grobs = lapply(group_cols, function(col) rectGrob(gp = gpar(col = col))),
  nrow = 1,
  newpage = TRUE
)

# Write each group label near the bottom of its frame.
for (j in seq_along(group_names)) {
  vp <- viewport(group_x[j], 0.45, gp = gpar(col = group_cols[j]))
  grid.text(
    group_names[j],
    y = .1,
    just = c("center", "bottom"),
    gp = gpar(fontsize = 20),
    vp = vp
  )
}

# Draw the nine boxplots in one row on the same page (newpage = FALSE).
grid.arrange(grobs = gplts, nrow = 1, newpage = FALSE)
Created on 2021-03-25 by the reprex package (v1.0.0)

Error "Attribute not identical" in using gather function for the multi column in R

I am using the following data frame.
df2<-final.data%>% gather(Hospital,Attendance,contains("Attendance"))
df2 %>% spread(Hospital, Attendance)
> dput(final.data[0:2,])
structure(list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99,
97), RoyalPerth.Tri1 = c("8", "N/A"), RoyalPerth.Tri2 = c(33,
41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80),
RoyalPert
h.Tri5 = c("20", "14"), Fremantle.Attendance = c(155,
145), Fremantle.Admissions = c(70, 56), Fremantle.Tri1 = c("N/A",
"N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67,
51), Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24
), PrincessMargaret.Attendance = c(252, 219), PrincessMargaret.Admissions = c(59,
47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13",
"14"), PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159,
139), PrincessMargaret.Tri5 = c("4", "4"), KingEdward.Attendance = c(52,
43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A",
"N/A"), KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7",
"N/A"), KingEdward.Tri4 = c(20, 25), KingEdward.Tri5 = c("25",
"17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109,
112), SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42,
43), SirCharles.Tri3 = c(108, 73), SirCharles.Tri4 = c(47,
61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166,
175), Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A",
"N/A"), Armadale.Tri2 = c(16, 26), Armadale.Tri3 = c(62,
73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"
), Swan.Attendance = c(133, 129), Swan.Admissions = c(17,
25), Swan.Tri1 = c("N/A", "N/A"), Swan.Tri2 = c(29, 25),
Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43), Swan.Tri5 = c("N/A",
"4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10",
"24"), Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12,
26), Rockingham.Tri3 = c(51, 45), Rockingham.Tri4 = c(81,
65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267,
241), Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A",
"N/A"), Joondalup.Tri2 = c(27, 23), Joondalup.Tri3 = c(75,
78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12",
"7")), row.names = 1:2, class = "data.frame")
Error:
Warning message:
attributes are not identical across measure variables;
they will be dropped
I have tried the following:
hospital.dataset<-gather(hospital,triage,sum,Tri1:Tri5) to gather Triage
after using cbind on the data set.
I want to convert it into a long data set using gather.
dput(hospital.dataset[1:2,])
structure(list(Date = structure(c(-714598, -714597), class = "Date"), [enter image description here][1]
Attendance = c(235, 209), Admissions = c(99, 97), Hospital = structure(c(1L,
1L), .Label = c("RoyalPerth Hospital", "Fremantle Hospital",
"Princess Margaret Hospital", "KingEdward Hospital", "SirCharles Hospital",
"Armadale Hospital", "Swan Hospital", "Rockingham Hospital",
"Joondalup Hospital"), class = "factor"), triage = c("Tri1",
"Tri1"), sum = c(8, 0)), row.names = 1:2, class = "data.frame")
Like this.
Thanks in advance.
Expected Dataframe
Note: This solution feels like a lot of effort. So please consider there may be more elegant approaches available.
One issue with this data is that the values you want "wide" (Attendance, Admissions) are mixed in with the values you want "long" (Tri1, Tri2, etc).
This solution uses pivot_longer on the entire data frame (note: pivot_longer is the new gather syntax) , and then separate to pull out the hospital name from the specific data field.
Then it splits into two data frames, applies pivot_wider to the Attendance/Admissions columns, and rejoins after that.
library(tidyverse)
# Reshape everything to long form. All columns are first converted to
# character because the raw data mixes numeric and character columns, and
# row_n records which original row each value came from.
final_data_long <- final.data.raw %>%
  mutate(across(everything(), as.character)) %>%
  mutate(row_n = row_number()) %>%
  pivot_longer(-row_n, names_to = "field", values_to = "value") %>%
  # Column names look like "<hospital>.<category>"; split them on the dot.
  separate(field, into = c("hospital", "category"), sep = "\\.")

# Values that should end up as their own columns...
attend_admit <- final_data_long %>%
  filter(str_detect(category, "Attendance|Admissions"))

# ...and everything else (the triage counts), which stays long.
# The explicit `by` avoids dplyr's "Joining with ..." message.
triage <- final_data_long %>%
  anti_join(attend_admit, by = c("row_n", "hospital", "category"))

# One row per original row and hospital, with Attendance and Admissions as
# columns. id_cols already identifies the rows, so no grouping is needed.
attend_admit_long <- attend_admit %>%
  pivot_wider(
    id_cols = c(row_n, hospital),
    names_from = category,
    values_from = value
  )

# Attach Attendance/Admissions to every triage row of the same original row
# and hospital, then drop the helper row index.
triage %>%
  inner_join(attend_admit_long, by = c("row_n", "hospital")) %>%
  arrange(hospital) %>%
  select(-row_n)
Output
# A tibble: 90 x 5
hospital category value Attendance Admissions
<chr> <chr> <chr> <chr> <chr>
1 Armadale Tri1 N/A 166 19
2 Armadale Tri2 16 166 19
3 Armadale Tri3 62 166 19
4 Armadale Tri4 79 166 19
5 Armadale Tri5 9 166 19
6 Armadale Tri1 N/A 175 25
7 Armadale Tri2 26 175 25
8 Armadale Tri3 73 175 25
9 Armadale Tri4 55 175 25
10 Armadale Tri5 19 175 25
# … with 80 more rows
Data
*I couldn't get OP's dput to work, here's a version that can be copy/pasted:
final.data.raw <- structure(
list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99, 97), RoyalPerth.Tri1 = c("8", "N/A"),
RoyalPerth.Tri2 = c(33, 41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80),
RoyalPerth.Tri5 = c("20", "14"), Fremantle.Attendance = c(155, 145), Fremantle.Admissions = c(70, 56),
Fremantle.Tri1 = c("N/A", "N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67, 51),
Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24), PrincessMargaret.Attendance = c(252, 219),
PrincessMargaret.Admissions = c(59,47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13", "14"),
PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159, 139), PrincessMargaret.Tri5 = c("4", "4"),
KingEdward.Attendance = c(52, 43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A", "N/A"),
KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7", "N/A"), KingEdward.Tri4 = c(20, 25),
KingEdward.Tri5 = c("25", "17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109, 112),
SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42, 43), SirCharles.Tri3 = c(108, 73),
SirCharles.Tri4 = c(47, 61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166, 175),
Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A", "N/A"), Armadale.Tri2 = c(16, 26),
Armadale.Tri3 = c(62, 73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"),
Swan.Attendance = c(133, 129), Swan.Admissions = c(17, 25), Swan.Tri1 = c("N/A", "N/A"),
Swan.Tri2 = c(29, 25), Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43),
Swan.Tri5 = c("N/A", "4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10", "24"),
Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12, 26), Rockingham.Tri3 = c(51, 45),
Rockingham.Tri4 = c(81, 65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267, 241),
Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A", "N/A"), Joondalup.Tri2 = c(27, 23),
Joondalup.Tri3 = c(75, 78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12", "7")),
row.names = 1:2, class = "data.frame")

Resources