How to get conditional weighted means for several columns - r

For the following dataframe:
eu <- structure(list(land = structure(c(1L, 4L, 5L, 12L, 9L, 13L, 16L, 18L, 27L, 10L, 25L, 21L, 28L, 19L, 8L, 26L, 6L, 3L, 15L, 14L, 11L, 17L, 20L, 23L, 24L, 2L, 22L, 7L), .Label = c("Belgie", "Bulgarije", "Cyprus", "Denemarken", "Duitsland", "Estland", "Europese Unie", "Finland", "Frankrijk", "Griekenland", "Hongarije", "Ierland", "Italie", "Letland", "Litouwen", "Luxemburg", "Malta", "Nederland", "Oostenrijk", "Polen", "Portugal", "Roemenie", "Slovenie", "Slowakije", "Spanje", "Tsjechie", "Verenigd Koninkrijk", "Zweden"), class = "factor"), `1979` = c(91.36, 47.82, 65.73, 63.61, 60.71, 85.65, 88.91, 58.12, 32.35, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 61.99), `1981` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 81.48, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1984` = c(92.09, 52.38, 56.76, 47.56, 56.72, 82.47, 88.79, 50.88, 32.57, 80.59, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 58.98), `1987` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 68.52, 72.42, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1989` = c(90.73, 46.17, 62.28, 68.28, 48.8, 81.07, 87.39, 47.48, 36.37, 80.03, 54.71, 51.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 58.41), `1994` = c(90.66, 52.92, 60.02, 43.98, 52.71, 73.6, 88.55, 35.69, 36.43, 73.18, 59.14, 35.54, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 56.67), `1995` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 41.63, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1996` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 67.73, 57.6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1999` = c(91.05, 50.46, 45.19, 50.21, 46.76, 69.76, 87.27, 30.02, 24, 70.25, 63.05, 39.93, 38.84, 49.4, 30.14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 49.51), `2004` = c(90.81, 47.89, 43, 58.58, 42.76, 71.72, 91.35, 39.26, 38.52, 63.22, 45.14, 38.6, 37.85, 42.43, 39.43, 28.3, 26.83, 72.5, 
48.38, 41.34, 38.5, 82.39, 20.87, 28.35, 16.97, NA, NA, 45.47), `2007` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 29.22, 29.47, NA), `2009` = c(90.39, 59.54, 43.3, 58.64, 40.63, 65.05, 90.75, 36.75, 34.7, 52.61, 44.9, 36.78, 45.53, 45.97, 40.3, 28.2, 43.9, 59.4, 20.98, 53.7, 36.31, 78.79, 24.53, 28.33, 19.64, 38.99, 27.67, 43), inwoners = c(11161642, 5602628, 80523746, 4591087, 65578819, 59685227, 537039, 16779575, 63896071, 11062508, 46727890, 10487289, 9555893, 8451860, 5426674, 10516125, 1320174, 865878, 2971905, 2023825, 9908798, 421364, 38533299, 2058821, 5410836, 7284552, 20020074, 501403599), plicht = structure(c(1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ja", "nee"), class = "factor")), .Names = c("land", "1979", "1981", "1984", "1987", "1989", "1994", "1995", "1996", "1999", "2004", "2007", "2009", "inwoners", "plicht"), row.names = c(NA, -28L), class = "data.frame")
I need conditional column means. I can do that with:
# Unweighted column means of the year columns (2:13), once per plicht group.
# c() mixes a label string with the numeric means, so each result is a character
# vector of 14 elements (label + 12 means + NA), one fewer than eu's 15 columns.
verplicht <- c("Europese Unie (stemplicht)", colMeans(eu[eu$plicht=="ja",c(2:13)], na.rm=TRUE), NA)
# NOTE(review): the aggregate "Europese Unie" row is itself coded plicht == "nee",
# so it takes part in the "nee" means below -- confirm that this is intended.
vrij <- c("Europese Unie (geen stemplicht)", colMeans(eu[eu$plicht=="nee",c(2:13)], na.rm=TRUE), NA)
# Append both summary vectors to eu as extra rows.
# NOTE(review): binding character vectors presumably turns the year columns of
# eu2 into character -- verify before computing on eu2.
eu2 <- rbind(eu, verplicht, vrij)
However, I need weighted column means with country population (the inwoners column) as the weights. I tried to do that with:
verplicht <- c("Europese Unie (stemplicht)", lapply(eu[eu$plicht=="ja",c(2:13)], weighted.mean(x, eu[eu$plicht=="ja",14], na.rm=TRUE)), NA)
but that resulted in the following error:
Error in weighted.mean.default(x, eu[eu$plicht == "ja", 14], na.rm = TRUE) :
'x' and 'w' must have the same length
I understand what the error-message is saying, but don't know how to solve this. Any suggestions?

The problem is with how you're using lapply. Here's the correct code:
# Weighted mean of every year column (2:13) for the compulsory-voting countries,
# with population (inwoners) as the weights. weighted.mean itself is handed to
# lapply; w and na.rm are forwarded to it for each column.
lapply(eu[eu$plicht=='ja',2:13], weighted.mean, w=eu[eu$plicht=='ja','inwoners'], na.rm=TRUE)
# The aggregate "Europese Unie" row is coded plicht == 'nee'. Leave it out of the
# 'nee' group: its population is about as large as all member states combined,
# so it would otherwise dominate the weighted mean.
nee <- eu$plicht=='nee' & eu$land!='Europese Unie'
lapply(eu[nee,2:13], weighted.mean, w=eu[nee,'inwoners'], na.rm=TRUE)
Notice how weighted.mean is used as an argument, rather than inside an anonymous function with x as an argument. You could equivalently do:
lapply(eu[eu$plicht=='ja',2:13], function(x) weighted.mean(x, w=eu[eu$plicht=='ja','inwoners'], na.rm=TRUE))
lapply(eu[nee,2:13], function(x) weighted.mean(x, w=eu[nee,'inwoners'], na.rm=TRUE))
But you're currently kind of mixing the two different ways of using lapply.

If inwoners is the population, then
> (weights <- with(eu, inwoners/sum(inwoners)))
# [1] 0.0111303968 0.0055869443 0.0802983327 0.0045782350 0.0653952416
# [6] 0.0595181478 0.0005355356 0.0167326033 0.0637172042 0.0110315403
# [11] 0.0465970828 0.0104579315 0.0095291428 0.0084282004 0.0054114829
# [16] 0.0104866868 0.0013164784 0.0008634541 0.0029635856 0.0020181596
# [21] 0.0098810599 0.0004201845 0.0384254312 0.0020530577 0.0053956892
# [26] 0.0072641601 0.0199640310 0.5000000000
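and the weighted mean of the 2004 column, for example, is (note that this still includes the aggregate "Europese Unie" row, which carries half of the total weight, as the last value above shows)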
> weighted.mean(eu$`2004`, w = weights, na.rm = TRUE)
# [1] 45.31782
To get the weighted mean of each of the year columns for when plicht == 'ja',
> s <- subset(eu, plicht == "ja")
> # Pick the weights with the same condition that selected the rows, instead of
> # going through the row names (which only works while they equal row positions).
> w2 <- weights[eu$plicht == "ja"]
> # One weighted mean per year column; vapply guarantees one numeric value per
> # column, so no list of one-row data frames has to be rbind-ed together.
> newDF <- data.frame(wtMean.ja = vapply(s[2:13], weighted.mean, numeric(1), w = w2, na.rm = TRUE))
> rownames(newDF) <- names(s)[2:13]
> newDF
# wtMean.ja
# 1979 86.56735
# 1981 81.48000
# 1984 83.56127
# 1987 68.52000
# 1989 72.30636
# 1994 69.86950
# 1995 NaN
# 1996 NaN
# 1999 69.28708
# 2004 63.17060
# 2007 NaN
# 2009 58.99465

Related

Normalisation and simplification of a plot

I'd like to make a graph that captures the impact of the intervention. As you can see in the plots below, the y-axes of my two figures have different scales (i.e. different value ranges), making comparison difficult. Is it possible to normalise the y-axis (between 0 and 1) in both figures? Is it also possible to create a plot that shows the intervention rate per day? For Monday, for example, it would display a black line without SEDM and a red line with SEDM.
Sample code:
# Average kW per day and 10-minute bin. The result is assigned back to df;
# without the assignment the summary is only printed and every later use of df
# still sees the raw, unaggregated rows.
df <- df %>%
  group_by(Day, Time = ceiling_date(as.POSIXct(Time), '10 minutes')) %>%
  summarise(kW= mean(kW)) %>%
  ungroup()
# Give the Day factor an explicit Monday-to-Sunday level order.
df$Day<- factor(df$Day,levels = c( "Monday", "Tuesday","Wednesday","Thursday","Friday","Saturday", "Sunday"))
#as.POSIXct(df$Time,format="%H:%M")
# Plot kW against Time with one smoothed curve per Day. Time is converted to
# POSIXct inside transform() so that scale_x_datetime() can format the x axis.
ggplot(transform(df, Time = as.POSIXct(Time)),
aes(x = Time,y =kW, group=Day)) +
geom_smooth(aes(color=Day)) +
#geom_boxplot()+
labs(x="", y="kW", title="Monthly electricity consumption without SEDM (House 3)") +
# Label the x axis as hours:minutes with a break every two hours and no padding.
scale_x_datetime(date_labels = '%H:%M', date_breaks = '2 hours',expand = expansion(mult = c(0, 0)))+
#facet_wrap(~Day, ncol=1 )+
# theme_bw() comes first; the theme() calls after it override individual elements.
theme_bw()+
theme(axis.text.x = element_text(angle = 90, hjust = 1,family="Times", face="bold", size=12, color="black"),
axis.title.x = element_text(family="Times", face="bold", size=16, color="black"),
axis.text.y = element_text(family="Times", face="bold", size=12, color="black"),
axis.title.y = element_text(family="Times", face="bold", size=16, color="black"),
strip.text = element_text(size=15, face="bold"),
plot.title = element_text(size=20, face="bold"))+
# Legend without a title, placed at the top; hjust = 0.5 on plot.title centres the title.
theme(legend.title = element_blank(),
legend.text = element_text(family="Times", color = "black", size = 16,face="bold"),
legend.position="top",
legend.box = "horizontal",
plot.title = element_text(hjust = 0.5))+
# NOTE(review): only the colour aesthetic is mapped above, so this fill guide
# presumably has no effect -- guides(color = ...) was probably intended; confirm.
guides(fill = guide_legend(nrow = 1))
Plot:
Data:
#for the first plot (without SEDM) first 300
structure(list(structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday", "Sunday"), class = "factor"), structure(c(41220,
41280, 41340, 41400, 41460, 41520, 41580, 41640, 41700, 41760,
41820, 41880, 41940, 42000, 42060, 42120, 42180, 42240, 42300,
42360, 42420, 42480, 42540, 42600, 42660, 42720, 42780, 42840,
42900, 42960, 43020, 43080, 43140, 43200, 43260, 43320, 43380,
43440, 43500, 43560, 43620, 43680, 43740, 43800, 43860, 43920,
43980, 44040, 44100, 44160, 44220, 44280, 44340, 44400, 44460,
44520, 44580, 44640, 44700, 44760, 44820, 44880, 44940, 45000,
45060, 45120, 45180, 45240, 45300, 45360, 45420, 45480, 45540,
45600, 45660, 45720, 45780, 45840, 45900, 45960, 46020, 46080,
46140, 46200, 46260, 46320, 46380, 46440, 46500, 46560, 46620,
46680, 46740, 46800, 46860, 46920, 46980, 47040, 47100, 47160,
47220, 47280, 47340, 47400, 47460, 47520, 47580, 47640, 47700,
47760, 47820, 47880, 47940, 48000, 48060, 48120, 48180, 48240,
48300, 48360, 48420, 48480, 48540, 48600, 48660, 48720, 48780,
48840, 48900, 48960, 49020, 49080, 49140, 49200, 49260, 49320,
49380, 49440, 49500, 49560, 49620, 49680, 49740, 49800, 49860,
49920, 49980, 50040, 50100, 50160, 50220, 50280, 50340, 50400,
50460, 50520, 50580, 50640, 50700, 50760, 50820, 50880, 50940,
51000, 51060, 51120, 51180, 51240, 51300, 51360, 51420, 51480,
51540, 51600, 51660, 51720, 51780, 51840, 51900, 51960, 52020,
52080, 52140, 52200, 52260, 52320, 52380, 52440, 52500, 52560,
52620, 52680, 52740, 52800, 52860, 52920, 52980, 53040, 53100,
53160, 53220, 53280, 53340, 53400, 53460, 53520, 53580, 53640,
53700, 53760, 53820, 53880, 53940, 54000, 54060, 54120, 54180,
54240, 54300, 54360, 54420, 54480, 54540, 54600, 54660, 54720,
54780, 54840, 54900, 54960, 55020, 55080, 55140, 55200, 55260,
55320, 55380, 55440, 55500, 55560, 55620, 55680, 55740, 55800,
55860, 55920, 55980, 56040, 56100, 56160, 56220, 56280, 56340,
56400, 56460, 56520, 56580, 56640, 56700, 56760, 56820, 56880,
56940, 57000, 57060, 57120, 57180, 57240, 57300, 57360, 57420,
57480, 57540, 57600, 57660, 57720, 57780, 57840, 57900, 57960,
58020, 58080, 58140, 58200, 58260, 58320, 58380, 58440, 58500,
58560, 58620, 58680, 58740, 58800, 58860, 58920, 58980, 59040,
59100, 59160), class = c("hms", "difftime"), units = "secs"),
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2.951667, 41.055, 10.733333, 2.951667, 3.22, 4.83,
3.22, 3.22, 3.22, 2.951667, 3.488333, 13.416667, 3.22, 3.22,
13.416667, 43.738333, 3.22, 3.22, 3.22, 3.488333, 3.22, 3.22,
3.488333, 2.951667, 11.806667, 3.756667, 45.348333, 4.025,
4.025, 4.293333, 4.83, 4.83, 15.026667, 4.561667, 46.153333,
4.025, 4.025, 3.756667, 4.025, 4.293333, 44.275, 44.811667,
3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667,
3.756667, 3.756667, 3.756667, 44.006667, 3.756667, 3.756667,
3.756667, 3.488333, 3.756667, 3.756667, 44.006667, 44.006667,
3.756667, 3.756667, 4.293333, 3.756667, 4.293333, 4.561667,
4.83, 4.025, 4.025, 24.418333, 4.561667, 4.025, 4.293333,
32.736667, 9.928333, 1.073333, 1.341667, 1.073333, 0.805,
20.93, 41.591667, 0.805, 0.805, 1.073333, 1.341667, 1.61,
2.415, 43.201667), c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("Day",
"Time", "kW", NA, NA, NA, NA, NA, NA), row.names = c(NA, 300L
), class = "data.frame")
#for the second plot (with SEDM) first 500
structure(list(Day = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L), .Label = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"), class = "factor"),
Time = structure(c(43380, 43440, 43500, 43560, 43620, 43680,
43740, 43800, 43860, 43920, 65700, 65760, 65820, 65880, 65940,
66000, 66060, 66120, 66180, 66240, 66300, 66360, 66420, 66480,
66540, 66600, 66660, 66720, 66780, 66840, 66900, 66960, 67020,
67080, 67140, 67200, 67260, 67320, 67380, 67440, 67500, 67560,
67620, 67680, 67740, 67800, 67860, 67920, 67980, 68040, 68100,
68160, 68220, 68280, 68340, 68400, 68460, 68520, 68580, 68640,
68700, 68760, 68820, 68880, 68940, 69000, 69060, 69120, 69180,
69240, 69300, 69360, 69420, 69480, 69540, 69600, 69660, 69720,
69780, 69840, 69900, 69960, 70020, 70080, 70140, 70200, 70260,
70320, 70380, 70440, 70500, 70560, 70620, 70680, 70740, 70800,
70860, 70920, 70980, 71040, 71100, 71160, 71220, 71280, 71340,
71400, 71460, 71520, 71580, 71640, 71700, 71760, 71820, 71880,
71940, 72000, 72060, 72120, 72180, 72240, 72300, 72360, 72420,
72480, 72540, 72600, 72660, 72720, 72780, 72840, 72900, 72960,
73020, 73080, 73140, 73200, 73260, 73320, 73380, 73440, 73500,
73560, 73620, 73680, 73740, 73800, 73860, 73920, 73980, 74040,
74100, 74160, 74220, 74280, 74340, 74400, 74460, 74520, 74580,
74640, 74700, 74760, 74820, 74880, 74940, 75000, 75060, 75120,
75180, 75240, 75300, 75360, 75420, 75480, 75540, 75600, 75660,
75720, 75780, 75840, 75900, 75960, 76020, 76080, 76140, 76200,
76260, 76320, 76380, 76440, 76500, 76560, 76620, 76680, 76740,
76800, 76860, 76920, 76980, 77040, 77100, 77160, 77220, 77280,
77340, 77400, 77460, 77520, 77580, 77640, 77700, 77760, 77820,
77880, 77940, 78000, 78060, 78120, 78180, 78240, 78300, 78360,
78420, 78480, 78540, 78600, 78660, 78720, 78780, 78840, 78900,
78960, 79020, 79080, 79140, 79200, 79260, 79320, 79380, 79440,
79500, 79560, 79620, 79680, 79740, 79800, 79860, 79920, 79980,
80040, 80100, 80160, 80220, 80280, 80340, 80400, 80460, 80520,
80580, 80640, 80700, 80760, 80820, 80880, 80940, 81000, 81060,
81120, 81180, 81240, 81300, 81360, 81420, 81480, 81540, 81600,
81660, 81720, 81780, 81840, 81900, 81960, 82020, 82080, 82140,
82200, 82260, 82320, 82380, 82440, 82500, 82560, 82620, 82680,
82740, 82800, 82860, 82920, 82980, 83040), class = c("hms",
"difftime"), units = "secs"), kW = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 6.976667, 6.976667, 7.245, 7.245, 7.245, 7.245,
7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245,
7.245, 7.245, 7.245, 7.245, 7.245, 13.416667, 30.053333,
4.025, 4.025, 4.025, 4.025, 42.396667, 41.591667, 4.025,
4.025, 4.025, 4.025, 4.025, 4.025, 4.293333, 4.025, 4.025,
4.293333, 4.025, 4.025, 23.881667, 23.881667, 4.293333, 4.025,
4.293333, 42.665, 43.201667, 29.785, 4.293333, 16.636667,
4.025, 4.025, 4.025, 4.025, 4.025, 4.025, 4.293333, 5.903333,
5.635, 5.903333, 31.126667, 5.635, 5.635, 5.635, 5.635, 5.635,
5.903333, 5.635, 5.635, 5.635, 39.176667, 60.106667, 75.67,
75.67, 76.475, 30.321667, 6.976667, 7.781667, 7.781667, 39.713333,
39.713333, 39.713333, 31.663333, 31.663333, 31.663333, 18.515,
31.663333, 37.298333, 49.91, 59.301667, 24.15, 28.711667,
30.59, 35.956667, 38.64, 41.055, 24.955, 42.665, 52.325,
43.47, 31.395, 50.983333, 52.325, 40.518333, 44.543333, 50.178333,
41.591667, 50.715, 54.74, 50.983333, 65.741667, 48.031667,
41.591667, 50.715, 28.98, 5.366667, 7.781667, 28.98, 16.368333,
4.561667, 4.561667, 2.683333, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.683333, 2.683333, 2.415, 2.415, 3.756667, 3.756667, 3.756667,
4.293333, 5.366667, 5.635, 5.366667, 5.366667, 4.83, 4.83,
4.561667, 3.22, 2.951667, 2.951667, 3.22, 3.22, 3.22, 3.22,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.415, 2.415, 2.415, 2.415, 2.146667,
2.415, 2.146667, 2.415, 2.415, 2.415, 2.146667, 2.415, 2.415,
2.415, 2.415, 2.951667, 2.415, 2.415, 2.415, 2.415, 2.415,
2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415,
2.415, 2.415, 2.415, 2.415, 2.415, 2.683333, 2.683333, 2.683333,
2.683333, 2.683333, 2.415, 2.683333, 3.488333, 2.951667,
3.756667, 3.756667, 3.488333, 3.488333, 4.025, 4.025, 2.415,
2.415, 1.878333, 1.878333, 2.146667, 2.415, 1.878333, 2.146667,
2.146667, 2.146667, 1.878333, 1.878333, 1.878333, 1.878333,
1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333,
1.878333, 1.878333, 1.878333, 3.488333, 3.22, 2.951667, 2.683333,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667,
2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.683333,
2.951667, 2.951667, 2.951667, 2.683333)), row.names = c(NA,
300L), class = "data.frame")
You could scale your "kW" variable with min and max like this:
library(ggplot2)
# Give the Day factor an explicit Monday-to-Sunday level order.
df$Day <- factor(df$Day, levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
#as.POSIXct(df$Time,format="%H:%M")
# Min-max normalisation of kW to the range [0, 1].
# na.rm = TRUE keeps a single missing reading from turning the whole column
# into NA. Plain arithmetic is used instead of scale() so that kW stays an
# ordinary numeric vector (scale() returns a one-column matrix with attributes).
# If all readings are equal, maxs - mins is 0 and the result is NaN.
mins <- min(df$kW, na.rm = TRUE)
maxs <- max(df$kW, na.rm = TRUE)
df$kW <- (df$kW - mins) / (maxs - mins)
# Plot the normalised kW against Time with one smoothed curve per Day. Time is
# converted to POSIXct so that scale_x_datetime() can format the x axis.
ggplot(transform(df, Time = as.POSIXct(Time)),
       aes(x = Time, y = kW, group = Day)) +
  geom_smooth(aes(color = Day)) +
  #geom_boxplot()+
  labs(x = "", y = "kW", title = "Monthly electricity consumption without SEDM (House 3)") +
  scale_x_datetime(date_labels = '%H:%M', date_breaks = '2 hours', expand = expansion(mult = c(0, 0))) +
  #facet_wrap(~Day, ncol=1 )+
  # theme_bw() comes first; the theme() calls after it override individual elements.
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, family = "Times", face = "bold", size = 12, color = "black"),
        axis.title.x = element_text(family = "Times", face = "bold", size = 16, color = "black"),
        axis.text.y = element_text(family = "Times", face = "bold", size = 12, color = "black"),
        axis.title.y = element_text(family = "Times", face = "bold", size = 16, color = "black"),
        strip.text = element_text(size = 15, face = "bold"),
        plot.title = element_text(size = 20, face = "bold")) +
  theme(legend.title = element_blank(),
        legend.text = element_text(family = "Times", color = "black", size = 16, face = "bold"),
        legend.position = "top",
        legend.box = "horizontal",
        plot.title = element_text(hjust = 0.5)) +
  # The legend belongs to the colour aesthetic (Day), so the single-row layout
  # has to be requested for color; a fill guide would be ignored here.
  guides(color = guide_legend(nrow = 1))
Output:

How to group multiple variables using r_bind and select? [duplicate]

This question already has answers here:
Reshaping data.frame from wide to long format
(8 answers)
Closed 12 months ago.
My data look like this:
dput(srkw.dat)
structure(list(year = c(1962L, 1976L, 1981L, 1981L, 1982L,
1987L, 1989L, 1990L, 1992L, 1992L, 1992L, 1994L, 1998L, 2003L,
2003L, 2003L, 2003L, 2004L, 2004L, 2004L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2008L, 2008L,
2008L, 2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2011L, 2011L,
2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L,
2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2017L, 2018L, 2018L, 2018L, 2019L, 2020L, 2020L, 2020L,
2020L), whaleid1 = c("Lx", "Lx", "Lx", "Lx", "Lx", "L5",
"Lx", "Kx", "L5", "Lx", "L21", "L5", "Lx", "Jx", "L5", "L7",
"Jx", "L21", "L54", "Lx", "Jx", "J27", "J16", "J11", "Jx",
"L5", "L5", "J30", "L95", "Lx", "Jx", "J35", "K40", "Lx",
"L12", "Jx", "L106", "L21", "Kx", "L83", "L83", "L57", "J31",
"J27", "J30", "L55", "L22", "K36", "Jx", "L72", "K21", NA,
"K16", "J16", "J35", "L72", "J36", "L22", "K22", "K22", "L77",
"J47", "Jx", "J40", "L119", "J2", "J35", "L103", "L77", "L87",
"J39", "J22", "J38", "L115", "J31", NA, "J37", "Lx"), whalesex1 = c(NA,
0L, NA, NA, 0L, 0L, NA, NA, 0L, 0L, 0L, 0L, 1L, NA, 0L, 0L,
NA, 0L, 0L, NA, NA, 1L, 0L, 0L, NA, 0L, 0L, 1L, 1L, NA, NA,
0L, 0L, NA, 0L, NA, 1L, 0L, NA, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, NA, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, NA,
0L, NA), whaleage1 = c(NA, NA, NA, NA, NA, 23L, NA, NA, 28L,
NA, 42L, 30L, NA, NA, 39L, 42L, NA, 54L, 27L, NA, NA, 14L,
33L, 33L, NA, 41L, 41L, 10L, 9L, NA, NA, 8L, 43L, NA, 73L,
NA, 2L, 57L, NA, 18L, 18L, 31L, 14L, 18L, 14L, 33L, 39L,
7L, NA, 25L, 27L, NA, 28L, 42L, 16L, 28L, 15L, 43L, 28L,
28L, 28L, 5L, NA, 12L, 4L, 60L, 18L, 13L, 29L, 24L, 15L,
23L, 15L, 9L, 25L, NA, 19L, NA), whaleid2 = c("Lx", "Lx",
"Lx", "Lx", "Lx", "L58", "Lx", "Kx", "L58", "Lx", "Lx", "L58",
NA, "Jx", "L73", "L57", "Jx", "L47", "L100", "Lx", NA, "J31",
"J26", "J27", "Jx", "L73", "L73", "J31", "L72", "Lx", "Jx",
"J37", "K16", "Lx", "L41", NA, "L41", "L26", NA, "L110",
"L110", NA, "J36", "J31", "Kx", NA, "L94", "K42", NA, "Kx",
"K25", NA, NA, "J26", "J47", "L105", "J41", "L94", NA, "K33",
"L119", NA, NA, NA, NA, NA, "J47", "L109", "L119", "Jx",
NA, "J40", NA, NA, "J56", NA, "Jx", "Lx"), whalesex2 = c(NA,
NA, NA, NA, NA, 1L, NA, NA, 1L, 0L, NA, 1L, NA, NA, 1L, 1L,
NA, 0L, 1L, NA, NA, 0L, 1L, 1L, NA, 1L, 1L, 0L, 0L, NA, NA,
0L, 0L, NA, 1L, NA, 1L, 0L, NA, 1L, 1L, NA, 0L, 0L, NA, NA,
0L, 1L, NA, NA, 1L, NA, NA, 1L, 1L, 1L, 0L, 0L, NA, 1L, 0L,
NA, NA, NA, NA, NA, 1L, 1L, 0L, NA, NA, 0L, NA, NA, 0L, NA,
NA, NA), whaleage2 = c(NA, NA, NA, NA, NA, 7L, NA, NA, 12L,
NA, NA, 14L, NA, NA, 17L, 26L, NA, 30L, 3L, NA, NA, 10L,
14L, 14L, NA, 19L, 19L, 10L, 19L, NA, NA, 5L, 21L, NA, 29L,
NA, 30L, 51L, NA, 1L, 1L, NA, 10L, 14L, NA, NA, 15L, 2L,
NA, NA, 22L, NA, NA, 23L, 4L, 10L, 9L, 19L, NA, 14L, 3L,
NA, NA, NA, NA, NA, 6L, 9L, 4L, NA, NA, 14L, NA, NA, 1L,
NA, NA, NA), whaleid3 = c(NA, NA, NA, NA, "Lx", "L73", "Lx",
"Kx", "L73", "Lx", NA, "L73", NA, "Jx", NA, NA, NA, "Lx",
NA, NA, NA, "Jx", "J36", "J39", NA, "L67", "L67", NA, NA,
NA, NA, "J40", "K35", "Lx", "L77", NA, "L57", "L47", NA,
"L47", "L91", NA, "J39", "J39", NA, NA, "L113", NA, NA, NA,
"K26", NA, NA, "J36", NA, NA, "J42", "L113", NA, NA, NA,
NA, NA, NA, NA, NA, "L87", "L123", "L113", "Jx", NA, "J49",
NA, NA, "J36", NA, NA, "Lx"), whalesex3 = c(NA, NA, NA, NA,
0L, 1L, NA, NA, 1L, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0L, 1L, NA, 0L, 0L, NA, NA, NA, NA, 0L, 1L, NA,
0L, NA, 1L, 0L, NA, 0L, 0L, NA, 1L, 1L, NA, NA, 0L, NA, NA,
NA, 1L, NA, NA, 0L, NA, NA, 0L, 0L, NA, NA, NA, NA, NA, NA,
NA, NA, 1L, 1L, 0L, 0L, NA, 1L, NA, NA, 0L, NA, NA, NA),
whaleage3 = c(NA, NA, NA, NA, NA, 1L, NA, NA, 6L, NA, NA,
8L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, 2L, NA, 20L,
20L, NA, NA, NA, NA, 2L, 4L, NA, 19L, NA, 30L, 33L, NA, 34L,
13L, NA, 6L, 6L, NA, NA, 1L, NA, NA, NA, 20L, NA, NA, 15L,
NA, NA, 7L, 5L, NA, NA, NA, NA, NA, NA, NA, NA, 24L, 1L,
7L, NA, NA, 6L, NA, NA, 21L, NA, NA, NA), whaleid4 = c(NA,
NA, NA, NA, "Lx", NA, "Lx", "Kx", NA, "Lx", NA, NA, NA, "Jx",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "L101", "L101",
NA, NA, NA, NA, NA, NA, "Lx", "L94", NA, "L82", "Lx", NA,
"L111", NA, NA, "Kx", NA, NA, NA, NA, NA, NA, NA, "K35",
NA, NA, "J42", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, "Jx", NA, NA, NA, NA, "J47", NA, NA, "Lx"
), whalesex4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1L, 1L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, 0L, NA, NA, 0L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1L, NA, NA, NA), whaleage4 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 3L, 3L, NA, NA, NA, NA, NA, NA,
NA, 11L, NA, 17L, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 11L, NA, NA, 7L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 10L,
NA, NA, NA), whaleid5 = c(NA, NA, NA, NA, NA, NA, "Lx", "Kx",
NA, NA, NA, NA, NA, "Jx", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "L86",
NA, NA, "L91", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "Jx", NA, NA, NA, NA, "J49", NA, NA, NA), whalesex5 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 0L, NA, NA, 0L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA,
NA, NA), whaleage5 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 16L, NA,
NA, 13L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 8L, NA, NA, NA), whaleid6 = c(NA,
NA, NA, NA, NA, NA, "Lx", "Kx", NA, NA, NA, NA, NA, "Jx",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "L95", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Jx", NA, NA,
NA, NA, NA, NA, NA, NA), whalesex6 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), whaleage6 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 11L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), whaleid7 = c(NA, NA, NA, NA, NA, NA, "Lx", "Kx",
NA, NA, NA, NA, NA, "Jx", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "L77",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "Jx", NA, NA, NA, NA, NA, NA, NA, NA), whalesex7 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), whaleage7 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 20L, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), whaleid8 = c(NA,
NA, NA, NA, NA, NA, "Lx", "Kx", NA, NA, NA, NA, NA, "Jx",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "L94", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Jx", NA, NA,
NA, NA, NA, NA, NA, NA), whalesex8 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), whaleage8 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 12L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), event.id = 1:78), row.names = c(NA, -78L), class = "data.frame")
I want to use rbind() to stack the whale ID columns into a single column, while keeping each whale's respective age and sex attached to its ID. I got this far:
# Stack every whaleidN column into one long `id` column, keeping event.id and
# year next to each id. The columns are discovered by name instead of being
# listed by hand, so none is left out (the hand-written version stopped at
# whaleid7 and silently dropped whaleid8).
id_columns <- grep("^whaleid[0-9]+$", names(srkw.dat), value = TRUE)

events.by.id <- do.call(
  rbind,
  lapply(id_columns, function(id_col) {
    # all_of() selects the column whose name is stored in id_col and renames it to `id`
    select(srkw.dat, event.id, year, id = all_of(id_col))
  })
) %>%
  # Slots with no whale recorded are NA; drop them
  filter(!is.na(id))
But I wasn't sure what extra syntax is needed to link each whale's age and sex to its ID?
Additionally, after that is done, is there an elegant way to selectively remove the whale IDs (and their ascribed sexes/ages) that are Jx, Lx, Kx, etc?
Thanks so much!
I think it would help if you get data in long format using pivot_longer with id, sex and age as separate columns. This might be simpler than your attempt.
# Reshape to one row per (event, whale slot) with id, sex and age side by side.
res <- tidyr::pivot_longer(srkw.dat,
  # Pivot only the whale* columns. `cols = -year` would also sweep in
  # event.id, whose name does not match names_pattern, so it would be lost
  # as an identifier column.
  cols = -c(year, event.id),
  # ".value" turns the captured stem (id / sex / age) into output columns;
  # the trailing digits become the slot number `num`.
  names_to = c('.value', 'num'),
  names_pattern = 'whale(.*?)(\\d+)',
  # Drop slots where id, sex and age are all missing
  values_drop_na = TRUE)
res
# Expected result (event.id is retained as an identifier column):
# A tibble: 211 x 6
#     year event.id num   id      sex   age
#    <int>    <int> <chr> <chr> <int> <int>
#  1  1962        1 1     Lx       NA    NA
#  2  1962        1 2     Lx       NA    NA
#  3  1976        2 1     Lx        0    NA
#  4  1976        2 2     Lx       NA    NA
#  5  1981        3 1     Lx       NA    NA
#  6  1981        3 2     Lx       NA    NA
#  7  1981        4 1     Lx       NA    NA
#  8  1981        4 2     Lx       NA    NA
#  9  1982        5 1     Lx        0    NA
# 10  1982        5 2     Lx       NA    NA
# … with 201 more rows
With the data in this long format (stored in res), filtering is straightforward. For example, to drop the ids 'Jx', 'Lx' and 'Kx' you can do:
# Namespace-qualified because this answer never attaches dplyr: a bare
# filter() would resolve to stats::filter() and `%>%` would not be found.
dplyr::filter(res, !id %in% c("Jx", "Lx", "Kx"))
What you want to do is pivot your data. You've actually kind of developed your own version of tidyr::pivot_longer() -- but it's easier with Hadley's version.
To your second question, you can use stringr::str_ends() to filter by ids ending in "x".
library(tidyverse)
# Long format: one row per (event, whale slot) with id / sex / age columns.
# The NA in names_to discards the slot number captured by the second group.
# Rows are then kept only for identified whales: ids that are missing or that
# end in "x" (e.g. Jx, Kx, Lx) are removed.
srkw.dat_long <- filter(
  pivot_longer(
    srkw.dat,
    cols = starts_with("whale"),
    names_to = c(".value", NA),
    names_pattern = "whale(\\D+)(\\d+)"
  ),
  !is.na(id),
  !str_ends(id, "x")
)
# # A tibble: 132 x 5
# year event.id id sex age
# <int> <int> <chr> <int> <int>
# 1 1987 6 L5 0 23
# 2 1987 6 L58 1 7
# 3 1987 6 L73 1 1
# 4 1992 9 L5 0 28
# 5 1992 9 L58 1 12
# 6 1992 9 L73 1 6
# 7 1992 11 L21 0 42
# 8 1994 12 L5 0 30
# 9 1994 12 L58 1 14
# 10 1994 12 L73 1 8
# # ... with 122 more rows

R Problems with glm-model due to missing values

I have problems with putting my data into a glm model. I think the problem is because I have many missing values in my data (below). I tried this so far:
# Build the model formula straight from the data frame
# (presumably first column ~ all remaining columns - see ?formula).
baseformula <- as.formula(df)
# Poisson regression with a log link.
# NOTE: in the data shown below every row has an NA in at least one of
# V58..V77, so the NA handling removes all rows and glm.fit() is left with
# zero observations - which matches the error quoted underneath.
glm(
  formula = baseformula,
  family = poisson(link = "log"),
  data = df,
  na.action = na.exclude
)
I am getting an Error:
Error in glm.fit(x = numeric(0), y = integer(0), weights = NULL, start
= NULL, : object 'fit' not found
Can somebody help me with this? When a variable is NA in my formula, I just want the glm to ignore the NAs and use these variables the same as variables without NA.
structure(list(V1 = c(0L, 1L, 3L, 0L, 0L, 0L, 2L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 2L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 1L, 5L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 3L, 0L, 1L,
0L), V48 = c(97.33, 96.88, 85.33, 83.75, 75.58, 86.13, 83, 95.75,
88.46, 80.25, 75, 67.17, 69.33, 64.08, 70.75, 78.46, 85.58, 83.42,
96.17, 76.5, 76.42, 65.38, 69.79, 68.38, 84.67, 89.67, 91.29,
80.54, 64.63, 72.29, 76.54, 65.33, 96.92, 91.38, 88.92, 80.63,
85.5, 76.38, 76.21, 78.29, 89.29, 87.04, 78.67), V49 = c(-0.9,
-0.1, 0, 0.9, -0.2, -6.3, -4.9, -1.2, -0.3, -1.4, 7.3, 10.5,
10.8, 17.5, 10.8, 9.2, 7.3, 8.2, 10.2, 8.5, 10.4, 25.6, 26.7,
28, 20.1, 20.2, 15.7, 15.3, 21.6, 24.8, 22.4, 27.1, 14.3, 13.8,
17.1, 19.5, 22.9, 21.9, 17.2, 18.9, 16.3, 14.2, 18.5), V58 = c(0.16208333,
-0.02576069, -0.24859501, -0.39733779, -0.35568168, -0.13908246,
-0.11529523, -0.07094469, 0.07592036, 0.13803538, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V59 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.40727943, 0.44007391, 0.50582446, 0.59001139,
0.55057958, 0.53888617, 0.55019019, 0.42592698, 0.347516, 0.52019593,
0.69611622, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V61 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.04555282, 0.16109391, 0.13651381, -0.02339007,
-0.24799358, -0.14477839, -0.0845835, -0.13505766, -0.06910931,
0.05876354, 0.11372484, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), V68 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.01575957,
-0.19924471, -0.39083879, -0.26620543, -0.10669409, -0.05650572,
0.06644096, 0.24769837, -0.11404654, -0.49358358, -0.27725445,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), V71 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, -0.1563703, -0.23797044, -0.37304736, -0.27425744,
-0.02347071, 0.36391633, 0.44316418, 0.21940339, 0.02321926,
-0.01531807, -0.05197635, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V73 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.46298985,
-0.7644245, -0.82771396, -0.81243484, -0.75591058, -0.55440085,
-0.35516327, -0.05602486, -0.12290976, -0.14458255, -0.17033091
), V77 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, -0.04571093, 0.25592819, 0.35649173, 0.3507695, 0.30446594,
0.36505183, 0.54215354, 0.47808018, 0.40325075, 0.32091592, 0.09212919
)), .Names = c("V1", "V48", "V49", "V58", "V59", "V61", "V68",
"V71", "V73", "V77"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L,
162L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L,
254L), class = "data.frame")

ggplot - bar chart with multiple y-variables

I'm trying to create a bar chart with three y-variables (Total_US_received, Total_US_required, Total_US_received_from.CERF) using ggplot2. All three y-variables are measured on the same scale (US$).
So far I've created a bar chart with Total_US_received as the y-variable and Disaster_category as the x-variable using this code:
# Horizontal bars of Total_US_received per disaster category.
# geom_col() draws bar heights straight from the y values.
ggplot(
  data = Template.2006.2017.text,
  mapping = aes(x = Disaster_category, y = Total_US_received)
) +
  geom_col(fill = "lightblue") +
  coord_flip()
However, every attempt I've made to include the other two y-variables in the graph has failed. How can I include the other two variables in the graph?
A follow-up question: can I make the graph show the mean of each category of the x-variable (Disaster_category), ignoring NAs, instead of the total sum?
Here's my data in dput (compressed version):
structure(list(Disaster_category = structure(c(1L, 15L, 17L,
15L, 5L, 8L, 13L, 8L, 2L, 8L, 2L, 3L, 8L, 2L, 8L, 2L, 10L, 5L,
7L, 8L, 15L, 2L, 8L, 2L, 15L, 15L, 8L, 15L, 2L, 17L, 2L, 7L,
2L, 8L, 2L, 3L, 2L, 8L, 8L, 2L, 8L, 17L, 2L, 3L, 8L, 8L, 2L,
8L, 8L, 8L, 2L, 8L, 3L, 2L, 3L, 2L, 8L, 2L, 3L, 8L, 2L, 8L, 2L,
15L, 5L, 8L, 13L, 8L, 15L, 2L, 8L, 2L, 3L, 2L, 3L, 15L, 8L, 3L,
2L, 3L, 8L, 2L, 3L, 2L, 8L, 2L, 8L, 15L, 2L, 8L, 8L, 5L, 2L,
8L, 2L, 3L, 2L, 17L, 2L, 17L, 2L, 4L, 5L, 8L, 8L, 2L, 8L, 15L,
2L, 15L, 15L, 7L, 2L, 8L, 2L, 15L, 15L, 7L, 8L, 17L, 2L, 15L,
8L, 2L, 17L, 2L, 3L, 8L, 2L, 5L, 2L, 8L, 2L, 8L, 8L, 15L, 2L,
8L, 2L, 15L, 8L, 2L, 15L, 8L, 7L, 8L, 15L, 2L, 8L, 8L, 7L, 13L,
8L, 2L, 8L, 2L, 8L, 8L, 3L, 2L, 13L, 2L, 3L, 8L, 2L, 15L, 15L,
8L, 15L, 2L, 5L, 3L, 3L, 8L, 3L, 2L, 8L, 8L, 3L, 2L, 8L, 2L,
15L, 2L, 17L, 2L, 5L, 2L, 8L, 2L, 15L, 2L, 3L, 8L, 8L, 2L, 8L,
8L, 2L, 3L), .Label = c("", " ", "Disease", "Disease related disaster",
"Drought", "Drought & storm", "Extreme temperature / fire", "Flood",
"Flood & drought", "Insect infestation", "Insect infestation & drought",
"Landslide & flood", "Landslide / mudslide", "Other", "Storm",
"Storm & flood", "Winter"), class = "factor"), Total_US_received_from.CERF = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 678307.8333,
678307.8333, 678307.8333, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1110469.5, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1905355, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2493246,
2493246, 2493246, 2493246, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 333333.3333, 333333.3333, 333333.3333, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9365420,
NA, NA, 14321419, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
Total_US_received = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 15507224.5, 15507224.5, 15507224.5,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 333333.3333, 333333.3333,
333333.3333, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), Total_US_required = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 20502064.83, 20502064.83, 20502064.83, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 3070192, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 49955895.25, 49955895.25, 49955895.25, 49955895.25,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 333333.3333,
333333.3333, 333333.3333, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
200L), class = "data.frame")
You can transform your data into long format and then plot it:
library(tidyr)
library(ggplot2)
# Stack the three US$ columns into (Total_US_category, Total_US) pairs so the
# measure can be mapped to `fill`, then draw the measures side by side for
# each disaster category. pivot_longer() replaces the superseded gather().
my_data %>%
  pivot_longer(
    cols = c(Total_US_received, Total_US_required, Total_US_received_from.CERF),
    names_to = "Total_US_category",
    values_to = "Total_US"
  ) %>%
  ggplot(aes(Disaster_category, y = Total_US, fill = Total_US_category)) +
  geom_col(position = position_dodge()) +
  coord_flip()
If you want to plot the mean-value per disaster you can first summarize the data with dplyr:
library(dplyr)
# Mean of each US$ measure per disaster category, ignoring missing values.
# Groups whose values are all NA come out as NaN (mean of an empty set).
# pivot_longer() replaces the superseded gather(); TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
my_data_sum <- my_data %>%
  pivot_longer(
    cols = c(Total_US_received, Total_US_required, Total_US_received_from.CERF),
    names_to = "Total_US_category",
    values_to = "Total_US"
  ) %>%
  group_by(Disaster_category, Total_US_category) %>%
  summarize(Total_US_mean = mean(Total_US, na.rm = TRUE))
my_data_sum
# A tibble: 33 x 3
# Groups: Disaster_category [?]
# Disaster_category Total_US_category Total_US_mean
# <fct> <chr> <dbl>
# 1 "" Total_US_received NaN
# 2 "" Total_US_received_from.CERF NaN
# 3 "" Total_US_required NaN
# 4 " " Total_US_received NaN
# 5 " " Total_US_received_from.CERF NaN
# 6 " " Total_US_required NaN
# 7 Disease Total_US_received NaN
# 8 Disease Total_US_received_from.CERF NaN
# 9 Disease Total_US_required NaN
# 10 Disease related disaster Total_US_received NaN
# ... with 23 more rows
And then plot the data:
# Dodged horizontal bars: one bar per measure within each disaster category,
# with bar length equal to the pre-computed mean.
ggplot(
  data = my_data_sum,
  mapping = aes(x = Disaster_category, y = Total_US_mean, fill = Total_US_category)
) +
  geom_col(position = "dodge") +
  coord_flip()

How to short the length of dput

In my last question it was pointed out that a smaller data sample would be easier to read and understand as part of the reproducible example. Before asking again I tried to shorten the data via dput(head(data)), but I get just as much output as with dput(data), dput(data[1:6, ]) or even dput(data)[1:6, ] (in this last case I also get the first 6 rows of the data printed after the whole dput).
Is there a simple way to do it? I didn't find anything among the dput options, and there must be a way to avoid deleting by hand what I do not want to show.
Here is the whole dput data:
>dput(data)
structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 71L, 72L, 76L, 77L, 78L,
83L, 87L, 88L, 89L, 93L, 96L, 97L, 101L, 103L, 104L, 105L, 106L,
109L, 111L, 113L, 114L, 116L), .Label = c("GO:0000746", "GO:0000910",
"GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399", "GO:0006412",
"GO:0006457", "GO:0006464", "GO:0006468", "GO:0006486", "GO:0006520",
"GO:0006725", "GO:0006766", "GO:0006810", "GO:0006811", "GO:0006839",
"GO:0006897", "GO:0006950", "GO:0006970", "GO:0006974", "GO:0006979",
"GO:0006986", "GO:0006997", "GO:0007005", "GO:0007010", "GO:0007029",
"GO:0007031", "GO:0007033", "GO:0007034", "GO:0007049", "GO:0007059",
"GO:0007114", "GO:0007124", "GO:0007126", "GO:0007165", "GO:0009408",
"GO:0009409", "GO:0015031", "GO:0016044", "GO:0016050", "GO:0016070",
"GO:0016071", "GO:0016072", "GO:0016192", "GO:0016567", "GO:0016568",
"GO:0016570", "GO:0019725", "GO:0030435", "GO:0031505", "GO:0032196",
"GO:0032989", "GO:0042221", "GO:0042254", "GO:0042594", "GO:0043543",
"GO:0044255", "GO:0044257", "GO:0044262", "GO:0045333", "GO:0046483",
"GO:0048193", "GO:0051169", "GO:0051186", "GO:0051276", "GO:0070271",
"GO:0000278", "GO:0000902", "GO:0002181", "GO:0005975", "GO:0006325",
"GO:0006353", "GO:0006360", "GO:0006366", "GO:0006383", "GO:0006397",
"GO:0006401", "GO:0006414", "GO:0006418", "GO:0006470", "GO:0006605",
"GO:0006629", "GO:0006865", "GO:0006869", "GO:0006873", "GO:0006887",
"GO:0006914", "GO:0008033", "GO:0008213", "GO:0008643", "GO:0009311",
"GO:0009451", "GO:0015931", "GO:0016197", "GO:0023052", "GO:0031399",
"GO:0032543", "GO:0042255", "GO:0042273", "GO:0042274", "GO:0043144",
"GO:0043934", "GO:0045454", "GO:0051052", "GO:0051321", "GO:0051603",
"GO:0051604", "GO:0051726", "GO:0055086", "GO:0070647", "GO:0000054",
"GO:0001403", "GO:0006352", "GO:0006354", "GO:0006364", "GO:0006413",
"GO:0006417", "GO:0006497", "GO:0008380", "GO:0009072", "GO:0051049",
"GO:0061025", "GO:0071554"), class = "factor"), GOdesc = structure(c(16L,
17L, 23L, 19L, 58L, 62L, 59L, 37L, 39L, 40L, 38L, 3L, 4L, 67L,
60L, 27L, 30L, 20L, 51L, 48L, 46L, 49L, 52L, 33L, 29L, 18L, 21L,
34L, 64L, 63L, 2L, 14L, 1L, 43L, 28L, 56L, 47L, 45L, 41L, 9L,
65L, 54L, 31L, 55L, 66L, 42L, 12L, 26L, 7L, 57L, 22L, 61L, 6L,
44L, 53L, 50L, 35L, 8L, 10L, 5L, 11L, 25L, 24L, 32L, 15L, 13L,
36L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), .Label = c("cell budding", "cell cycle",
"cellular amino acid and metabolic process", "cellular aromatic compound metabolic process",
"cellular carbohydrate metabolic process", "cellular component morphogenesis",
"cellular homeostasis", "cellular lipid metabolic process", "cellular membrane organization",
"cellular protein catabolic process", "cellular respiration",
"chromatin modification", "chromosome organization and biogenesis",
"chromosome segregation", "cofactor metabolic process", "conjugation",
"cytokinesis", "cytoskeleton organization and biogenesis", "DNA metabolic process",
"endocytosis", "ER organization and biogenesis", "fungal-type cell wall organization",
"generation of precursor metabolites and energy", "golgi vesicle transport",
"heterocycle metabolic process", "histone modification", "ion transport",
"meiosis", "mitchondrion organization", "mitochondrial transport",
"mRNA metabolic process", "nuclear transport", "nucleus organization",
"peroxisome organization", "protein acylation", "protein complex biogenesis",
"protein folding", "protein glycosylation", "protein modification process",
"protein phosphorylation", "protein transport", "protein ubiquitination",
"pseudohyphal growth", "response to chemical stimulus", "response to cold",
"response to DNA damage stimulus", "response to heat", "response to osmotic stress",
"response to oxidative stress", "response to starvation", "response to stress",
"response to unfolded protein", "ribosome biogenesis", "RNA metabolic process",
"rRNA metabolic process", "signal transduction", "sporulation resulting in formation of a cellular spore",
"transcription", "translation", "transport", "transposition",
"tRNA metabolic process", "vacuolar transport", "vacuole organizations",
"vesicle organization", "vesicle-mediated transport", "vitamin metabolic process"
), class = "factor"), GSA_p33_SC = c(NA, -1, NA, NA, NA, NA,
NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, NA,
-1, -1, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA, NA, NA,
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1,
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA,
NA), GSA_p38_SC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA,
NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA), GSA_p38_X33 = c(NA,
1, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 1,
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, -1, NA, NA, 1, NA, NA), GSA_p52_SC = c(NA, NA, NA, NA,
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA,
-1, -1, NA, NA, NA), GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1,
NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, -1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA,
NA, NA, NA), GSA_p64_SC = c(NA, NA, NA, NA, NA, NA, NA, 1, NA,
NA, 1, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA,
1, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, -1, NA, -1, -1,
NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, -1, 1,
-1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA
), GSA_p64_X33 = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 1, NA, NA,
NA, NA, NA, NA, -1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA,
NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, -1, -1), GSA_SC_X33 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA,
NA, NA, NA, NA, NA, NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA,
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA,
1, NA, NA, 1, -1, NA, -1, NA, NA, NA, -1, 1, NA, NA, NA, NA,
NA, -1, NA, NA, NA, NA, NA, NA)), .Names = c("GOterm", "GOdesc",
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC",
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA,
-89L), class = "data.frame")
A shortened version could be like:
# First six rows of `data` as a self-contained, valid data frame literal.
structure(list(
  GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L),
    .Label = c("GO:0000746", "GO:0000910", "GO:0006091", "GO:0006259",
               "GO:0006351", "GO:0006399"), class = "factor"),
  # Factor codes index into .Label, so once the label list is cut down to the
  # six levels actually used the codes must be renumbered as well (the
  # original codes 16, 17, 23, 19, 58, 62 pointed past the end of the list).
  GOdesc = structure(c(1L, 2L, 4L, 3L, 5L, 6L),
    .Label = c("conjugation", "cytokinesis", "DNA metabolic process",
               "generation of precursor metabolites and energy",
               "transcription", "tRNA metabolic process"), class = "factor"),
  GSA_p33_SC = c(NA, -1, NA, NA, NA, NA),
  GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA),
  # All-NA columns are typed NA_real_ so they stay numeric like the full data
  GSA_p38_SC = rep(NA_real_, 6),
  GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA),
  GSA_p52_SC = rep(NA_real_, 6),
  GSA_p52_X33 = rep(NA_real_, 6),
  GSA_p64_SC = rep(NA_real_, 6),
  GSA_p64_X33 = c(1, NA, NA, NA, NA, NA),
  GSA_SC_X33 = rep(NA_real_, 6)),
  .Names = c("GOterm", "GOdesc",
    "GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC",
    "GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"),
  row.names = c(NA, -6L), class = "data.frame")
All of that extra funk is from your factor levels. If you know your problem will still be reproducible after dropping these levels, then you can consider (wait for it) droplevels:
> dput(droplevels(head(data)))
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746",
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399"
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L,
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process",
"generation of precursor metabolites and energy", "transcription",
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA,
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA),
GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1,
NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc",
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC",
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA,
6L), class = "data.frame")
This is more easily demonstrated in the following example:
x <- factor("A", levels = LETTERS)
x
# [1] A
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
dput(x)
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H",
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
# "V", "W", "X", "Y", "Z"), class = "factor")
dput(droplevels(x))
# structure(1L, .Label = "A", class = "factor")
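Another way to shorten it is to convert the columns to character before calling dput, so that no factor levels are written out at all. The data can then be read back in with as.data.frame. Note that this turns every column into a character vector, numeric ones included: in R versions before 4.0, as.data.frame converts them all to factors (containing only the levels actually present), while in R 4.0 and later they stay character unless you pass stringsAsFactors = TRUE. Convert the numeric columns back (e.g. with as.numeric) if their type matters for your problem.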
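First, take a subset of the rows (sample() picks them at random, so call set.seed() beforehand if you need the same rows every time):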
> data2 <- data[sample(nrow(data), 4), ]
Then dput as characters
> d <- dput(lapply(data2, as.character))
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914",
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis"
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_,
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_,
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_,
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA,
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA,
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA,
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33",
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC",
"GSA_p64_X33", "GSA_SC_X33"))
And read back in
> as.data.frame(d)

Resources