R plotly : Separate (with a blank) x-axis label Data - r

I have data with several date aggregations (Year, Quarter, Month).
I am trying to leave a space between each date aggregation in the x-axis labels.
I obtain this for the moment :
And I want to obtain :
Here my data :
My dataframe (dput) :
r8_plot = structure(list(DATE = c(2016L, 2017L, 2018L, 201701L, 201702L,
201703L, 201704L, 201801L, 201802L, 201803L, 201804L, 201801L,
201802L, 201803L, 201804L, 201805L, 201806L, 201807L, 201808L,
201809L, 201810L, 201811L, 201812L, 201844L, 201845L, 201846L,
201847L, 201848L, 201849L, 201850L), Var1 = c(6.64, 6.21, 6.53,
6.31, 6.01, 6.36, 6.17, 6.76, 6.37, 6.68, 6.27, 7.5, 6.49, 6.4,
6.54, 6.18, 6.37, 5.98, 6.37, 7.48, 6.6, 5.97, 6.25, 5.42, 6.18,
5.81, 6.46, 6.36, 6.05, 6.35), Var2 = c(2.38, 2.25, 2.36, 2.22,
2.52, 1.98, 2.27, 2.44, 2.31, 2.27, 2.41, 2.53, 2.25, 2.51, 2.35,
2.42, 2.19, 2.51, 1.91, 2.38, 2.34, 2.29, 2.68, 2.15, 1.89, 2.6,
2.52, 2.37, 2.97, 2.71), Var3 = c(4.26, 3.96, 4.17, 4.09, 3.5,
4.38, 3.9, 4.32, 4.06, 4.4, 3.86, 4.96, 4.23, 3.9, 4.19, 3.77,
4.18, 3.47, 4.46, 5.1, 4.26, 3.68, 3.57, 3.27, 4.29, 3.2, 3.95,
3.99, 3.09, 3.64), Var4 = c(35.84, 36.17, 36.08, 35.2, 41.86,
31.17, 36.76, 36.07, 36.27, 34.07, 38.43, 33.78, 34.76, 39.18,
35.95, 39.07, 34.35, 42.04, 29.91, 31.8, 35.48, 38.38, 42.86,
39.72, 30.53, 44.85, 38.94, 37.24, 48.98, 42.63), Var5 = c("Y",
"Y", "Y", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "W", "W", "W", "W",
"W", "W", "W"), Var6 = structure(1:30, .Label = c("2016", "2017",
"2018", "Q1-2017", "Q2-2017", "Q3-2017", "Q4-2017", "Q1-2018",
"Q2-2018", "Q3-2018", "Q4-2018", "M01-2018", "M02-2018", "M03-2018",
"M04-2018", "M05-2018", "M06-2018", "M07-2018", "M08-2018", "M09-2018",
"M10-2018", "M11-2018", "M12-2018", "W44-2018", "W45-2018", "W46-2018",
"W47-2018", "W48-2018", "W49-2018", "W50-2018"), class = "factor"),
Var7 = c(7.1, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.6,
6.6, 6.7, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6,
6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6)), .Names = c("DATE",
"Var1", "Var2", "Var3", "Var4", "Var5", "Var6", "Var7"), row.names = c(NA,
30L), class = "data.frame")
r8_plot$Var6 = factor(r8_plot$Var6, labels = unique(r8_plot$Var6), levels=unique(r8_plot$Var6))
library(plotly)
r8_plot %>% plot_ly(x = ~Var6) %>%
add_bars(y = ~Var1,marker = list(color = '#00802b'),
name = "Var1") %>%
add_bars(y = ~Var2,marker = list(color = '#ff9933'),
name = "Var2")%>%
add_lines(y = ~Var4,
name = "Var4",
yaxis = "y2", line = list(color = '#1a1aff'))%>%
add_lines(y = ~Var7,
name = "Var7",
yaxis = "y1")%>%
layout(barmode = "stack",
yaxis2 = list(overlaying = "y",side = "right"),barmode = "stack",xaxis = list(title = 'DATE'), yaxis = list(title = 'All quantity'), title ="Chart") %>% layout(height = 750, width = 1000, hovermode = 'closest',margin = list(b = 115))
Thanks in advance

You can add NaN y-values to any data frame and Plotly will interrupt the plot at this position.
For example
library(plotly)
# Minimal demo: the third point is NaN in both coordinates, which is the
# position where the drawn line is expected to be interrupted.
data <- data.frame(
  x = c(1, 2, NaN, 3, 4),
  y = c(1, 2, NaN, 3, 4)
)
data %>%
  plot_ly(x = ~x) %>%
  add_lines(y = ~y)
will give you
For more complex data, like in the question, a function introducing the empty rows might be easier than doing it manually.
# Insert a spacer row wherever the aggregation level changes.
#
# The aggregation level of a row is taken to be the first character of its
# `Var6` label (e.g. "2" for years, "Q", "M", "W"). Each time that character
# differs from the previous row's, one row filled with NaN is inserted before
# the current row, and its `Var6` is overwritten with a run of blanks whose
# length grows with every spacer so that all spacer labels stay unique.
#
# @param data A data frame with a `Var6` column of labels. `Var6` should be
#   character (not factor) so that the blank labels can be assigned.
# @return `data` with the spacer rows added and row names renumbered from 1.
#   Inputs with fewer than two rows are returned unchanged.
split_by_date <- function(data) {
  data_length <- nrow(data)
  # With fewer than two rows there is no boundary to mark (and 2:n would
  # count backwards).
  if (data_length < 2) {
    return(data)
  }

  index <- 0 # number of spacer rows inserted so far
  new_data <- data
  new_line <- rep(NaN, length(data)) # one NaN per column

  for (i in 2:data_length) {
    current_prefix <- substr(toString(data$Var6[[i]]), 1, 1)
    previous_prefix <- substr(toString(data$Var6[[i - 1]]), 1, 1)
    if (current_prefix != previous_prefix) {
      # Rows already placed in new_data: the i - 1 original rows before the
      # break plus the `index` spacers added earlier. seq_len() is required
      # here: `1:i + index - 1` parses as `(1:i) + index - 1`, which shifts
      # the whole range and drops leading rows once index >= 2.
      head_rows <- seq_len(i + index - 1)
      new_data <- rbind.data.frame(
        new_data[head_rows, , drop = FALSE],
        new_line,
        data[i:data_length, , drop = FALSE]
      )
      # Unique all-blank label for this spacer (1, 3, 5, ... characters).
      new_data$Var6[[index + i]] <- paste(replicate(index + 1, " "), collapse = " ")
      index <- index + 1
      rownames(new_data) <- seq_len(data_length + index)
    }
  }
  new_data
}
We just need to make sure that the x-value is always unique, i.e. simply concatenate an increasing number of spaces. Otherwise we'll just get a single break in the graph.
An additional column x is also introduced to help with plotting the x-values in the correct order.
r8_plot = structure(list(DATE = c(2016L, 2017L, 2018L, 201701L, 201702L, 201703L, 201704L, 201801L, 201802L, 201803L, 201804L, 201801L, 201802L, 201803L, 201804L, 201805L, 201806L, 201807L, 201808L, 201809L, 201810L, 201811L, 201812L, 201844L, 201845L, 201846L, 201847L, 201848L, 201849L, 201850L),
Var1 = c(6.64, 6.21, 6.53, 6.31, 6.01, 6.36, 6.17, 6.76, 6.37, 6.68, 6.27, 7.5, 6.49, 6.4, 6.54, 6.18, 6.37, 5.98, 6.37, 7.48, 6.6, 5.97, 6.25, 5.42, 6.18, 5.81, 6.46, 6.36, 6.05, 6.35),
Var2 = c(2.38, 2.25, 2.36, 2.22, 2.52, 1.98, 2.27, 2.44, 2.31, 2.27, 2.41, 2.53, 2.25, 2.51, 2.35, 2.42, 2.19, 2.51, 1.91, 2.38, 2.34, 2.29, 2.68, 2.15, 1.89, 2.6, 2.52, 2.37, 2.97, 2.71),
Var3 = c(4.26, 3.96, 4.17, 4.09, 3.5, 4.38, 3.9, 4.32, 4.06, 4.4, 3.86, 4.96, 4.23, 3.9, 4.19, 3.77, 4.18, 3.47, 4.46, 5.1, 4.26, 3.68, 3.57, 3.27, 4.29, 3.2, 3.95, 3.99, 3.09, 3.64),
Var4 = c(35.84, 36.17, 36.08, 35.2, 41.86, 31.17, 36.76, 36.07, 36.27, 34.07, 38.43, 33.78, 34.76, 39.18, 35.95, 39.07, 34.35, 42.04, 29.91, 31.8, 35.48, 38.38, 42.86, 39.72, 30.53, 44.85, 38.94, 37.24, 48.98, 42.63),
Var5 = c("Y", "Y", "Y", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "W", "W", "W", "W", "W", "W", "W"),
Var6 = c("2016", "2017", "2018", "Q1-2017", "Q2-2017", "Q3-2017", "Q4-2017", "Q1-2018", "Q2-2018", "Q3-2018", "Q4-2018", "M01-2018", "M02-2018", "M03-2018","M04-2018", "M05-2018", "M06-2018", "M07-2018", "M08-2018", "M09-2018", "M10-2018", "M11-2018", "M12-2018", "W44-2018", "W45-2018", "W46-2018", "W47-2018", "W48-2018", "W49-2018", "W50-2018"),
Var7 = c(7.1, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6,6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6)),
.Names = c("DATE", "Var1", "Var2", "Var3", "Var4", "Var5", "Var6", "Var7"), row.names = c(NA, 30L), class = "data.frame")
# Add the spacer rows, then build `x` as a factor whose integer codes follow
# the row order and whose labels are the Var6 strings, so the axis keeps the
# rows in data-frame order.
plot <- split_by_date(r8_plot)
plot$x <- structure(seq_along(plot$Var6), .Label = plot$Var6, class = "factor")

plot %>%
  plot_ly(x = ~x, height = 750, width = 1000) %>%
  # Stacked bars on the primary y axis.
  add_bars(
    y = ~Var1,
    name = "Var1",
    marker = list(color = "#00802b")
  ) %>%
  add_bars(
    y = ~Var2,
    name = "Var2",
    marker = list(color = "#ff9933")
  ) %>%
  # Var4 is drawn against the secondary (right-hand) axis, Var7 against the
  # primary one.
  add_lines(
    y = ~Var4,
    name = "Var4",
    yaxis = "y2",
    line = list(color = "#1a1aff")
  ) %>%
  add_lines(
    y = ~Var7,
    name = "Var7",
    yaxis = "y1"
  ) %>%
  layout(
    title = "Chart",
    barmode = "stack",
    hovermode = "closest",
    xaxis = list(title = "DATE", range = c(-0.1, 10)),
    yaxis = list(title = "All quantity"),
    yaxis2 = list(overlaying = "y", side = "right")
  )

Related

I want to grow a variable according to the weight of each country and the global changes (Creating a Bartik Instrument in R)

I am conducting some regression analysis and I need to first create a Bartik Instrument to use as an IV. Essentially, I have 10 decile groups of the income distribution. These are both at a global level and a country level (as there is an unbalanced panel of countries in the dataset). I want to grow each country's decile groups according to worldwide changes.
The image above represents the world and shows the percentage of people in each income decile on the left-hand side. On the right are the calculated percentage changes for each income decile between years. There are 10 columns all up for the 10 deciles.
The image below shows the country's decile groups. The starting year will be 1990 for each country (ie, the beginning decile proportion for each decile will be the year 1990 for each country. This serves as the "weight" in all of the statistics). Then, each decile will grow at the same percentage change as the global level.
For example, if dp1 is 1.92 in 1990 for the country Afghanistan, 1991 will be calculated from the global percentage change between 1990 and 1991. Because the global change was -2.857%, the predicted value of dp1 in 1991 for Afghanistan will be 1.865. This value will then be used in the calculation for predicting 1992.
The issue is, it needs to start at 1990 for each country and end in the final predicted year of 2019. I cannot just use a mutate function as it won't recognize that each country restarts in 1990.
Any guidance on this issue will be greatly appreciated. Please let me know if you need to see any more of the data as it is all open source and can therefore be freely shared.
Dput of the world data frame:
structure(list(Entity = c("World", "World", "World", "World",
"World", "World", "World", "World", "World", "World", "World",
"World", "World", "World", "World", "World", "World", "World",
"World", "World", "World", "World", "World", "World", "World",
"World", "World", "World", "World", "World", "World"), Year = c(1990,
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020), Code = c("WLD",
"WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD",
"WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD",
"WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD", "WLD",
"WLD", "WLD", "WLD"), gini = c("69.95", "70.18", "70.07", "69.88",
"69.79", "69.51", "69.16", "68.93", "68.98", "68.83", "68.76",
"68.43", "68.1", "67.68", "67.24", "66.79", "66.22", "65.57",
"64.9", "63.8", "63.28", "62.92", "62.54", "62.11", "61.68",
"61.47", "61.12", "60.92", "60.81", "60.65", "60.6"), palma = c(14.44,
14.74, 14.2, 13.7, 13.28, 12.95, 12.46, 12.31, 12.12, 12.04,
12.12, 11.74, 11.49, 11.18, 10.91, 10.55, 10.08, 9.67, 9.27,
8.53, 8.29, 8.12, 7.91, 7.74, 7.47, 7.36, 7.25, 7.11, 7.07, 6.95,
6.97), dp1 = c(0.35, 0.34, 0.35, 0.36, 0.36, 0.38, 0.38, 0.38,
0.39, 0.38, 0.38, 0.39, 0.38, 0.38, 0.39, 0.39, 0.4, 0.4, 0.4,
0.43, 0.43, 0.43, 0.43, 0.43, 0.45, 0.45, 0.44, 0.43, 0.44, 0.44,
0.43), dp2 = c(0.71, 0.72, 0.76, 0.78, 0.82, 0.82, 0.84, 0.86,
0.87, 0.9, 0.88, 0.89, 0.9, 0.92, 0.93, 0.93, 0.95, 0.98, 1.01,
1.07, 1.09, 1.1, 1.1, 1.14, 1.14, 1.14, 1.17, 1.19, 1.18, 1.19,
1.2), dp3 = c(1.09, 1.06, 1.1, 1.18, 1.19, 1.25, 1.29, 1.31,
1.3, 1.33, 1.31, 1.36, 1.38, 1.39, 1.43, 1.48, 1.5, 1.54, 1.59,
1.69, 1.72, 1.74, 1.79, 1.81, 1.83, 1.88, 1.89, 1.88, 1.94, 1.95,
1.94), dp4 = c(1.52, 1.5, 1.59, 1.64, 1.74, 1.75, 1.82, 1.82,
1.9, 1.89, 1.88, 1.93, 1.97, 2, 2.02, 2.07, 2.18, 2.24, 2.29,
2.44, 2.48, 2.51, 2.57, 2.58, 2.71, 2.72, 2.73, 2.82, 2.76, 2.81,
2.8), dp5 = c(2.11, 2.15, 2.27, 2.34, 2.42, 2.51, 2.53, 2.56,
2.6, 2.64, 2.7, 2.72, 2.74, 2.8, 2.92, 2.99, 3.05, 3.1, 3.2,
3.44, 3.52, 3.57, 3.65, 3.77, 3.76, 3.86, 3.93, 3.96, 3.95, 4.03,
4.04), dp6 = c(3.23, 3.18, 3.25, 3.38, 3.44, 3.52, 3.6, 3.64,
3.66, 3.74, 3.68, 3.87, 3.98, 4.08, 4.07, 4.14, 4.35, 4.54, 4.68,
4.85, 5.02, 5.11, 5.2, 5.29, 5.48, 5.43, 5.54, 5.56, 5.62, 5.6,
5.57), dp7 = c(5.49, 5.42, 5.43, 5.42, 5.37, 5.41, 5.5, 5.66,
5.49, 5.57, 5.67, 5.73, 5.86, 6.03, 6.23, 6.23, 6.49, 6.63, 6.91,
7.12, 7.37, 7.38, 7.59, 7.72, 7.84, 7.94, 7.92, 8.02, 8, 8.05,
8.13), dp8 = c(10.96, 10.76, 10.3, 10.04, 9.78, 9.73, 9.78, 9.82,
9.67, 9.61, 9.7, 9.75, 9.73, 10, 10.18, 10.5, 10.55, 10.88, 11.04,
11.32, 11.4, 11.63, 11.62, 11.72, 11.78, 11.82, 12.05, 11.85,
12.1, 12.08, 12.12), dp9 = c(21.51, 21.26, 20.81, 20.53, 20.22,
20.17, 20.15, 20.03, 19.9, 19.75, 19.77, 19.7, 19.88, 19.72,
19.74, 19.75, 19.69, 19.71, 19.75, 19.51, 19.48, 19.49, 19.39,
19.36, 19.23, 19.14, 19.05, 19.37, 19.25, 19.3, 19.38), dp10 = c(52.93,
53.51, 54.05, 54.24, 54.58, 54.39, 54.02, 53.85, 54.14, 54.13,
53.96, 53.6, 53.13, 52.61, 52.04, 51.45, 50.77, 49.93, 49.08,
48.07, 47.44, 46.98, 46.61, 46.14, 45.75, 45.56, 45.23, 44.9,
44.72, 44.52, 44.37), `dp1_PChangeFrom-1` = c(NA, -0.0285714285714284,
0.0294117647058822, 0.0285714285714286, 0, 0.0555555555555556,
0, 0, 0.0263157894736842, -0.0256410256410257, 0, 0.0263157894736842,
-0.0256410256410257, 0, 0.0263157894736842, 0, 0.0256410256410257,
0, 0, 0.0749999999999999, 0, 0, 0, 0, 0.0465116279069768, 0,
-0.0222222222222222, -0.0227272727272727, 0.0232558139534884,
0, -0.0227272727272727), `dp2_PChangeFrom-1` = c(NA, 0.0140845070422535,
0.0555555555555556, 0.0263157894736842, 0.0512820512820512, 0,
0.024390243902439, 0.0238095238095238, 0.0116279069767442, 0.0344827586206897,
-0.0222222222222222, 0.0113636363636364, 0.0112359550561798,
0.0222222222222222, 0.0108695652173913, 0, 0.0215053763440859,
0.0315789473684211, 0.0306122448979592, 0.0594059405940595, 0.0186915887850467,
0.00917431192660551, 0, 0.0363636363636362, 0, 0, 0.0263157894736842,
0.0170940170940171, -0.00840336134453782, 0.00847457627118645,
0.00840336134453782), `dp3_PChangeFrom-1` = c(NA, -0.0275229357798165,
0.0377358490566038, 0.0727272727272726, 0.00847457627118645,
0.0504201680672269, 0.032, 0.0155038759689923, -0.00763358778625955,
0.0230769230769231, -0.0150375939849624, 0.0381679389312977,
0.014705882352941, 0.00724637681159421, 0.0287769784172662, 0.034965034965035,
0.0135135135135135, 0.0266666666666667, 0.0324675324675325, 0.0628930817610062,
0.0177514792899408, 0.0116279069767442, 0.0287356321839081, 0.0111731843575419,
0.0110497237569061, 0.0273224043715846, 0.00531914893617022,
-0.0052910052910053, 0.0319148936170213, 0.00515463917525774,
-0.00512820512820513), `dp4_PChangeFrom-1` = c(NA, -0.0131578947368421,
0.0600000000000001, 0.031446540880503, 0.0609756097560976, 0.00574712643678161,
0.04, 0, 0.0439560439560439, -0.00526315789473685, -0.0052910052910053,
0.0265957446808511, 0.0207253886010363, 0.0152284263959391, 0.01,
0.0247524752475247, 0.0531400966183576, 0.0275229357798165, 0.0223214285714285,
0.0655021834061135, 0.0163934426229508, 0.0120967741935483, 0.0239043824701195,
0.00389105058365768, 0.0503875968992248, 0.00369003690036909,
0.00367647058823522, 0.0329670329670329, -0.0212765957446809,
0.0181159420289856, -0.00355871886121005), `dp5_PChangeFrom-1` = c(NA,
0.018957345971564, 0.0558139534883721, 0.0308370044052863, 0.0341880341880342,
0.037190082644628, 0.00796812749003985, 0.0118577075098815, 0.015625,
0.0153846153846154, 0.0227272727272727, 0.00740740740740741,
0.00735294117647059, 0.021897810218978, 0.0428571428571429, 0.0239726027397261,
0.0200668896321069, 0.0163934426229509, 0.0322580645161291, 0.0749999999999999,
0.0232558139534884, 0.0142045454545454, 0.0224089635854342, 0.0328767123287672,
-0.00265251989389927, 0.0265957446808511, 0.0181347150259068,
0.00763358778625949, -0.00252525252525247, 0.020253164556962,
0.00248138957816372), `dp6_PChangeFrom-1` = c(NA, -0.0154798761609907,
0.0220125786163522, 0.04, 0.0177514792899408, 0.0232558139534884,
0.0227272727272727, 0.0111111111111111, 0.0054945054945055, 0.0218579234972678,
-0.0160427807486631, 0.0516304347826087, 0.0284237726098191,
0.0251256281407035, -0.00245098039215681, 0.017199017199017,
0.0507246376811594, 0.0436781609195403, 0.0308370044052863, 0.0363247863247863,
0.0350515463917526, 0.0179282868525898, 0.0176125244618395, 0.0173076923076923,
0.0359168241965974, -0.00912408759124101, 0.0202578268876612,
0.00361010830324902, 0.0107913669064749, -0.00355871886121005,
-0.00535714285714274), `dp7_PChangeFrom-1` = c(NA, -0.0127504553734062,
0.00184501845018446, -0.00184162062615097, -0.00922509225092248,
0.00744878957169461, 0.0166358595194085, 0.0290909090909091,
-0.0300353356890459, 0.0145719489981785, 0.0179533213644524,
0.0105820105820107, 0.0226876090750436, 0.0290102389078498, 0.033167495854063,
0, 0.0417335473515248, 0.0215716486902927, 0.0422322775263952,
0.0303907380607815, 0.0351123595505618, 0.00135685210312073,
0.0284552845528455, 0.0171277997364954, 0.0155440414507772, 0.0127551020408164,
-0.00251889168765749, 0.0126262626262626, -0.00249376558603486,
0.00625000000000009, 0.00993788819875777), `dp8_PChangeFrom-1` = c(NA,
-0.0182481751824818, -0.0427509293680297, -0.0252427184466021,
-0.0258964143426295, -0.0051124744376277, 0.00513874614594028,
0.00408997955010234, -0.0152749490835031, -0.00620475698035165,
0.00936524453694067, 0.00515463917525781, -0.00205128205128201,
0.0277492291880781, 0.018, 0.031434184675835, 0.00476190476190483,
0.0312796208530806, 0.014705882352941, 0.0253623188405798, 0.00706713780918729,
0.0201754385964913, -0.00085984522785912, 0.00860585197934608,
0.00511945392491457, 0.00339558573853998, 0.0194585448392555,
-0.0165975103734441, 0.0210970464135021, -0.00165289256198344,
0.00331125827814562), `dp9_PChangeFrom-1` = c(NA, -0.0116225011622501,
-0.0211665098777047, -0.0134550696780393, -0.015099853872382,
-0.00247279920870411, -0.000991571641051221, -0.00595533498759293,
-0.00649026460309548, -0.00753768844221098, 0.00101265822784808,
-0.0035407182599899, 0.00913705583756344, -0.00804828973843059,
0.00101419878296144, 0.000506585612968671, -0.00303797468354424,
0.00101574403250379, 0.00202942668696089, -0.0121518987341771,
-0.00153767298821123, 0.000513347022587167, -0.00513083632632108,
-0.0015471892728211, -0.0067148760330578, -0.00468018720748829,
-0.00470219435736676, 0.0167979002624672, -0.0061951471347445,
0.00259740259740263, 0.00414507772020717), `dp10_PChangeFrom-1` = c(NA,
0.0109578688834309, 0.0100915716688469, 0.00351526364477345,
0.00626843657817102, -0.00348112861854155, -0.00680272108843533,
-0.00314698259903743, 0.00538532961931289, -0.000184706316956003,
-0.00314058747459822, -0.00667160859896218, -0.00876865671641789,
-0.00978731413514028, -0.0108344421212697, -0.011337432744043,
-0.0132167152575316, -0.0165452038605476, -0.0170238333667134,
-0.0205786471067644, -0.0131058872477637, -0.00969645868465432,
-0.00787569178373771, -0.0100836730315383, -0.00845253576072823,
-0.00415300546448082, -0.00724319578577712, -0.00729604244970149,
-0.00400890868596881, -0.00447227191413228, -0.00336927223719689
)), row.names = c(NA, -31L), class = "data.frame")
Dput of the countries data frame:
structure(list(Year = numeric(0), Entity = character(0), Code = character(0),
gini = character(0), palma = numeric(0), dp1 = numeric(0),
dp2 = numeric(0), dp3 = numeric(0), dp4 = numeric(0), dp5 = numeric(0),
dp6 = numeric(0), dp7 = numeric(0), dp8 = numeric(0), dp9 = numeric(0),
dp10 = numeric(0)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = integer(0), groups = structure(list(
Entity = character(0), Year = numeric(0), .rows = structure(list(), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = integer(0), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))

How to rearrange dataset for ScatterPlot using ggplot in R

I'm trying to plot the countries current co2 emissions over the 6 years but I'm having trouble with the way the data is entered in the excel file. I tried using unlisting and tried combining new vars but with no luck. Any help on determining aes(x =, y=) for the data sets I provided?
structure(list(`2010` = c(5.78, 7.34, 8.74, 1.45, 17.9), `2011` = c(5.76,
7.56, 8.49, 1.56, 17.1), `2012` = c(5.75, 7.36, 7.62, 1.56, 17.5
), `2013` = c(5.23, 6.71, 7.36, 1.7, 17.5), `2014` = c(5.3, 6.42,
7.04, 1.76, 16.9), `2015` = c(5.31, 6.04, 6.73, 1.79, 16.4)), row.names = c(59L,
62L, 69L, 79L, 184L), class = "data.frame")
Assuming each row is a different country:
library(tidyverse)
j_df <- structure(list(
`2010` = c(5.78, 7.34, 8.74, 1.45, 17.9),
`2011` = c(5.76, 7.56, 8.49, 1.56, 17.1),
`2012` = c(5.75, 7.36, 7.62, 1.56, 17.5 ),
`2013` = c(5.23, 6.71, 7.36, 1.7, 17.5),
`2014` = c(5.3, 6.42, 7.04, 1.76, 16.9),
`2015` = c(5.31, 6.04, 6.73, 1.79, 16.4)
), row.names = c(59L, 62L, 69L, 79L, 184L), class = "data.frame")
# Row names are assumed to identify the countries, so they are promoted
# directly to a `Country` column. The year columns are then stacked into
# long form (one row per country-year) for plotting.
j_df %>%
  rownames_to_column(var = "Country") %>%
  pivot_longer(
    cols = `2010`:`2015`,
    names_to = "year",
    values_to = "C_Emissions"
  ) %>%
  ggplot(aes(x = year, y = C_Emissions, color = Country)) +
  geom_point() +
  # `year` is a discrete (character) axis, so lines need an explicit group.
  geom_line(aes(group = Country))

Getting the distance matrix back from already clustered data

I have used hclust together with the TSclust package to do agglomerative hierarchical clustering. My question is: can I get the dissimilarity (distance) matrix back from hclust? I want the distance values so that I can rank which variables are closest to a single given variable in the group of variables.
example: If (x1,x2, x3,x4,x5,x6,x7,x8,x9,x10) are the variables used to form the distance matrix, then what I wanted is the distance between x3 and the rest of variables (x3x1,x3x2,x3x4,x3x5, and so on). Can we do that? Here is the code and reproducible data.
Data:
structure(list(x1 = c(186.41, 100.18, 12.3, 14.38, 25.97, 0.06,
0, 6.17, 244.06, 19.26, 256.18, 255.69, 121.88, 75, 121.45, 11.34,
34.68, 3.09, 34.3, 26.13, 111.31), x2 = c(327.2, 8.05, 4.23,
6.7, 3.12, 1.91, 37.03, 39.17, 140.06, 83.72, 263.29, 261.22,
202.48, 23.27, 2.87, 7.17, 14.48, 3.41, 5.95, 70.56, 91.58),
x3 = c(220.18, 126.14, 98.59, 8.56, 0.5, 0.9, 17.45, 191.1,
164.64, 224.36, 262.86, 237.75, 254.88, 42.05, 9.12, 0.04,
12.22, 0.61, 61.86, 114.08, 78.94), x4 = c(90.74, 26.11,
47.86, 10.86, 3.74, 23.69, 61.79, 68.12, 87.92, 171.76, 260.98,
266.62, 96.27, 57.15, 78.89, 16.73, 6.59, 49.44, 57.21, 202.2,
67.17), x5 = c(134.09, 27.06, 7.44, 4.53, 17, 47.66, 95.96,
129.53, 40.23, 157.37, 172.61, 248.56, 160.84, 421.94, 109.93,
22.77, 2.11, 49.18, 64.13, 52.61, 180.87), x6 = c(173.17,
46.68, 6.54, 3.05, 0.35, 0.12, 5.09, 72.46, 58.19, 112.31,
233.77, 215.82, 100.63, 65.84, 2.69, 0.01, 3.63, 12.93, 66.55,
28, 61.74), x7 = c(157.22, 141.81, 19.98, 116.18, 16.55,
122.3, 62.67, 141.84, 78.3, 227.27, 340.22, 351.38, 147.73,
0.3, 56.12, 33.2, 5.51, 54.4, 82.98, 152.66, 218.26), x8 = c(274.08,
51.92, 54.86, 15.37, 0.31, 0.05, 36.3, 162.04, 171.78, 181.39,
310.73, 261.55, 237.99, 123.99, 1.92, 0.74, 0.23, 18.51,
7.68, 65.55, 171.33), x9 = c(262.71, 192.34, 2.75, 21.68,
1.69, 3.92, 0.09, 9.33, 120.36, 282.92, 236.7, 161.59, 255.44,
126.44, 7.63, 2.04, 1.02, 0.12, 5.87, 146.25, 134.11), x10 = c(82.71,
44.09, 1.52, 2.63, 4.38, 28.64, 168.43, 80.62, 20.36, 39.29,
302.31, 247.52, 165.73, 18.27, 2.67, 1.77, 23.13, 53.47,
53.14, 46.61, 86.29)), class = "data.frame", row.names = c(NA,
-21L))
Code:
as.ts(cdata)
library(dplyr) # data wrangling
library(ggplot2) # grammar of graphics
library(ggdendro) # dendrograms
library(TSclust) # cluster time series
# cluster analysis
dist_ts <- TSclust::diss(SERIES = t(cdata), METHOD = "INT.PER") # note the data frame must be transposed
hc <- stats::hclust(dist_ts, method="complete") # method can be also "average" or diana (for DIvisive ANAlysis Clustering)
hcdata <- ggdendro::dendro_data(hc)
names_order <- hcdata$labels$label
# Use the following to remove labels from dendogram so not doubling up - but good for checking hcdata$labels$label <- ""
hcdata%>%ggdendro::ggdendrogram(., rotate=FALSE, leaf_labels=FALSE)
I believe the object you are looking for is stored in the variable dist_ts:
dist_ts <- TSclust::diss(SERIES = t(cdata), METHOD = "INT.PER")
print(dist_ts)

How to add text onto each bar?

I am trying to add text onto each bar; can someone please show me how to do it? I am not an R user, so please excuse my abilities. I appreciate your help. I got a few responses earlier suggesting annotate or geom_text, but I am not sure how to run them.
Here is my code:
library(viridis)
library(hrbrthemes)
library(tidyr)
library(dplyr)
library(tibble)
library(ggplot2)
df <- data.frame(
H1 = c(6.36, 3.03, 6.85, 4.07, 4.69, 6.27, 6.67, 3.11, 5.07, 6.14, 5.93, 6.49),
H2 = c(5.15, 5.00, 5.71, 5.50, 4.99, 5.81, 6.05, 5.76, 5.28, 5.69, 5.69, 5.06),
H3 = c(3.85, 5.13, 4.99, 4.91, 5.01, 5.73, 5.77, 5.94, 5.57, 5.35, 6.00, 4.39),
H4 = c(3.84, 4.80, 5.15, 4.85, 4.99, 5.73, 5.77, 5.45, 5.44, 5.41, 5.81, 4.46),
H5 = c(4.08, 5.17, 4.77, 5.03, 5.00, 5.49, 5.49, 5.80, 5.51, 5.18, 5.76, 4.60),
H6 = c(4.35, 5.59, 5.59, 4.83, 5.52, 5.63, 5.85, 5.74, 5.66, 5.19, 5.79, 4.84), fontface = c("bold"),
names = c("RB", "Ver", "Atl", "POR12PG28-3",
"Valery", "Rio", "CO99076-6R", "Purple",
"AC99330-1P/Y", "CO05068-1RU", "Masquerade", "Canela"),
specie = c(rep("Appearance", 12), rep("Aroma" , 12), rep("Flavor" , 12),
rep("Overall" , 12), rep("Aftertaste", 12), rep("Texture", 12)),
condition = rep(c("RB", "Ver", "Atl", "POR12PG28-3",
"Valery", "Rio", "CO99076-6R", "Purple",
"AC99330-1P/Y", "CO05068-1RU", "Masquerade", "Canela") , 6))
df <- df %>%
pivot_longer(starts_with("H"), names_to = "h.names")
df
#one condition per plot
nameframe <- enframe(unique(df$h.names))
specieframe <- enframe(unique(df$specie))
names.labs <- c("Appearance", "Aroma", "Flavor", "Overall", "Aftertaste", "Texture")
names(names.labs) <- c("H1", "H2", "H3", "H4", "H5", "H6")
ggplot() +
geom_col(data = df, mapping = aes(x = names, y = value),
position = "dodge") +
coord_flip() +
ylim(c(0,9)) +
scale_y_continuous(breaks=seq(0.0, 9, 3), limits=c(0, 9), labels = c("0", "3", "6", "Like\nExtremely")) +
labs(y = "", x = "") + theme(legend.title = element_blank(), axis.text.y = element_text(face = "bold", size = 11),
axis.text.x = element_text(face = "bold", size = 9)) +
scale_fill_discrete(breaks = c("Appearance", "Aroma", "Flavor", "Overall", "Aftertaste", "Texture")) +
facet_wrap(~h.names, labeller = labeller(h.names = names.labs))
#add text onto each bar
p <- p + annotate("text", label = "Test", size = 4, x = 15, y = 5)
print(p)
text(x = H,
y = y,
labels = c("ab", "e", "a", "d", "cd", "ab", "ab", "e", "c", "ab", "b", "ab"),
pos = 2)
Here is how you would add the text you had in your original question to each bar. Please note that I moved data = df, mapping = aes(x = names, y = value) up into the ggplot() call, where it will be applied to each layer in the plot. Next, I used case_when from the dplyr package to add the bar plot labels as a new column to df. After that, you pass the new column into geom_text like so, geom_text(aes(label = bar_labels, hjust = 0)), to apply the labels at the tip of each bar.
library(viridis)
library(tidyr)
library(dplyr)
library(tibble)
library(ggplot2)
df <- data.frame(
H1 = c(6.36, 3.03, 6.85, 4.07, 4.69, 6.27, 6.67, 3.11, 5.07, 6.14, 5.93, 6.49),
H2 = c(5.15, 5.00, 5.71, 5.50, 4.99, 5.81, 6.05, 5.76, 5.28, 5.69, 5.69, 5.06),
H3 = c(3.85, 5.13, 4.99, 4.91, 5.01, 5.73, 5.77, 5.94, 5.57, 5.35, 6.00, 4.39),
H4 = c(3.84, 4.80, 5.15, 4.85, 4.99, 5.73, 5.77, 5.45, 5.44, 5.41, 5.81, 4.46),
H5 = c(4.08, 5.17, 4.77, 5.03, 5.00, 5.49, 5.49, 5.80, 5.51, 5.18, 5.76, 4.60),
H6 = c(4.35, 5.59, 5.59, 4.83, 5.52, 5.63, 5.85, 5.74, 5.66, 5.19, 5.79, 4.84), fontface = c("bold"),
names = c("RB", "Ver", "Atl", "POR12PG28-3",
"Valery", "Rio", "CO99076-6R", "Purple",
"AC99330-1P/Y", "CO05068-1RU", "Masquerade", "Canela"),
specie = c(rep("Appearance", 12), rep("Aroma" , 12), rep("Flavor" , 12),
rep("Overall" , 12), rep("Aftertaste", 12), rep("Texture", 12)),
condition = rep(c("RB", "Ver", "Atl", "POR12PG28-3",
"Valery", "Rio", "CO99076-6R", "Purple",
"AC99330-1P/Y", "CO05068-1RU", "Masquerade", "Canela") , 6))
df <- df %>%
pivot_longer(starts_with("H"), names_to = "h.names")
#one condition per plot
nameframe <- enframe(unique(df$h.names))
specieframe <- enframe(unique(df$specie))
names.labs <- c("Appearance", "Aroma", "Flavor", "Overall", "Aftertaste", "Texture")
names(names.labs) <- c("H1", "H2", "H3", "H4", "H5", "H6")
#add text onto each bar
# Add a `bar_labels` column holding the letter to print on each bar.
# case_when() is already vectorised over `names`, so no grouping is needed;
# leaving it out also means `df` is not left grouped for later steps.
# Any name without an entry below gets NA.
df <- df %>%
  arrange(desc(names)) %>%
  mutate(
    bar_labels = case_when(
      names == "Ver" ~ "ab",
      names == "Valery" ~ "e",
      names == "Rio" ~ "a",
      names == "RB" ~ "d",
      names == "Purple" ~ "cd",
      names == "POR12PG28-3" ~ "ab",
      names == "Masquerade" ~ "ab",
      names == "CO99076-6R" ~ "e",
      names == "CO05068-1RU" ~ "c",
      names == "Canela" ~ "ab",
      names == "Atl" ~ "b",
      names == "AC99330-1P/Y" ~ "ab",
      TRUE ~ NA_character_
    )
  )
# Horizontal bar chart, one facet per h.names value, with the bar_labels text
# drawn starting at the tip of each bar.
# The y scale is declared once: scale_y_continuous() already fixes the limits
# at 0-9, so an extra ylim() call would simply be replaced (with a
# "Scale for y is already present" message).
ggplot(data = df, mapping = aes(x = names, y = value)) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0.0, 9, 3),
    limits = c(0, 9),
    labels = c("0", "3", "6", "Like\nExtremely")
  ) +
  labs(y = "", x = "") +
  theme(
    legend.title = element_blank(),
    axis.text.y = element_text(face = "bold", size = 11),
    axis.text.x = element_text(face = "bold", size = 9)
  ) +
  scale_fill_discrete(
    breaks = c("Appearance", "Aroma", "Flavor", "Overall", "Aftertaste", "Texture")
  ) +
  # Facet strips show the readable names stored in names.labs.
  facet_wrap(~h.names, labeller = labeller(h.names = names.labs)) +
  geom_text(aes(label = bar_labels, hjust = 0))
Created on 2021-03-10 by the reprex package (v0.3.0)

Problem with ggplot: labels and error bars overlap

I made a barplot with error bars and labels written on the bars.
My problem is: I want the labels to appear on the bars and also next to the error bars. That is, I don't want labels and error bars to overlap.
An example with my code:
i <- data.frame(
nbr =c(15.18 ,11.53 ,13.37 ,9.2, 10.9, 12.23 ,9.53, 9.81, 7.86, 12.79,
22.03 ,17.64 ,18.1, 16.78 ,17.53 ,16.97 ,17.76 ,18.35 ,12.82 ,20.91,
22.09 ,19.18 ,17.54 ,18.45 ,19.83 ,16.99 ,19.69 ,19.45 ,13.07 ,21.41,
12.13 ,9.76, 10.79 ,10.74 ,12.43 ,9.65, 12.18 ,11.63 ,6.74, 12.31,
17.5, 14.75 ,15.2, 13.89 ,15.24 ,17.43 ,15.22 ,14.04,9.49, 15.86,
8.09, 5.86, 6.68, 7.34, 8.01, 6.35, 8.4, 7.4, 3.88, 6.92 ),
SD = c(4.46, 4.19, 2.27, 2.19, 5.10, 7.25, 8.42, 6.47, 6.04, 7.48, 6.38, 6.05, 3.58, 3.85,
6.94, 6.87, 6.32, 4.28, 4.10, 7.34, 7.46, 6.62, 4.28, 5.24, 8.00, 8.10, 7.73, 5.18,
5.53, 7.96, 7.46, 7.05, 4.47, 4.73, 8.15, 6.95, 5.88, 3.20, 4.01, 7.34, 7.24, 6.98,
5.98, 4.53, 4.22, 7.21, 4.02, 4.30, 1.96, 2.11, 4.98, 7.16, 8.45, 6.39, 6.20, 7.03,
6.10, 6.42, 3.77, 3.53),
x2=rep(c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j"),6),
s = c(rep(c(rep(c("3"),10),
rep(c("4"),10),
rep(c("5"),10),
rep(c("6"),10),
rep(c("7"),10),
rep(c("8"),10)),1)))
ii <- i[order(i$s, i$nbr ), ]
sn <- factor(x = 1:60, labels = ii$x2)
ii$sn <- sn
# Discrete x scale whose tick labels have everything from `sep` onwards
# removed, i.e. the suffix that reorder_within() appends.
#
# @param ... Further arguments passed on to ggplot2::scale_x_discrete().
# @param sep Separator that marks the start of the suffix to strip.
scale_x_reordered <- function(..., sep = "___") {
  suffix_pattern <- paste0(sep, ".+$")
  strip_suffix <- function(x) {
    gsub(suffix_pattern, "", x)
  }
  ggplot2::scale_x_discrete(labels = strip_suffix, ...)
}
# Build a factor of "<x><sep><within>" labels whose levels are ordered by
# `fun` applied to `by` for each label.
#
# @param x Values to be ordered.
# @param by Numeric values that determine the ordering.
# @param within Grouping values pasted onto `x` to make the labels distinct.
# @param fun Summary function handed to stats::reorder() as FUN.
# @param sep Separator placed between `x` and `within`.
# @param ... Accepted but not used.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  stats::reorder(paste(x, within, sep = sep), by, FUN = fun)
}
dummy2 <- data.frame(s = levels(i$s)[-1], Z = c( 4,16,16,8,4))
dummy2$s <- factor(dummy2$s)
ggplot(ii, aes(reorder_within(sn, nbr, s), nbr,
label =x2)) +
geom_bar(stat = 'identity') +
geom_text(aes(y = 0,fontface=2), angle = 90, hjust = -.05, size = 4)+
scale_x_reordered() +
facet_wrap(.~ s, scales = "free_x", ncol=2)+
#geom_text(aes(label=nbr), vjust=1.6, color="white", size=3.5)+
theme(axis.text.x = element_blank(),
axis.title=element_text(size=16),
axis.text=element_text(face = "bold"),
strip.text.x = element_text(size = 14,face="bold")
)+ geom_errorbar(aes(reorder_within(sn, nbr, s),ymin=nbr-SD, ymax=nbr+SD), width=.2, position=position_dodge(.9))
Example of the expected plot:
I want all the labels to be written next to the error bars on the bars.
Thanks for your help !
I found this solution and wanted to share it with you:
geom_text(aes(y = 0,fontface=2), angle = 90, vjust = -1, hjust = -.05, size = 4)

Resources