How to rearrange dataset for ScatterPlot using ggplot in R - r

I'm trying to plot each country's CO2 emissions over six years (2010–2015), but I'm having trouble with the way the data is laid out in the Excel file: there is one column per year. I tried unlisting the columns and combining them into new variables, but with no luck. Any help on determining aes(x =, y =) for the dataset I provided below?
structure(list(`2010` = c(5.78, 7.34, 8.74, 1.45, 17.9), `2011` = c(5.76,
7.56, 8.49, 1.56, 17.1), `2012` = c(5.75, 7.36, 7.62, 1.56, 17.5
), `2013` = c(5.23, 6.71, 7.36, 1.7, 17.5), `2014` = c(5.3, 6.42,
7.04, 1.76, 16.9), `2015` = c(5.31, 6.04, 6.73, 1.79, 16.4)), row.names = c(59L,
62L, 69L, 79L, 184L), class = "data.frame")

Assuming each row is a different country:
library(tidyverse)
j_df <- structure(list(
`2010` = c(5.78, 7.34, 8.74, 1.45, 17.9),
`2011` = c(5.76, 7.56, 8.49, 1.56, 17.1),
`2012` = c(5.75, 7.36, 7.62, 1.56, 17.5 ),
`2013` = c(5.23, 6.71, 7.36, 1.7, 17.5),
`2014` = c(5.3, 6.42, 7.04, 1.76, 16.9),
`2015` = c(5.31, 6.04, 6.73, 1.79, 16.4)
), row.names = c(59L, 62L, 69L, 79L, 184L), class = "data.frame")
# Reshape the wide table (one column per year) into long form, one row per
# country-year, then draw each country's series as connected points.
emissions_long <- j_df %>%
  # Row names are assumed to identify the countries; promote them to a column.
  rownames_to_column(var = "Country") %>%
  pivot_longer(
    cols = `2010`:`2015`,
    names_to = "year",
    values_to = "C_Emissions"
  )

ggplot(emissions_long, aes(x = year, y = C_Emissions, color = Country)) +
  geom_point() +
  # `year` comes out of pivot_longer() as character, i.e. a discrete axis, so
  # the lines need an explicit group to connect points across years.
  geom_line(aes(group = Country))

Related

How to make hierarchical cluster pheatmap in r?

I have used this code to make a hierarchically clustered heatmap, but no colors are shown:
library(tidyverse)
Mydata <- structure(list(Location = c("Karnaphuli River", "Sangu River", "Kutubdia Channel", "Moheshkhali Channel", "Bakkhali River", "Naf River", "St. Martin's Island", "Mean "), Cr = c(114.92, 2.75, 18.88, 27.6, 39.5, 12.8, 17.45, 33.41), Pb = c(31.29, 26.42, 52.3, 59.45, 34.65, 12.8, 9.5, 32.34), Cu = c(9.48, 54.39, 52.4, 73.28, 76.26, 19.48, 8.94, 42.03), Zn = c(66.2, 71.17, 98.7, 95.3, 127.84, 27.76, 21.78, 72.67), As = c(89.67, 9.85, 8.82, 18.54, 15.38, 7.55, 16.45, 23.75), Cd = c(1.06, 0, 0.96, 2.78, 3.12, 0.79, 0.45, 1.53)), class = "data.frame", row.names = c(NA, -8L))
library(pheatmap)
# Move `Location` into the row names, convert to a numeric matrix, and draw the
# clustered heatmap.
# NOTE(review): the pipe already supplies the matrix as pheatmap()'s first
# argument, so `Mydata` below is passed as an additional positional argument
# (presumably matched to pheatmap's `color` parameter -- confirm in its docs).
Mydata %>% column_to_rownames(var = "Location") %>%
as.matrix() %>% pheatmap(Mydata, cutree_cols = 6)
You don't need to pass the data again when using pipes: the piped matrix is already supplied as the first argument. Try:
library(pheatmap)
# Use the river/channel names as row names so that only the numeric metal
# columns end up in the matrix handed to pheatmap().
heat_matrix <- as.matrix(column_to_rownames(Mydata, var = "Location"))
pheatmap(heat_matrix, cutree_cols = 6)

Add custom tick mark to Y axis in ggplot2

I'd like to show the average for my dataset and add a tick mark on the Y-axis corresponding to this mean value - highlighted in red in the below image:
Code
# Scatter plot of `value` over `time`, with points filled by value and a dotted
# horizontal line at the overall mean. `dat` is a data.table, hence the
# `dat[, expr]` syntax used to compute the mean.
plt <- ggplot(dat, aes(x = time, y = value)) +
  geom_point(aes(fill = value), size = 2, alpha = 0.8, shape = 21, stroke = 0.5, color = 'black') +
  # The gradient is routed to the `fill` aesthetic via `aesthetics`; `[-1]`
  # drops the first of the four palette colours.
  scale_color_gradientn(colors = RColorBrewer::brewer.pal(4, name = 'OrRd')[-1], aesthetics = 'fill') +
  # TRUE instead of T: `T` is an ordinary variable that can be reassigned.
  geom_hline(yintercept = dat[, mean(value, na.rm = TRUE)], color = 'black', linetype = '11', size = 1.25) +
  # "none" is the supported way to drop a legend; a bare FALSE is deprecated.
  guides(fill = "none")
I can use scale_y_continuous() to add a specific break point but it messes up the grid lines and I don't know how to customize that specific tick mark (if at all possible):
# Regular breaks every 10 units from 0 up to the maximum, plus one extra break
# at the mean; everything is rounded to one decimal place.
regular_breaks <- seq(from = 0, to = dat[, max(value)], by = 10)
mean_break <- dat[, mean(value)]
plt <- plt +
  scale_y_continuous(breaks = round(c(regular_breaks, mean_break), digits = 1))
Data
Reduced dataset for reproducing the plot:
structure(list(time = structure(c(1607990400, 1607996400, 1608002400,
1608008400, 1608014400, 1608020400, 1608026400, 1608032400, 1608038400,
1608044400, 1608050400, 1608056400, 1608062400, 1608068400, 1608074400,
1608080400, 1608086400, 1608092400, 1608098400, 1608104400, 1608110400,
1608116400, 1608122400, 1608128400, 1608134400, 1608140400, 1608146400,
1608152400, 1608158400, 1608164400, 1608170400, 1608176400, 1608182400,
1608188400, 1608194400, 1608200400, 1608206400, 1608212400, 1608218400,
1608224400, 1608230400, 1608236400, 1608242400, 1608248400, 1608254400,
1608260400, 1608266400, 1608272400, 1608278400, 1608284400, 1608290400,
1608296400, 1608302400, 1608308400, 1608314400, 1608320400, 1608326400,
1608332400, 1608338400, 1608344400, 1608350400, 1608356400, 1608362400,
1608368400, 1608374400, 1608380400, 1608386400, 1608392400, 1608398400,
1608404400, 1608410400, 1608416400, 1608422400, 1608428400, 1608434400,
1608440400, 1608446400, 1608452400, 1608458400, 1608464400, 1608470400,
1608476400, 1608482400, 1608488400, 1608494400, 1608500400, 1608506400,
1608512400, 1608518400, 1608524400, 1608530400, 1608536400, 1608542400,
1608548400, 1608554400, 1608560400, 1608566400, 1608572400, 1608578400,
1608584400, 1608590400, 1608596400, 1608602400, 1608608400, 1608614400,
1608620400, 1608626400, 1608632400, 1608638400), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), value = c(3.87, 3.57, 4.12, 2.68,
4.85447552447552, 0, 9.85, 2.9, 0.65010183299389, 2.55242704955998,
2.94610169491525, 3.2225, 3.44, 3.2, 3.64666666666667, 3.6, 4.2236312849162,
3.56285714285714, 2.99, 2.54, 2.34, 2.245, 2.05, 2.23666666666667,
4.82, 13.81, 18.08, 4.0375, 3.96, 12.9723756906077, 23.87, 16.2053333333333,
13.0836077705828, 10.91, 5.36238095238095, 2.62, 2.5375, 2.38,
2.72, 2.345, 2.32909090909091, 3.90333333333333, 3.02166666666667,
3.94833333333333, 3.83636363636364, 4.04117647058824, 4.22139146567718,
5.57, 4.82, 3.59666666666667, 3.73873949579832, 2, 2.04, 2.57,
3.00042016806723, 3.905, 5.65, 4.271589958159, 5.28, 7.15639534883721,
5.45, 5.24295336787565, 3.11224489795918, 4.79, 2.6106976744186,
2.25, 2.08264705882353, 2.25, 2.58666666666667, 3.18682008368201,
3.24, 3.10375, 3.35833333333333, 4.39333333333333, 3.765, 7.71,
5.16117647058824, 4.95588235294118, 2.44, 2.34666666666667, 2.345,
2.375, 2.4275, 3.0975, 3.21666666666667, 4.13, 4.44663366336634,
3.60877551020408, 3.83265033407572, 3.8625, 4.2675, 6.765, 2.688,
2.43101242521859, 2.43561435803037, 2.30166666666667, 2.69, 3.18,
5.04, 4.345, 4.86529411764706, 8.57, 6.2, 6.0032, 3.82, 5.03,
7.02, 3.69716216216216, 3.00468438538206)), row.names = c(NA,
-109L), class = c("data.table", "data.frame"))
Quick, dirty, and hacky:
# Mean of the plotted values; used for both the label position and its text.
mean_value <- dat[, mean(value, na.rm = TRUE)]
plt +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics would draw one copy per row of `dat`, all overprinted.
  annotate(
    "text",
    x = dat[, min(time, na.rm = TRUE)],
    y = mean_value,
    label = round(mean_value, 1),
    color = "red",
    hjust = 2
  ) +
  # hjust = 2 pushes the text left of the first data point; clipping is turned
  # off so it stays visible outside the panel area.
  coord_cartesian(clip = "off")
Maybe it gets you somewhere.

Y-axis values don't match the continuous values of data R

I have used the same code for similar datasets but for some reason, it is not working for this one.
This is roughly what I get. (I hid the categorical variable names)
This is the code that I have been using:
library(tidyverse)
library(ggplot2)
# Sort the rows, then freeze that order as the factor levels of `concept` so
# ggplot keeps it on the axis.
ordered_df <- df %>%
  arrange(desc(round), measure) %>%
  mutate(concept = factor(concept, unique(concept)))

# Horizontal bars of `measure` per concept, coloured by `round`, with error
# bars running from `lower` to `upper`.
ggplot(ordered_df, aes(x = concept, y = measure, fill = round)) +
  geom_col() +
  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(.9)) +
  scale_fill_manual(name = "round", values = c("purple", "red")) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Here is a sample df
structure(list(concept = c("ball", "camel", "wind",
"dog", "happy", "script", "condensation", "Marxism"),
measure = c(2.45, 3.2, 3.91, 3.94, 4.31, 3.63, 4.54, 4.55), upper = c(6.46,
6.11, 7.23, 7.9, 9.17, 11.23, 11.32, 15.3), lower = c(1.56,
0.79, 1.45, 1.02, 1.65, 1.31, 1.64, 3.07), round = c("One",
"Two", "Two", "Two", "Two", "Two", "Two",
"Two")), row.names = c(NA, -8L), class = "data.frame")

R plotly : Separate (with a blank) x-axis label Data

I have data at several levels of date aggregation (Year, Quarter, Month, Week).
I am trying to leave a gap between each date-aggregation group in the x-axis labels.
I obtain this for the moment :
And I want to obtain :
Here my data :
My dataframe (dput) :
r8_plot = structure(list(DATE = c(2016L, 2017L, 2018L, 201701L, 201702L,
201703L, 201704L, 201801L, 201802L, 201803L, 201804L, 201801L,
201802L, 201803L, 201804L, 201805L, 201806L, 201807L, 201808L,
201809L, 201810L, 201811L, 201812L, 201844L, 201845L, 201846L,
201847L, 201848L, 201849L, 201850L), Var1 = c(6.64, 6.21, 6.53,
6.31, 6.01, 6.36, 6.17, 6.76, 6.37, 6.68, 6.27, 7.5, 6.49, 6.4,
6.54, 6.18, 6.37, 5.98, 6.37, 7.48, 6.6, 5.97, 6.25, 5.42, 6.18,
5.81, 6.46, 6.36, 6.05, 6.35), Var2 = c(2.38, 2.25, 2.36, 2.22,
2.52, 1.98, 2.27, 2.44, 2.31, 2.27, 2.41, 2.53, 2.25, 2.51, 2.35,
2.42, 2.19, 2.51, 1.91, 2.38, 2.34, 2.29, 2.68, 2.15, 1.89, 2.6,
2.52, 2.37, 2.97, 2.71), Var3 = c(4.26, 3.96, 4.17, 4.09, 3.5,
4.38, 3.9, 4.32, 4.06, 4.4, 3.86, 4.96, 4.23, 3.9, 4.19, 3.77,
4.18, 3.47, 4.46, 5.1, 4.26, 3.68, 3.57, 3.27, 4.29, 3.2, 3.95,
3.99, 3.09, 3.64), Var4 = c(35.84, 36.17, 36.08, 35.2, 41.86,
31.17, 36.76, 36.07, 36.27, 34.07, 38.43, 33.78, 34.76, 39.18,
35.95, 39.07, 34.35, 42.04, 29.91, 31.8, 35.48, 38.38, 42.86,
39.72, 30.53, 44.85, 38.94, 37.24, 48.98, 42.63), Var5 = c("Y",
"Y", "Y", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "W", "W", "W", "W",
"W", "W", "W"), Var6 = structure(1:30, .Label = c("2016", "2017",
"2018", "Q1-2017", "Q2-2017", "Q3-2017", "Q4-2017", "Q1-2018",
"Q2-2018", "Q3-2018", "Q4-2018", "M01-2018", "M02-2018", "M03-2018",
"M04-2018", "M05-2018", "M06-2018", "M07-2018", "M08-2018", "M09-2018",
"M10-2018", "M11-2018", "M12-2018", "W44-2018", "W45-2018", "W46-2018",
"W47-2018", "W48-2018", "W49-2018", "W50-2018"), class = "factor"),
Var7 = c(7.1, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.6,
6.6, 6.7, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6,
6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6)), .Names = c("DATE",
"Var1", "Var2", "Var3", "Var4", "Var5", "Var6", "Var7"), row.names = c(NA,
30L), class = "data.frame")
# Pin the x-axis order to the order in which the labels appear in the data.
r8_plot$Var6 <- factor(r8_plot$Var6, labels = unique(r8_plot$Var6), levels = unique(r8_plot$Var6))
library(plotly)
# Stacked bars (Var1, Var2) plus two lines; Var4 is drawn against a secondary
# y-axis on the right-hand side.
# The figure size is set in plot_ly(): passing width/height to layout() is
# deprecated in plotly.
r8_plot %>%
  plot_ly(x = ~Var6, height = 750, width = 1000) %>%
  add_bars(y = ~Var1, marker = list(color = '#00802b'), name = "Var1") %>%
  add_bars(y = ~Var2, marker = list(color = '#ff9933'), name = "Var2") %>%
  add_lines(y = ~Var4, name = "Var4", yaxis = "y2", line = list(color = '#1a1aff')) %>%
  add_lines(y = ~Var7, name = "Var7", yaxis = "y1") %>%
  # A single layout() call; `barmode` was previously supplied twice.
  layout(
    barmode = "stack",
    xaxis = list(title = 'DATE'),
    yaxis = list(title = 'All quantity'),
    yaxis2 = list(overlaying = "y", side = "right"),
    title = "Chart",
    hovermode = 'closest',
    margin = list(b = 115)
  )
Thanks in advance
You can add NaN y-values to any data frame and Plotly will interrupt the plot at this position.
For example
library(plotly)
# Minimal example: the third row holds NaN in both columns, which is where the
# line is expected to be interrupted.
data <- data.frame(
  x = c(1, 2, NaN, 3, 4),
  y = c(1, 2, NaN, 3, 4)
)
plot_ly(data, x = ~x) %>%
  add_lines(y = ~y)
will give you
For more complex data, like in the question, a function introducing the empty rows might be easier than doing it manually.
#' Insert blank separator rows between date-aggregation groups
#'
#' Walks down `data` and, wherever the first character of `Var6` differs from
#' that of the previous row, inserts a row filled with `NaN`. Each inserted
#' row gets a distinct whitespace-only `Var6` label (1, 3, 5, ... spaces) so
#' that every separator is a unique x value.
#'
#' @param data A data frame with a `Var6` column whose first character
#'   identifies the group (e.g. "2016", "Q1-2017", "M01-2018"). `Var6` should
#'   be character: a whitespace label is not a valid level of an existing
#'   factor.
#' @return `data` with one `NaN` row inserted at each group change and row
#'   names renumbered `1..n`. If there is no group change (including inputs
#'   with fewer than two rows), `data` is returned unchanged. Because the
#'   separator is bound in as a numeric vector, integer columns become double
#'   and character columns briefly hold the string "NaN".
split_by_date <- function(data) {
  n_rows <- nrow(data)
  # One NaN per column; rbind() turns this vector into the separator row.
  gap_row <- rep(NaN, length(data))
  # First character of every label, computed once instead of twice per step.
  prefix <- vapply(
    seq_len(n_rows),
    function(k) substr(toString(data$Var6[[k]]), 1, 1),
    character(1)
  )

  new_data <- data
  index <- 0 # number of separator rows inserted so far
  # seq_len(n)[-1] is empty for n < 2, whereas 2:n would count down to 1.
  for (i in seq_len(n_rows)[-1]) {
    if (prefix[[i]] != prefix[[i - 1]]) {
      # Rows of `new_data` built so far: i - 1 original rows plus `index`
      # separators. The previous `1:i + index - 1` parsed as
      # `(1:i) + index - 1`, which dropped the first row from the third
      # group change onwards.
      head_rows <- new_data[seq_len(i + index - 1), ]
      new_data <- rbind.data.frame(head_rows, gap_row, data[i:n_rows, ])
      # 2 * index + 1 spaces: same labels as before, unique per separator.
      new_data$Var6[[index + i]] <- strrep(" ", 2 * index + 1)
      index <- index + 1
      rownames(new_data) <- seq_len(n_rows + index)
    }
  }
  new_data
}
We just need to make sure that the x-value is always unique, i.e. simply concatenate an increasing number of spaces. Otherwise we'll just get a single break in the graph.
Also, another column x is introduced to help with plotting the x-values in the correct order.
r8_plot = structure(list(DATE = c(2016L, 2017L, 2018L, 201701L, 201702L, 201703L, 201704L, 201801L, 201802L, 201803L, 201804L, 201801L, 201802L, 201803L, 201804L, 201805L, 201806L, 201807L, 201808L, 201809L, 201810L, 201811L, 201812L, 201844L, 201845L, 201846L, 201847L, 201848L, 201849L, 201850L),
Var1 = c(6.64, 6.21, 6.53, 6.31, 6.01, 6.36, 6.17, 6.76, 6.37, 6.68, 6.27, 7.5, 6.49, 6.4, 6.54, 6.18, 6.37, 5.98, 6.37, 7.48, 6.6, 5.97, 6.25, 5.42, 6.18, 5.81, 6.46, 6.36, 6.05, 6.35),
Var2 = c(2.38, 2.25, 2.36, 2.22, 2.52, 1.98, 2.27, 2.44, 2.31, 2.27, 2.41, 2.53, 2.25, 2.51, 2.35, 2.42, 2.19, 2.51, 1.91, 2.38, 2.34, 2.29, 2.68, 2.15, 1.89, 2.6, 2.52, 2.37, 2.97, 2.71),
Var3 = c(4.26, 3.96, 4.17, 4.09, 3.5, 4.38, 3.9, 4.32, 4.06, 4.4, 3.86, 4.96, 4.23, 3.9, 4.19, 3.77, 4.18, 3.47, 4.46, 5.1, 4.26, 3.68, 3.57, 3.27, 4.29, 3.2, 3.95, 3.99, 3.09, 3.64),
Var4 = c(35.84, 36.17, 36.08, 35.2, 41.86, 31.17, 36.76, 36.07, 36.27, 34.07, 38.43, 33.78, 34.76, 39.18, 35.95, 39.07, 34.35, 42.04, 29.91, 31.8, 35.48, 38.38, 42.86, 39.72, 30.53, 44.85, 38.94, 37.24, 48.98, 42.63),
Var5 = c("Y", "Y", "Y", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "W", "W", "W", "W", "W", "W", "W"),
Var6 = c("2016", "2017", "2018", "Q1-2017", "Q2-2017", "Q3-2017", "Q4-2017", "Q1-2018", "Q2-2018", "Q3-2018", "Q4-2018", "M01-2018", "M02-2018", "M03-2018","M04-2018", "M05-2018", "M06-2018", "M07-2018", "M08-2018", "M09-2018", "M10-2018", "M11-2018", "M12-2018", "W44-2018", "W45-2018", "W46-2018", "W47-2018", "W48-2018", "W49-2018", "W50-2018"),
Var7 = c(7.1, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.6, 6.6, 6.7, 6.7, 6.7, 6.7, 6.7, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6,6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6, 6.6)),
.Names = c("DATE", "Var1", "Var2", "Var3", "Var4", "Var5", "Var6", "Var7"), row.names = c(NA, 30L), class = "data.frame")
# Insert the separator rows, then build `x` as a factor whose integer codes
# follow the row order and whose labels are the (now unique) Var6 strings.
plot_data <- split_by_date(r8_plot)
plot_data$x <- structure(
  seq_along(plot_data$Var6),
  .Label = plot_data$Var6,
  class = "factor"
)

# Axis settings pulled out of the layout() call for readability.
x_axis <- list(title = 'DATE', range = c(-0.1, 10))
primary_y_axis <- list(title = 'All quantity')
secondary_y_axis <- list(overlaying = "y", side = "right")

# Stacked bars (Var1, Var2) plus two lines; Var4 uses the secondary y-axis.
plot_data %>%
  plot_ly(x = ~x, height = 750, width = 1000) %>%
  add_bars(y = ~Var1, marker = list(color = '#00802b'), name = "Var1") %>%
  add_bars(y = ~Var2, marker = list(color = '#ff9933'), name = "Var2") %>%
  add_lines(y = ~Var4, name = "Var4", yaxis = "y2", line = list(color = '#1a1aff')) %>%
  add_lines(y = ~Var7, name = "Var7", yaxis = "y1") %>%
  layout(
    barmode = "stack",
    xaxis = x_axis,
    yaxis = primary_y_axis,
    yaxis2 = secondary_y_axis,
    title = "Chart",
    hovermode = 'closest'
  )

Problem with ggplot: labels and error bars overlap

I made a barplot with error bars and labels written on the bars.
My problem is: I want the labels to appear on the bars and also next to the error bars. That is, I don't want labels and error bars to overlap.
An example with my code:
i <- data.frame(
nbr =c(15.18 ,11.53 ,13.37 ,9.2, 10.9, 12.23 ,9.53, 9.81, 7.86, 12.79,
22.03 ,17.64 ,18.1, 16.78 ,17.53 ,16.97 ,17.76 ,18.35 ,12.82 ,20.91,
22.09 ,19.18 ,17.54 ,18.45 ,19.83 ,16.99 ,19.69 ,19.45 ,13.07 ,21.41,
12.13 ,9.76, 10.79 ,10.74 ,12.43 ,9.65, 12.18 ,11.63 ,6.74, 12.31,
17.5, 14.75 ,15.2, 13.89 ,15.24 ,17.43 ,15.22 ,14.04,9.49, 15.86,
8.09, 5.86, 6.68, 7.34, 8.01, 6.35, 8.4, 7.4, 3.88, 6.92 ),
SD = c(4.46, 4.19, 2.27, 2.19, 5.10, 7.25, 8.42, 6.47, 6.04, 7.48, 6.38, 6.05, 3.58, 3.85,
6.94, 6.87, 6.32, 4.28, 4.10, 7.34, 7.46, 6.62, 4.28, 5.24, 8.00, 8.10, 7.73, 5.18,
5.53, 7.96, 7.46, 7.05, 4.47, 4.73, 8.15, 6.95, 5.88, 3.20, 4.01, 7.34, 7.24, 6.98,
5.98, 4.53, 4.22, 7.21, 4.02, 4.30, 1.96, 2.11, 4.98, 7.16, 8.45, 6.39, 6.20, 7.03,
6.10, 6.42, 3.77, 3.53),
x2=rep(c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j"),6),
s = c(rep(c(rep(c("3"),10),
rep(c("4"),10),
rep(c("5"),10),
rep(c("6"),10),
rep(c("7"),10),
rep(c("8"),10)),1)))
ii <- i[order(i$s, i$nbr ), ]
sn <- factor(x = 1:60, labels = ii$x2)
ii$sn <- sn
#' Discrete x scale that hides the suffix appended by `reorder_within()`
#'
#' @param ... Passed on to `ggplot2::scale_x_discrete()`.
#' @param sep Separator that precedes the suffix; it is used as part of a
#'   regular expression.
#' @return The scale object returned by `ggplot2::scale_x_discrete()`.
scale_x_reordered <- function(..., sep = "___") {
  # Matches the separator and everything after it, up to the end of the label.
  suffix_pattern <- paste0(sep, ".+$")
  strip_suffix <- function(x) {
    gsub(suffix_pattern, "", x)
  }
  ggplot2::scale_x_discrete(labels = strip_suffix, ...)
}
#' Reorder a vector by a summary of `by`, separately for each `within` value
#'
#' Every element of `x` is tagged with its `within` value (joined by `sep`),
#' so the same `x` in different groups becomes a different level. The tagged
#' values are then ordered with `stats::reorder()`.
#'
#' @param x Vector to reorder.
#' @param by Values whose per-level summary determines the ordering.
#' @param within Grouping vector appended to `x`.
#' @param fun Summary function applied to `by` for each level.
#' @param sep Separator placed between `x` and `within`.
#' @param ... Accepted but not used by this implementation.
#' @return A factor of the tagged values, as returned by `stats::reorder()`.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  stats::reorder(paste(x, within, sep = sep), by, FUN = fun)
}
# All values of the facet variable `s` except the first, each paired with a Z
# value. as.factor() makes the level lookup work whether `i$s` is a factor or
# a plain character column: levels() of a character vector is NULL, which
# would silently drop the `s` column here and make the next line fail.
dummy2 <- data.frame(s = levels(as.factor(i$s))[-1], Z = c(4, 16, 16, 8, 4))
dummy2$s <- factor(dummy2$s)
# Faceted bar chart: one panel per value of `s`, bars ordered by `nbr` within
# each panel via reorder_within(), each bar labelled with its `x2` value, and
# error bars spanning nbr - SD to nbr + SD.
ggplot(ii, aes(reorder_within(sn, nbr, s), nbr,
label =x2)) +
geom_bar(stat = 'identity') +
# Labels are anchored at y = 0, rotated by 90 degrees and drawn in bold
# (fontface = 2).
geom_text(aes(y = 0,fontface=2), angle = 90, hjust = -.05, size = 4)+
# Removes the "___<s>" suffix that reorder_within() appended to the x values.
scale_x_reordered() +
# Each panel gets its own x axis, since the x values differ per panel.
facet_wrap(.~ s, scales = "free_x", ncol=2)+
# Disabled alternative: print the numeric `nbr` value as the label instead.
#geom_text(aes(label=nbr), vjust=1.6, color="white", size=3.5)+
# x tick labels are hidden (element_blank); the in-plot text labels identify
# the bars instead.
theme(axis.text.x = element_blank(),
axis.title=element_text(size=16),
axis.text=element_text(face = "bold"),
strip.text.x = element_text(size = 14,face="bold")
)+ geom_errorbar(aes(reorder_within(sn, nbr, s),ymin=nbr-SD, ymax=nbr+SD), width=.2, position=position_dodge(.9))
Example of the expected plot:
I want all the labels to be written next to the error bars on the bars.
Thanks for your help !
I found this solution and wanted to share it with you:
geom_text(aes(y = 0,fontface=2), angle = 90, vjust = -1, hjust = -.05, size = 4)

Resources