Attached point in ggplot - r

I want to connect these different points by geom_line. It does not work. Can someone help me identify the problem?
df_st <- cbind(ID = c("ID_201","ID_202","ID_203","ID_204","ID_205","ID_206","ID_207","ID_208",
"ID_209","ID_210","ID_211","ID_212"),
PARAM_1 = c(48.4,17.6,19.2,23.6,23.7,17.8,16.5,18.2,17.6,19.7,14.3,15.7),
PARAM_2 = c(14.615,8.06,7.83,10.81,10.635,9.44,7.54,8.86,6.855,8.68,7.36,6.695),
PARAM_3 = c(19.8,10.3,10.2,13.6,13.8,11.9,9.4,11.2,8.9,11.3,9.0,9.0)) %>% data.frame
df_st <- df_st %>%
mutate_at(vars(-ID), as.character)
df_st <- df_st %>%
mutate_at(vars(-ID), as.numeric)
df_st_g <- df_st %>%
dplyr::select(ID, PARAM_1,PARAM_2, PARAM_3) %>%
gather(key = "variable", value = "value",PARAM_1:PARAM_3)
ggplot(df_st_g, aes(x = ID, y = value)) +
geom_point(aes(color = variable)) +
theme_classic() ```

Do you mean something like this:
library(tidyverse)
library(tidyr)
#Data
df_st <- structure(list(ID = c("ID_201", "ID_202", "ID_203", "ID_204",
"ID_205", "ID_206", "ID_207", "ID_208", "ID_209", "ID_210", "ID_211",
"ID_212"), PARAM_1 = c(48.4, 17.6, 19.2, 23.6, 23.7, 17.8, 16.5,
18.2, 17.6, 19.7, 14.3, 15.7), PARAM_2 = c(14.615, 8.06, 7.83,
10.81, 10.635, 9.44, 7.54, 8.86, 6.855, 8.68, 7.36, 6.695), PARAM_3 = c(19.8,
10.3, 10.2, 13.6, 13.8, 11.9, 9.4, 11.2, 8.9, 11.3, 9, 9)), row.names = c(NA,
-12L), class = "data.frame")
df2 <- pivot_longer(df_st,cols = names(df_st)[-1])
#Plot
ggplot(df2, aes(x = ID, y = value,group=name,color=name)) +
geom_point() +
geom_line() +
theme_classic()

Related

Creating a graph with multiple X axis values

I have a graph in Excel that I'd like to replicate in R if it's even possible. I am new to R, so any guidance will be appreciated.
So my data looks like this
I can include the file if anyone wants it.
Then I have this graph:
I'd like to plot the same graph as in the Excel file but using R. So, is there a way to have a kind of subset for the x-axis values that belong to the main value?
I looked through the ggplot documentation and How to plot side-by-side with sub labels and without space between subplots, but to no avail.
You can use either geom_bar(position = "dodge") or facet_wrap() to achieve your desired results. Please note that you'll need to name all your variables before plotting as it looks like the first two columns of your dataframe do not have names.
library(tidyverse)
data(mtcars)
# make a nested dataframe for example purposes
df <- mtcars %>%
rownames_to_column(var = "rowname") %>%
select(c(1:5)) %>%
pivot_longer(cols = -c(rowname)) %>%
head(n = 20)
ggplot(df, aes(x = name, y = value, fill = name)) +
geom_bar(stat = "identity") +
facet_wrap(~rowname, nrow = 1) # use facet_wrap to display nestedness
ggplot(df, aes(x = rowname, y = value, fill = name)) +
geom_bar(position = "dodge", stat = "identity")
This can be helpful
library(tidyr)
library(ggplot2)
df %>%
pivot_longer(ACI:SB) %>%
mutate(across(where(is.character), as.factor)) %>%
ggplot(aes(x = R, y = value, fill=name))+
geom_bar(stat="identity", position = "dodge", width=0.75)+
facet_wrap(~A, nrow=1, strip.position="bottom") +
theme(legend.position = "bottom") +
labs(fill="", y="", x="")
Produces:
If you want "to speak R with Excel accent" and convert this nice plot into a default excel plot, then you can add at the end of the plot theme_excel_new() from ggtheme package
library(ggthemes)
... +
theme_excel_new()
It'll give the following plot
Sample data:
structure(list(A = c(25, 25, 25, 50, 50, 50, 100, 100, 100, 250,
250, 250), R = c("R1", "R2", "R3", "R1", "R2", "R3", "R1", "R2",
"R3", "R1", "R2", "R3"), ACI = c(2.94, 1.91, 8.86, 5.03, 8.77,
1.89, 7.58, 7.24, 9.44, 5.48, 7.12, 3.89), PB = c(1.01, 9.27,
2.83, 5.91, 1.1, 8.41, 3.18, 7.83, 2.68, 2.19, 5.17, 2.69), NB = c(1.81,
5.19, 5.63, 1.29, 2.56, 7.18, 9.61, 1, 7.63, 9.48, 8.19, 3.08
), Bca = c(6.5, 9.53, 9.54, 3.4, 2.62, 1.65, 3.22, 5.1, 9.24,
5.11, 2.58, 0.46), SB = c(4.18, 8.54, 3.47, 1.31, 3.74, 6.31,
3.9, 6.9, 6.89, 5.55, 4.3, 4.53), `round(2)` = c(2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2)), class = "data.frame", row.names = c(NA,
-12L))

How to connect points according to grouping instead of connecting all points in ggplot?

As the title stated, I want to connect points in every group instead of all points.
Here is the original date:
df<-structure(list(TN = c(13.6, 18, 18.5, 17, 16.9, 13.6, 17.6, 14.8,
14, 11, 12.6, 18.6, 18.8, 18.3, 19.4, 18.5, 18.9, 22, 22.3),
TX = c(29.9, 26.9, 30.5, 26.6, 25.4, 29.7, 24.1, 21.1, 23.8,
29.3, 34.4, 31.1, 32, 35.9, 36.7, 37.5, 39.2, 34.8, 33.6),
TM = c(22.5, 21.4, 23.3, 21.4, 20.2, 21.4, 19.9, 17.8, 18.9,
20.9, 24.5, 24.5, 25.1, 27.3, 28.2, 28.5, 29.2, 28.2, 26.8
), Date = c("01/06/2022", "02/06/2022", "03/06/2022", "04/06/2022",
"05/06/2022", "06/06/2022", "07/06/2022", "08/06/2022", "09/06/2022",
"10/06/2022", "11/06/2022", "12/06/2022", "13/06/2022", "14/06/2022",
"15/06/2022", "16/06/2022", "17/06/2022", "18/06/2022", "19/06/2022"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-19L))
Here is my code:
library(ggplot2)
library(tidyr)
library(dplyr)
df %>% select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line()+
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
I want the points of the two groups (two colors) to be connected separately as the date changes, but I don't know why all the points are connected?
Here is final graph I got:
Any suggestions are welcome! Thank you in adavance!
Add group=Tcombine.
df %>% select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line(aes(group = Tcombine))+
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
Incidentally, while the ordering of your x-axis works here, the moment you get another month it will break. I suggest you convert your Date column to a proper Date-class and add scale_x_date.
df %>%
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) %>%
select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line(aes(group = Tcombine)) +
scale_x_date(date_breaks = "1 day") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
While this looks very similar, you have much better control over breaks (e.g., date_breaks = "3 days") and formatting (e.g., date_labels ="%d/%m/%Y" if you really want that formatting of the dates).

How to get rid of annotations on faceted graph?

Problem
I am trying to label the left facet side of my graph while leaving out the annotations on the right side.
Data
Here are my libraries and data:
#### Libraries ####
library(tidyverse)
library(ggpubr)
library(plotly)
#### Dput ####
emlit <- structure(list(X = 1:20, Ethnicity = c("Asian (other than Chinese)",
"Filipino", "Indonesian", "Thai", "Japanese", "Korean", "South Asian",
"Indian", "Nepalese", "Pakistani", "Other South Asian", "Other Asian",
"White", "Mixed", "With Chinese parent", "Other mixed", "Others",
"All ethnic minorities", "All ethnic minorities, excluding\n foreign domestic helpers",
"Whole population"), Age_5.14 = c(65.8, 72.2, 69.4, 83.1, 26.6,
52.4, 67.4, 60.4, 69.5, 71.5, 92.5, 92, 34.8, 76.6, 84.2, 45.3,
51.3, 64.3, 64.3, 94.8), Age_15.24 = c(28.1, 29.2, 4.4, 72.9,
34.8, 50.3, 38.7, 41.4, 22.2, 54.3, 41.9, 64.7, 24.4, 82.9, 90.7,
37.4, 53.2, 40.6, 52.9, 96.9), Age_25.34 = c(4.5, 1.8, 4.6, 20,
17.2, 26.8, 6.6, 4.2, 6.4, 11.9, 12, 33.9, 15, 60.5, 82, 6.7,
11.2, 7.8, 21.8, 84.9), Age_35.44 = c(6.3, 2, 6.1, 35.7, 36.5,
25.5, 9.4, 6.2, 10.5, 10.1, 22.4, 35.7, 8.6, 63, 83.2, 4.5, 12.2,
9.5, 23.4, 84.6), Age_45.54 = c(8.1, 2.3, 8, 23.2, 43.4, 59.6,
7.5, 6.3, 3.9, 13.5, 28.3, 47.5, 13.1, 72.1, 84, 4.4, 22.4, 14.2,
27.7, 92.5), Age_55.64 = c(15.9, 4.4, 44, 27, 41.7, 52.8, 11.8,
7.4, 9.5, 2, 54.2, 39.6, 12.7, 75.3, 80.1, 2.6, 20.6, 25, 32.4,
94.8), Age_65. = c(31.1, 11.9, 82.6, 39, 46.4, 57, 9.5, 3.9,
NA, 11.4, 66.5, 74.5, 14.5, 80.5, 81, 57.5, 13.6, 42.7, 44, 82.3
), Age_Overall = c(10.1, 3.5, 6.4, 31.4, 35.1, 39.8, 20.4, 15.3,
16.4, 33.8, 30.4, 46.3, 15.4, 72.7, 83.9, 19.4, 19.8, 16.9, 35.2,
89.4)), class = "data.frame", row.names = c(NA, -20L))
I have also pivoted the data for my graph:
#### Pivot Data ####
emlitpivot <- emlit %>%
pivot_longer(cols = contains("Age"),
names_to = "Age_Range",
values_to = "Percent")
Plot
Here is my plot so far, a faceted graph that breaks down literacy by age with some notes on some important points on the left:
#### EM vs all ####
# Order
order <- c("5-14", "15-24", "25-34", "35-44", "45-54", "55-64", "65+", "Overall",
"5-14", "15-24", "25-34", "35-44", "45-54", "55-64", "65+", "Overall")
# Plot
plot <- emlitpivot %>%
filter(Ethnicity %in% c("All ethnic minorities",
"Whole population")) %>%
ggbarplot(x="Age_Range",
y="Percent",
fill = "Ethnicity",
label = T,
palette = "jco",
facet.by = "Ethnicity",
title = "EM x Native Chinese Literacy by Age",
xlab = "Age Range",
ylab = "Literacy in Chinese (By Percent)",
caption = "*Data obtained from Census and Statistics Department Hong Kong SAR, 2016.")+
theme_cleveland()+
theme(axis.text.x = element_text(angle = 45,
hjust = .5,
vjust = .5),
legend.position = "none",
plot.caption = element_text(face = "italic"))+
scale_x_discrete(labels=order)+
geom_segment(aes(x = 3, y = 15, xend = 3, yend = 48))+
geom_segment(aes(x = 1, y = 71, xend = 1, yend = 80))+
geom_segment(aes(x = 7, y = 50, xend = 7, yend = 65))+
annotate("text",
x=4,
y=53,
label = "Post-college workers can't read.")+
annotate("text",
x=3.5,
y=85,
label = "School age supports seem to boost initial literacy.")+
annotate("text",
x=6,
y=70,
label = "Increase due to generational literacy?")
# Print plot:
plot
However, you can probably guess what the problem is:
How do I get rid of the annotations on the right? I'm not sure if there is a simple way of getting rid of them, but it would be helpful to only have text on the left side.
In this case, I'll use geom_text instead of annotate, since it allows you to have subset of your data.
library(tidyverse)
library(ggpubr)
emlitpivot %>%
filter(Ethnicity %in% c(
"All ethnic minorities",
"Whole population"
)) %>%
ggbarplot(
x = "Age_Range",
y = "Percent",
fill = "Ethnicity",
label = T,
palette = "jco",
facet.by = "Ethnicity",
title = "EM x Native Chinese Literacy by Age",
xlab = "Age Range",
ylab = "Literacy in Chinese (By Percent)",
caption = "*Data obtained from Census and Statistics Department Hong Kong SAR, 2016."
) +
theme_cleveland() +
theme(
axis.text.x = element_text(
angle = 45,
hjust = .5,
vjust = .5
),
legend.position = "none",
plot.caption = element_text(face = "italic")
) +
scale_x_discrete(labels = order) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 3, y = 15, xend = 3, yend = 48)) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 1, y = 71, xend = 1, yend = 80)) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 7, y = 50, xend = 7, yend = 65)) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(4, 53), label = "Post-college workers can't read.", check_overlap = T) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(3.5, 85), label = "School age supports seem to boost initial literacy.", check_overlap = T) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(6, 70), label = "Increase due to generational literacy?", check_overlap = T)
Update remove lines in second facet:
Create a dataframe with your text labels and position and add it to the plot,
to remove the lines do the same procedure:
df for text = ann_text
df for lines = segm
ann_text <- data.frame(x = c(4, 3.5, 6),
y = c(53, 85, 70),
lab = c("Post-college workers can't read.", "School age supports seem to boost initial literacy.",
"Increase due to generational literacy?"),
Ethnicity = rep("All ethnic minorities", 3))
segm <- data.frame(x = c(3,1,7),
y = c(15, 71, 50),
xend = c(3,1,7),
yend = c(48,80,65),
Ethnicity = rep("All ethnic minorities", 3))
plot1 <- plot +
geom_text(
data = ann_text,
mapping = aes(x = x, y = y, label = lab)
)
plot1 + geom_segment(
data = segm,
mapping = aes(x = x, y = y, xend = xend, yend = yend)
)
remove the following from your code:
annotate("text",
x=4,
y=53,
label = "Post-college workers can't read.")+
annotate("text",
x=3.5,
y=85,
label = "School age supports seem to boost initial literacy.")+
annotate("text",
x=6,
y=70,
label = "Increase due to generational literacy?")

How to pass multiple list of lists as arguments to mapply function in R

Given three lists of lists (avg_errors, year_months, dfs) as follows:
avg_errors <- list(0.7, 0.92, 2.35)
Out:
[[1]]
[1] 0.7
[[2]]
[1] 0.92
[[3]]
[1] 2.35
and:
year_months <- list(list('2021-12'), list('2021-11'), list('2021-10'))
and data dfs:
dfs <- list(structure(list(id = c("M01", "M02", "S01"), `2021-10(actual)` = c(8.9,
15.7, 5.3), `2021-11(actual)` = c(7.3, 14.8, 3.1), `2021-12(actual)` = c(6.1,
14.2, 3.5), `2021-12(pred)` = c(6.113631596, 14.16243166, 3.288372517
), `2021-12(error)` = c(0.719846116, 0.154463985, 1.225992602
), mean_col = c(7.43333333333333, 14.9, 3.96666666666667), act_direction = c(-1.2,
-0.600000000000001, 0.4), pred_direction = c(-1.186368404, -0.637568340000001,
0.188372517)), row.names = c(NA, -3L), class = "data.frame"),
structure(list(id = c("M01", "M02", "S01"), `2021-09(actual)` = c(10.3,
17.3, 6.4), `2021-10(actual)` = c(8.9, 15.7, 5.3), `2021-11(actual)` = c(7.3,
14.8, 3.1), `2021-11(pred)` = c(8.352097939, 13.97318204,
3.164682627), `2021-11(error)` = c(1.998109138, 0.414373304,
0.342072615), mean_col = c(8.83333333333333, 15.9333333333333,
4.93333333333333), act_direction = c(-1.6, -0.899999999999999,
-2.2), pred_direction = c(-0.547902061, -1.72681796, -2.135317373
)), row.names = c(NA, -3L), class = "data.frame"), structure(list(
id = c("M01", "M02", "S01"), `2021-08(actual)` = c(12.6,
19.2, 8.3), `2021-09(actual)` = c(10.3, 17.3, 6.4), `2021-10(actual)` = c(8.9,
15.7, 5.3), `2021-10(pred)` = c(9.619846116, 15.54553601,
6.525992602), `2021-10(error)` = c(0.945567783, 4.883250027,
1.215819585), mean_col = c(10.6, 17.4, 6.66666666666667
), act_direction = c(-1.4, -1.6, -1.1), pred_direction = c(-0.680153884000001,
-1.75446399, 0.125992601999999)), row.names = c(NA, -3L
), class = "data.frame"))
How could I pass these variables as arguments to the function Plotting() as follows:
library(gt)
library(tidyverse)
Plotting <- function(data, year_months, errors){
for (current_month in year_months){
for (avg_error in errors){
p <- data %>%
gt() %>%
tab_header(
title = gt::html(glue("<span style='color:red'>data for {current_month}</span>")),
subtitle = md(paste0("The average error for this month is: <em>", avg_error, '</em>.'))
)
return (p)
}
}
}
mapply(Plotting, dfs, year_months, errors)
The current code gives me: Error in (function (data, errors) : argument no use (dots[[3]][[1]]).
How could call the function and pass arguments? Thanks for your help at advance.
I'm not sure if this is your desired outcome, but my guess is, that you don't need the for-loop's inside the Plotting function, since your are already looping over your input with mapply.
library(gt)
library(tidyverse)
library(glue)
Plotting <- function(data, year_months, errors){
p <- data %>%
gt() %>%
tab_header(
title = gt::html(glue("<span style='color:red'>data for {year_months}</span>")),
subtitle = md(paste0("The average error for this month is: <em>", errors, '</em>.'))
)
print(p)
}
mapply(Plotting, dfs, year_months, avg_errors)
Created on 2022-01-23 by the reprex package (v0.3.0)

How to make hierarchical cluster pheatmap in r?

I have use this code to make hierarchical cluster heatmap but no color is coming
library(tidyverse)
Mydata <- structure(list(Location = c("Karnaphuli River", "Sangu River", "Kutubdia Channel", "Moheshkhali Channel", "Bakkhali River", "Naf River", "St. Martin's Island", "Mean "), Cr = c(114.92, 2.75, 18.88, 27.6, 39.5, 12.8, 17.45, 33.41), Pb = c(31.29, 26.42, 52.3, 59.45, 34.65, 12.8, 9.5, 32.34), Cu = c(9.48, 54.39, 52.4, 73.28, 76.26, 19.48, 8.94, 42.03), Zn = c(66.2, 71.17, 98.7, 95.3, 127.84, 27.76, 21.78, 72.67), As = c(89.67, 9.85, 8.82, 18.54, 15.38, 7.55, 16.45, 23.75), Cd = c(1.06, 0, 0.96, 2.78, 3.12, 0.79, 0.45, 1.53)), class = "data.frame", row.names = c(NA, -8L))
library(pheatmap)
Mydata %>% column_to_rownames(var = "Location") %>%
as.matrix() %>% pheatmap(Mydata, cutree_cols = 6)
You don't need to pass data again when using pipes. Try :
library(pheatmap)
Mydata %>%
column_to_rownames(var = "Location") %>%
as.matrix() %>% pheatmap(cutree_cols = 6)

Resources