I am trying to plot a bubble chart with multiple data variables on the same chart. I will try my best to describe what I would like the final output to look like, along with the attempts I have made after reading online and going through some questions posted on the forum.
I am just getting familiar with ggplot but if there is solution with another package, I am open to it.
C1 within circles represents color 1, C2 within squares represents color 2, C3 within triangles represents color 3.
I would think these will be 8 independent panels since the y-axis is different for each one.
Really appreciate the help.
The final output I would prefer is bubbles that differ in size and color. I think individual panels (8 x 3) would be ideal because the y axis of each panel can then be changed independently. But I cannot figure out how to structure the data to create 8 x 3 panels with individual y axes and symbol sizes. Thanks. The closest I came across: R ggplot bubble chart localised bubbles display without in single chart
Preferred output:
library (tidyverse)
library (reshape2)
library(ggplot2)
data.tb <- structure(list(Name = structure(c(7L, 8L, 9L, 1L, 10L, 2L, 11L,
3L, 12L, 13L, 4L, 14L, 5L, 15L, 6L), .Label = c("avg_row3", "avg_row4",
"avg_row5", "avg_row6_7", "avg_row8", "avg_row9", "row1", "row2",
"row3", "row4", "row5", "row6", "row7", "row8", "row9"), class = "factor"),
col1 = c(6333, 8847, 1495292, 169, 28994.1, 3.3, 12857.6,
1.5, 107154, 230344, 38.15, 837364, 132.8, 1226140, 176.74
), col2 = c(20347, 40594, 6229886, 153.5, 122769.8, 3, 44653.4,
1.1, 362972, 944725, 32.21, 3488736, 118.16, 5108506, 158.06
), total_col1_2 = c(23301, 49441, 7725178, 156.3, 151763.9,
3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100, 120.78,
6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1, 14689.2,
3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49, 616496,
204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11,
2503902, 182.02), total_col3_4 = c(13533, 25608, 3805363.6,
148.6, 75365, 2.9, 29818.2, 1.2, 470126, 1175069, 64.25,
2131004, 147.46, 3120398, 186.1), col5 = c(3020, 4179, 743468.1,
177.9, 14304.9, 3.4, 6073.5, 1.5, 0, 0, 0, 416342, 99.63,
609644, 145.88), col6 = c(10127, 19654, 3176346.3, 161.6,
62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754, 90.5, 2604604,
132.52), total_col5_6 = c(13147, 23833, 3919814, 164.5, 76398.9,
3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248, 134.87
)), class = "data.frame", row.names = c(NA, -15L))
# Reshape to long format (one row per Name/variable pair), then label every
# measurement with the column group it belongs to.
data_long.tb <- data.tb %>%
  melt(id.vars = "Name") %>%
  mutate(
    group_num = case_when(
      variable %in% c("col1", "col2", "total_col1_2") ~ "group1",
      variable %in% c("col3", "col4", "total_col3_4") ~ "group2",
      variable %in% c("col5", "col6", "total_col5_6") ~ "group3"
    )
  )
My attempts:
# Use the black-and-white theme for every plot drawn from here on.
theme_set(theme_bw())
# Jittered points: one x position per measured column, with colour and point
# size both mapped from the data.
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(colour = variable, size = value))
# Use the black-and-white theme for every plot drawn from here on.
theme_set(theme_bw())
# Same jittered points as before, split into one panel per column group.
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(colour = variable, size = value)) +
  facet_wrap(vars(group_num))
Not sure this is quite it but hopefully closer.
library(tidyverse)
# Bubble grid: one row per Name, one x position per measurement, with the
# point area scaled by the value and the shape/colour set by the column group.
df %>%
  # Long format for ggplot: one row per Name/measurement pair.
  pivot_longer(cols = -Name) %>%
  # Keep the Names in their order of appearance, reversed so the first one
  # ends up at the bottom of the y axis.
  mutate(Name = fct_rev(fct_inorder(Name))) %>%
  group_by(Name) %>%
  mutate(
    # Within each Name the measurements are numbered left to right.
    x_pos = row_number(),
    # The measurement name decides which shape is drawn.
    shape = case_when(
      name %in% c("col1", "col2", "total_col1_2") ~ "circle",
      name %in% c("col3", "col4", "total_col3_4") ~ "square",
      TRUE ~ "triangle"
    )
  ) %>%
  ungroup() %>%
  ggplot(
    mapping = aes(
      x = x_pos, y = Name, size = value, shape = shape, colour = shape
    )
  ) +
  geom_point() +
  scale_size_area() +
  scale_colour_manual(
    values = c(circle = "red", square = "forestgreen", triangle = "purple")
  ) +
  scale_shape_manual(values = c(circle = 19, square = 15, triangle = 17)) +
  theme_minimal()
Sample data
df <- structure(list(Name = c("row1", "row2", "row3", "avg_row3", "row4",
"avg_row4", "row5", "avg_row5", "row6", "row7", "avg_row6_7",
"row8", "avg_row8", "row9", "avg_row9"), col1 = c(6333, 8847,
1495292, 169, 28994.1, 3.3, 12857.6, 1.5, 107154, 230344, 38.15,
837364, 132.8, 1226140, 176.74), col2 = c(20347, 40594, 6229886,
153.5, 122769.8, 3, 44653.4, 1.1, 362972, 944725, 32.21, 3488736,
118.16, 5108506, 158.06), total_col1_2 = c(23301, 49441, 7725178,
156.3, 151763.9, 3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100,
120.78, 6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1,
14689.2, 3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49,
616496, 204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11, 2503902,
182.02), total_col3_4 = c(13533, 25608, 3805363.6, 148.6, 75365,
2.9, 29818.2, 1.2, 470126, 1175069, 64.25, 2131004, 147.46, 3120398,
186.1), col5 = c(3020, 4179, 743468.1, 177.9, 14304.9, 3.4, 6073.5,
1.5, 0, 0, 0, 416342, 99.63, 609644, 145.88), col6 = c(10127,
19654, 3176346.3, 161.6, 62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754,
90.5, 2604604, 132.52), total_col5_6 = c(13147, 23833, 3919814,
164.5, 76398.9, 3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248,
134.87)), row.names = c(NA, -15L), class = c("tbl_df", "tbl",
"data.frame"))
Related
I've been learning R for the past few months and I've struggled with something that I couldn't figure out.
I have a really simple question, how do I display percentiles 20 and 80 instead of 25 and 75 (or Q1/Q3) in a boxplot while using tidyverse?
I have tried to find documentation about it in the R Graph Gallery, in the tidyverse help and on a lot of other sites, but I couldn't reproduce the examples. Usually they show only 1 box, but I have 7 boxes to show.
Here is a sample of my data:
dataset <- structure(
list(
PM1 = c(0.4, 6.2, 5.1, 7.8, 8, NA, NA, 5.2),
PM2 = c(2, 8, 5.6, 8, NA, 6.4, 10.3, 7),
PM3 = c(NA, 7.2, 4.8, 4.4, NA, NA, 10.3, 5.9),
PM4 = c(1.2, 8.7, 5.4, NA, NA, NA, NA, NA),
PM5 = c(3.5, NA, 1.9, 2.2, NA, 3.5, 9.4, 0.3),
PM6 = c(1.3, NA, 1.1, NA, NA, 2.8, NA, NA),
PM7 = c(NA, NA, NA, 0.4, NA, NA, 8.8, 0.6)),
row.names = c(NA, -8L),
class = c("tbl_df", "tbl", "data.frame")
)
I can make the boxplot with these different quantiles using qboxplot; here's the code that I used:
library(qboxplot)
# Base-graphics box plot of all columns of dataset; probs requests the 20th,
# 50th and 80th percentiles instead of the default quartiles.
dataset %>%
  qboxplot(
    probs = c(0.20, 0.50, 0.80),
    main = "Dissolved Oxygen",
    xlab = "Monitoring Points",
    ylab = "mg/L",
    ylim = c(0, 12)
  )
I have searched for something similar to probs = c(0.20, 0.50, 0.80) from the qboxplot package in the ggplot2 but I found different approaches that I couldn't reproduce, like here, here and here.
library(tidyverse)
# ggplot2 version of the box plot: one box per monitoring point, with error-bar
# caps on the whiskers and a fixed 0-12 y axis.
dataset %>%
  # Long format: one row per (monitoring point, measurement) pair.
  pivot_longer(
    cols = everything(),
    names_to = "monitoring_point",
    values_to = "oxigenio_dissolvido"
  ) %>%
  ggplot(mapping = aes(x = monitoring_point, y = oxigenio_dissolvido)) +
  # Drawn before the boxes so only the whisker caps stay visible.
  stat_boxplot(
    geom = "errorbar",
    width = 0.3,
    position = position_dodge(width = 0.65)
  ) +
  geom_boxplot() +
  labs(
    title = "Dissolved Oxygen",
    y = "oxigenio_dissolvido (mg/L)"
  ) +
  # No padding around the axis limits.
  scale_y_continuous(
    limits = c(0, 12),
    expand = expansion(mult = c(0, 0))
  ) +
  theme_bw() +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
I think I'm close to my desired output, but I really didn't get how to change the hinges. Thank you very much in advance for helping me!
Define the function to draw each element of the box plot:
# Summary function for stat_summary(geom = "boxplot"): returns the five
# statistics the boxplot geom needs, using the 10th/90th percentiles for the
# whisker ends and the 20th/80th percentiles for the hinges.
#   x: numeric vector of observations; missing values are ignored.
# Returns a named numeric vector: ymin, lower, middle, upper, ymax.
f <- function(x) {
  # na.rm = TRUE: quantile() stops with an error on NA input otherwise.
  stats <- quantile(x, probs = c(0.1, 0.2, 0.5, 0.8, 0.9), na.rm = TRUE)
  setNames(stats, c("ymin", "lower", "middle", "upper", "ymax"))
}
Then plot it as a stat_summary()
# Box plot per monitoring point whose box and whiskers come from f() rather
# than from the default quartile statistics.
dataset %>%
  # Long format: one row per (monitoring point, measurement) pair.
  pivot_longer(
    cols = everything(),
    names_to = "monitoring_point",
    values_to = "oxigenio_dissolvido"
  ) %>%
  ggplot(aes(monitoring_point, oxigenio_dissolvido)) +
  # Only the custom layer is drawn: an additional geom_boxplot() would put the
  # default quartile boxes underneath and blur the customised hinges.
  stat_summary(fun.data = f, geom = "boxplot")
Code modified from a previous related question
Below is the example data which is list containing different data frames. I want to get one data frame out of it based on following two conditions.
First:
For each data frame in the list, starting at column 1, keep rbind()ing columns that have the exact same column name as the previous one. The moment a different column name is encountered, drop that column and all the columns after it.
For example: If column 1 is named Banana, then column 2 is named Banana, but column 3 is Orange and then again column 4 is Banana. Then column 1 and 2 will rbind() and column 3 and 4 will be dropped.
Another example: If column 1 is named Banana then column 2 is named Orange, but column 3 is named Banana, then only column 1 will survive as starting column 2 the column name is different and I don't care about column 3 name even though it's same as column 1.
Second:
After I run the list of data frame through above condition, then I want to combine all the data frames in the list to get one data frame which I think can be achieved using following code.
Here, lst2 is output of first condition.
# Column-bind every data frame in lst2 with rowr::cbind.fill; fill = 0 is the
# value passed for rows that a shorter data frame does not have.
do.call(rowr::cbind.fill, c(lst2, list(fill = 0)))
Above code credit #akrun. Any suggestions will be helpful.
Sample Data
list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
Update 1
After #ØysteinS answer I realized that there should be a third condition too:
Third:
If there is only a single column in one of the data frames in the list, then only that column should be added to the parent data frame.
This should do the job:
data <- list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
# For each data frame in the list, stack (rbind) the leading run of columns
# that share the first column's name; everything from the first differently
# named column onwards is dropped. The result is a named list of one-column
# data frames.
combined_frames <- lapply(data, function(df) {
  col_names <- names(df)
  # Nothing to stack in a data frame without columns.
  if (length(col_names) == 0L) {
    return(df)
  }
  # Positions of all columns whose name differs from the first one; the
  # leading run ends just before the first of them (or spans every column if
  # there is none).
  mismatches <- which(col_names != col_names[[1]])
  run_length <- if (length(mismatches) > 0L) {
    mismatches[[1]] - 1L
  } else {
    length(col_names)
  }
  # Collect the one-column pieces first and bind them once, instead of growing
  # the result with rbind() inside a loop.
  pieces <- lapply(seq_len(run_length), function(i) df[, i, drop = FALSE])
  do.call(rbind, pieces)
})
# Yes, your suggested code seems to work as expected for the last task
# Column-bind the stacked one-column data frames; shorter ones are padded
# with the fill value 0, as the printed result below shows.
do.call(rowr::cbind.fill, c(combined_frames, list(fill = 0)))
#> A.DIODE B.DIODE
#> 1 1.2 1.4
#> 2 0.4 0.8
#> 3 1.3 0.0
#> 4 0.6 0.0
One easy option would be to loop through the list, get the run-length-id of the column names, extract only those equal to 1, unlist, convert to a data.frame with the first column name and then bind the list of data.frames together with cbind.fill
library(data.table)
# For every data frame keep only the leading run of identically named columns
# (run-length id 1), flatten those columns into a single vector and wrap it in
# a one-column data frame named after the first column.
lst1 <- lapply(data, function(x) {
  in_first_run <- rleid(names(x)) == 1
  stacked <- data.frame(unlist(x[in_first_run]))
  setNames(stacked, names(x)[1])
})
# Bind the one-column data frames side by side, with fill = 0.
do.call(rowr::cbind.fill, c(lst1, list(fill = 0)))
# A.DIODE B.DIODE
#1 1.2 1.4
#2 0.4 0.8
#3 1.3 0.0
#4 0.6 0.0
The data I have is a list of data frames. I want to loop through each of the data frame to find:
If there are columns with duplicate column names. If yes, then I
want to merge them by using rbind() in a parent data frame
called output and remove all other columns of such data frames.
I also want to check if there is any data frame that doesn't have duplicate
columns. If yes, then remove all the columns except the first one. Then
cbind() with output such that if rows are more or less than what was
created by (1) then zero should be added.
I tried using lapply(), but my logic to get the above two isn't working in one go. Any suggestion will help.
# Skeleton only: the function body is empty, so every element of output is
# NULL until the per-data-frame logic is filled in.
output <- lapply(data, function(x) {
})
Input Data List Containing Data Frames
list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
Desired Output
> A
A-DIODE
1.2
0.4
1.3
0.6
> B
B-DIODE
1.4
0.8
> Output
A-DIODE B-DIODE
1.2 1.4
0.4 0.8
1.3 0
0.6 0
Loop through the list, create a condition with if/else that checks the length of the unique column names and returns the unlisted single data.frame when there is only a single unique column or else return the first column. Finally, with cbind.fill (from rowr) bind the list of data.frame columns together, specifying the fill as 0
# Reduce every data frame to one column: if all of its columns share a single
# name they are flattened into one long column, otherwise only the first
# column is kept.
lst2 <- lapply(lst1, function(x) {
  single_name <- length(unique(names(x))) == 1
  if (!single_name) {
    return(x[1])
  }
  flattened <- data.frame(unlist(x))
  setNames(flattened, names(x)[1])
})
# Bind the one-column data frames side by side, with fill = 0.
do.call(rowr::cbind.fill, c(lst2, list(fill = 0)))
# A.DIODE B.DIODE
#1 1.2 1.4
#2 0.4 0.8
#3 1.3 0.0
#4 0.6 0.0
My data (final.df) looks like the following:
A B C Y 1
0 0 0 0 0.05
0 0 1 1 0.03
....
Based on the comment below, here is a ASCII text representation of the dataframe.
structure(list(A = c(502, 541, 542, 543, 544, 545, 4304, 4370,
4371, 4372, 4373, 4442), B = c(4.4, 4.2, 4.4, 4.6, 4.8, 5, 5.2,
4.6, 4.8, 5, 5.2, 5.2), C = c(2.6, 2.8, 2.8, 2.8, 2.8, 2.8, 12.6,
12.8, 12.8, 12.8, 12.8, 13), Y = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), `1` = c(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1), `NA` = c(0,
0, 0, 0, 0, 0, 0, 0, 0.000281600479875937, 0, 0, 0)), .Names = c("A",
"B", "C", "Y", "1", NA), row.names = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L), class = "data.frame")
To summarize, there are four columns that identify each data point. I am interested in creating two boxplots according to their values in column with name 1. I want to compare the values for points labeled 0 in column 'Y' and labeled 1 in column 'Y'. Finally, I want to be able to hover over the points to retrieve the meta-data, meaning the 'A', 'B', 'C', and '1' value.
# Box plot of the values in column `1`, split by the label in column Y, with
# the individual points overlaid. (Mapping y to Y itself would only plot the
# label against the label.)
p <- ggplot(final.df, aes(x = factor(Y), y = `1`, fill = factor(Y))) +
  geom_boxplot() +
  geom_point() +
  xlab("Y") +
  guides(fill = guide_legend("Y")) +
  theme(legend.position = "top")
# Convert to an interactive plotly widget so the points can be hovered.
final.p <- ggplotly(p)
The current plot shows me factor(Y) value and the corresponding value in 1. How can I include the meta-data in columns 'A', 'B', 'C'?
We can build the tooltip text by pasting the values together with HTML line-break tags, and instruct the tooltip to use text.
# Box plot of column `1` by Y with hoverable points. The tooltip text is built
# per point from the metadata columns; `1` needs backticks, otherwise the
# literal number 1 is pasted instead of the column.
p <- ggplot(df, aes(x = factor(Y), y = `1`, fill = factor(Y))) +
  geom_boxplot() +
  # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored), but
  # ggplotly() picks it up for the tooltip. <br> is the HTML line break.
  geom_point(
    aes(text = paste0("A: ", A, "<br>B: ", B, "<br>C: ", C, "<br>1: ", `1`))
  )
# Show only the custom text when hovering.
ggplotly(p, tooltip = "text")
Use the tooltip feature of ggplotly. Read about it by typing in help(ggplotly). See Below:
library(tidyverse)
library(plotly)
# Reproducible toy data: 16 rows covering every 0/1 combination of A, B, C and
# Y, plus one uniform random measurement X1.
set.seed(55)
df <- data.frame(
  A = rep(c(0, 1), each = 8),
  B = rep(rep(c(0, 1), each = 4), times = 2),
  C = rep(rep(c(0, 1), each = 2), times = 4),
  Y = rep(c(0, 1), times = 8),
  X1 = runif(16)
)
# A, B and C are passed as extra aesthetics purely so that ggplotly() can show
# them in the tooltip.
p <- ggplot(
  data = df,
  mapping = aes(
    x = factor(Y), y = X1, fill = factor(Y),
    A = A, B = B, C = C
  )
) +
  geom_boxplot() +
  geom_point() +
  xlab("Y") +
  guides(fill = guide_legend("Y")) +
  theme(legend.position = "top")
# Restrict the hover tooltip to the three metadata columns, then display.
final.p <- ggplotly(p, tooltip = c("A", "B", "C"))
final.p
Following data:
# Example data: 20 rows in four groups of five. Most variables are uniform
# random draws between 1 and 7; OORT, AL and RTL are fixed values that contain
# missing entries.
df <- data.frame(
  Group_ID = rep(c(1, 2, 3, 4), each = 5),
  WBHO = runif(20, 1.0, 7.0),
  SI = runif(20, 1.0, 7.0),
  OORT = c(
    2.34, 4.64, NA, 5.32, 3.23, 6.01, 5.43, 4.78, 3.98, 3.80,
    4.45, NA, NA, 3.18, 4.87, NA, NA, 5.73, 3.52, 4.89
  ),
  LMX = runif(20, 1.0, 7.0),
  RL = runif(20, 1.0, 7.0),
  AL = c(
    1.54, NA, 1.08, 6.77, NA, NA, 4.56, NA, 5.34, 4.32,
    2.45, 3.86, 6.21, 2.89, 7.32, 6.43, NA, 4.56, 3.89, 6.16
  ),
  SL = runif(20, 1.0, 7.0),
  RV = runif(20, 1.0, 7.0),
  PT = runif(20, 1.0, 7.0),
  SD = runif(20, 1.0, 7.0),
  HT = runif(20, 1.0, 7.0),
  RTL = c(
    2.45, NA, 6.04, 2.88, 3.49, 2.30, NA, 5.32, 2.39, NA,
    3.62, 3.22, 4.87, 2.91, 5.41, NA, NA, 4.78, 6.20, NA
  ),
  INB = runif(20, 1.0, 7.0),
  ETB = runif(20, 1.0, 7.0)
)
Now, I want to create a raster, 2D-Grid or Heatmap which gives a nice overview of all the variables for each group ("Group_ID") using the mean (the x-axis showing the groups and the y-axis all the variables), giving a particular field green colour for value 1 to 3, yellow for 3 to 5 and green for 5 to 7. I have the following Code to create a df that combines the variables in one column and has the values and Group-belonging in the other two:
library(dplyr)
library(tidyr)
# Stack every variable except Group_ID into variable/value pairs.
df_new <- gather(df, key = "variable", value = "value", -Group_ID)
This does not work, however, as there are NAs included. However, I want to keep those rows with NAs. Is there a way with which I can do this in the same step?
Then, I would like to create the raster concerning which I have been given the following code which I am not fully sure how to apply in this case:
library(raster)
# Attempt to lay the values out on a raster grid and label its axes.
# Grid with one column per row of df_new and 15 rows.
r <- raster(ncol=nrow(df_new), nrow=15, xmn=0, xmx=4, ymn=0, ymx=15)
# NOTE(review): the parentheses of the next statement are unbalanced (two are
# opened, one is closed), so this snippet does not parse as written.
# NOTE(review): as.matrix() is given 14 separate vectors; presumably only the
# first one is converted -- combine them (e.g. via cbind) first. TODO confirm.
values(r) <- as.vector(as.matrix(df$WBHO, df$SI, df$OORT, df$LMX, df$RL, df$AL, df$SL, df$RV, df$PT, df$SD, df$HT, df$RTL,
df$INB, df$ETB)
plot(r, axes=F, box=F, asp=NA)
# NOTE(review): seq() is called without arguments in both axis() calls, so the
# tick positions are not actually specified -- TODO supply the positions.
axis(1, at=seq(), 0:9)
axis(2, at=seq(), c("", colnames(df_new)), las=1)
Thanks for any help!
We can use the dplyr and tidyr to calculate the mean. After that, we can use the cut function to categorize the values. We can then use the geom_tile from the ggplot2 to plot a heatmap. Specify x to be the variable, y is Group_ID (converted to be factor), and fill to be based on value2. No raster package is required.
It is not clear why you want two groups (1-3, 5-7) both to be green. My example assigns red to the group 5-7, but you can easily change this based on your needs.
library(dplyr)
library(tidyr)
# Mean of every variable per group, plus a three-level category of that mean
# for colouring the heatmap.
df_new <- df %>%
  # Long format: one row per (Group_ID, variable, value); rows with NA values
  # are kept.
  pivot_longer(
    cols = -Group_ID,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(Group_ID, variable) %>%
  # NAs are ignored when averaging; .groups = "drop" returns an ungrouped
  # result, so no separate ungroup() is needed.
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # include.lowest = TRUE keeps a mean of exactly 1 in "Low"; with the
    # default right-closed intervals it would fall outside (1, 3] and be NA.
    value2 = cut(
      value,
      breaks = c(1, 3, 5, 7),
      labels = c("Low", "Medium", "High"),
      include.lowest = TRUE
    )
  )
library(ggplot2)
# Heatmap: one tile per (variable, group), coloured by the category of the
# group mean.
ggplot(
  data = df_new,
  mapping = aes(x = variable, y = factor(Group_ID), fill = value2)
) +
  geom_tile() +
  scale_fill_manual(
    values = c(Low = "Green", Medium = "Yellow", High = "Red")
  ) +
  # factor(Group_ID) would otherwise be used as the axis title.
  labs(y = "Group_ID")