There's a given dataframe
df <- data.frame("V1" = c(0,0,0,0,0,2,2,2,2,2,3,3,3),
"V2" = c(9,9,9,0,0,2,2,2,0,0,3,0,0))
and I would like to create a bar plot out of it, where each value has a specific colour. With the great help of one of users we managed to create code
p <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
value == 1 ~ 'blue',
value == 2 ~ 'red',
value == 3 ~ 'darkorange')) %>%
ggplot(aes(x = index, y = name, fill = color)) +
geom_col(width = 0.3) +
scale_fill_identity(guide = 'legend') +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = pretty_breaks(2))
vec_colors <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
value == 1 ~ 'blue',
value == 2 ~ 'red',
value == 3 ~ 'darkorange')) %>%
arrange(name) %>%
pull(color)
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(y) %>%
arrange(x, .by_group = TRUE)
q$data[[1]]$fill <- vec_colors
q <- ggplot_gtable(q)
plot(q)
that results in such a plot
Question: how do I create a legend that looks like this?
Or like this?
Well, first your approach to achieve your desired result is quite complicated. Instead you could simplify using a named color vector and switching to scale_fill_manual. Doing so will give you atomatically a legend similar to your desired result which I tweak a bit using the breaks argument. Also, instead of geom_col I would go for geom_tile. To this end use the row number as the index.
library(dplyr)
library(tidyr)
library(ggplot2)
df_long <- df %>%
mutate(index = row_number()) %>%
pivot_longer(cols = -index)
cols <- c( 'white', 'darkgreen', 'blue', 'red', 'darkorange')
names(cols) <- c(9, 0, 1, 2, 3)
ggplot(df_long, aes(x = index, y = name, fill = factor(value))) +
geom_tile(height = .3) +
scale_fill_manual(values = cols, limits = force, breaks = c(0, 3, 2), name = "State") +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = scales::pretty_breaks(2))
Related
I have a dataframe that looks like this
df <- data.frame("V1" = c(0,0,0,0,0,2,2,2,2,2),
"V2" = c(9,9,9,0,0,2,2,2,0,0))
I would like to create a stacked barplot in this fashion:
So basically I map a certain value to a corresponding color (9 -> white (invisible), 0 -> green 2 -> red), and then create a bar of a length that is equal to the number of appearances of the value in a dataframe column. Is there any way to do something like this, i.e. using ggplot2?
You could change your dataframe to a long format using pivot_longer. Add a column with the colors you want to show the bars using case_when for example. You could use scale_fill_identity to fill the bars to the corresponding color. To get the same order fill in your stacked bars as your dataframe, you could use ggplot_build to modify the data of your plot by replace the column of "fill" with a vector of the same order as your dataframe. You can use the following code:
library(ggplot2)
library(tidyr)
library(dplyr)
library(scales)
# Create plot
p <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
TRUE ~ 'red')) %>%
ggplot(aes(x = index, y = name, fill = color)) +
geom_col(width = 0.3) +
scale_fill_identity() +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = pretty_breaks(2)) +
labs(x = '', y = '')
# Extract vector of colors in right order
vec_colors <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
TRUE ~ 'red')) %>%
arrange(name) %>%
pull(color)
# right order colors
vec_colors
#> [1] "darkgreen" "darkgreen" "darkgreen" "darkgreen" "darkgreen" "red"
#> [7] "red" "red" "red" "red" "white" "white"
#> [13] "white" "darkgreen" "darkgreen" "red" "red" "red"
#> [19] "darkgreen" "darkgreen"
# use ggplot_build to modify data and replace fill column with vector
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(y) %>%
arrange(x, .by_group = TRUE)
q$data[[1]]$fill <- vec_colors
q <- ggplot_gtable(q)
# plot
plot(q)
Created on 2022-09-17 with reprex v2.0.2
To add the legend, you should specify guide = 'legend' in scale_fill_identity. To determine which elements you want to show in the legend, you should specify the labels and breaks like this:
library(ggplot2)
library(tidyr)
library(dplyr)
library(scales)
# Create plot
p <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
TRUE ~ 'red')) %>%
ggplot(aes(x = index, y = name, fill = color)) +
geom_col(width = 0.3) +
scale_fill_identity(guide = 'legend',
labels = c('darkgreen', 'red'),
breaks = c('darkgreen', 'red')) +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = pretty_breaks(2)) +
labs(x = '', y = '')
# Extract vector of colors in right order
vec_colors <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
TRUE ~ 'red')) %>%
arrange(name) %>%
pull(color)
# use ggplot_build to modify data and replace fill column with vector
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(y) %>%
arrange(x, .by_group = TRUE)
q$data[[1]]$fill <- vec_colors
q <- ggplot_gtable(q)
# plot
plot(q)
Created on 2022-09-24 with reprex v2.0.2
Here is a way. The complicated part is to get different values from the data given the groups they belong to.
df <- data.frame("V1" = c(0,0,0,0,0,2,2,2,2,2),
"V2" = c(9,9,9,0,0,2,2,2,0,0))
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(ggplot2)
})
df %>%
group_by(V1) %>%
mutate(V1 = cur_group_id()) %>%
group_by(V1, V2) %>%
mutate(V2 = cur_group_id()) %>%
pivot_longer(everything()) %>%
mutate(value = factor(value)) %>%
ggplot(aes(name, fill = value)) +
geom_bar(width = 0.75) +
coord_flip() +
scale_x_discrete(limits = rev) +
scale_fill_manual(
values = c(`1` = "darkolivegreen", `2` = "red",
`3` = "darkolivegreen", `4` = "white")
) +
theme_classic()
Created on 2022-09-17 with reprex v2.0.2
Using rle we may account for duplicated values in columns V2, then use a factor with specific level= order. Then just barplot the column tables.
cvec <- c(0, 'darkgreen', 'red', 'darkgreen')
par(lty=0) ## to omit borders
lapply(rev(df), \(x) factor(with(rle(x), rep.int(seq_along(values), lengths)), levels=c(3:1, 4))) |>
sapply(table) |> barplot(width=.5, space=1, col=cvec, horiz=TRUE, ylim=c(-1, 3))
Data:
df <- structure(list(V1 = c(0, 0, 0, 0, 0, 2, 2, 2, 2, 2), V2 = c(9,
9, 9, 0, 0, 2, 2, 2, 0, 0)), class = "data.frame", row.names = c(NA,
-10L))
In below plot , 1) How to align category 'A' to axis x (axis y start from 0)?
2) How to change sub_category fill color ? want change to 'pink' (whis the color samilar to category color) . Anyone can help?
library(tidyverse)
plot_data <- data.frame(category=c('A','A','B','C'),
sub_category=c('a1','a2','b1','c1'),
value=c(6,12,3,2))
plot_data %>% mutate(sub_category=if_else(category=='A',
sub_category,category)) %>%
pivot_longer(names_to = 'title',values_to ='cat_region',-value) %>%
filter(!(title=='sub_category'&cat_region %in% c('B','C') )) %>%
group_by(title,cat_region) %>%
summarise(value_sum=sum(value)) %>%
ggplot(aes(x=title,y=value_sum,fill=cat_region,
group=interaction(title,cat_region)))+geom_col()
I think you're looking for scale_fill_manual to select the fill colors, and position_stack(reverse = TRUE) to reverse the stacking order:
plot_data %>%
mutate(sub_category = if_else(category=='A', sub_category, category)) %>%
pivot_longer(names_to = 'title', values_to = 'cat_region', -value) %>%
filter(!(title == 'sub_category' & cat_region %in% c('B','C'))) %>%
group_by(title, cat_region) %>%
summarise(value_sum = sum(value)) %>%
ggplot(aes(x = title, y = value_sum, fill = cat_region,
group = interaction(title,cat_region))) +
geom_col(position = position_stack(reverse = TRUE)) +
scale_fill_manual(values = c(A = "#f8766d", a1 = "#ffa8a3",
a2 = "#ffe2e0", B = "#00b0f6",
C = "#74d67f")) +
theme_bw()
In my plot below, d_math and d_hyp are each {0,1} variables. Given this fact, in my plot below, I was wondering if we can combine the two plots into one, just like in the desired plot further below?
ps. I'm open to any R packages.
multivariate <- read.csv('https://raw.githubusercontent.com/hkil/m/master/bv.csv')
library(nlme)
library(effects) # for plot
m2 <- lme(var ~ 0 + d_math + d_hyp + d_math:I(grade-2) + d_hyp:I(grade-2),
random = ~ 0 + d_math + d_hyp + d_math:I(grade-2) + d_hyp:I(grade-2) | id, data = multivariate,
na.action = na.omit, weights = varIdent(c(hyp=.3), form = ~1|grp),
control = lmeControl(maxIter = 200, msMaxIter = 200, niterEM = 50,
msMaxEval = 400))
plot(allEffects(m2), multiline = TRUE, x.var="grade")
Desired:
We could use tidyverse to create a single plot. Loop over the list of allEffects output with imap, convert to tibble, select the columns needed, row bind the list elements to single dataset (_dfr), unite two columns to a single, and use ggplot for plotting
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
ggplot(aes(x = grade, y = fit, color = dname)) +
geom_line() +
theme_bw() #+
# facet_wrap(~ grp)
-output
If we want the labels at the end of line, use directlabels
library(directlabels)
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
ggplot(aes(x = grade, y = fit, group = dname, color = dname)) +
geom_line() +
theme_bw() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.8)
Also, this can be done for each 'dvalue' as a facet
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ", remove = FALSE) %>%
ggplot(aes(x = grade, y = fit, group = dname, color = dname)) +
geom_line() +
theme_bw() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.8) +
facet_wrap(~ dvalue)
Or if we need only a specific level, then filter
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
filter(dname %in% c("d_hyp = 1", "d_math = 1")) %>%
ggplot(., aes(x = grade, y = fit, colour = dname, group = dname)) +
geom_line() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.6) +
theme_bw()
You could do it like this with lattice and a bit more brute force than #akrun's approach:
e <- allEffects(m2)
f1 <- matrix(e[[1]]$fit, ncol=5) # math
f2 <- matrix(e[[2]]$fit, ncol=5) # hyp
dat = data.frame(
fit = c(f1[5,], f2[5,]),
grade = rep(c(2,4,5,6,8), 2),
variable = factor(rep(1:2, each=5),
labels=c("Math=1", "Hyp=1"))
)
xyplot(fit ~ grade, data=dat, group=variable, type="l",
auto.key=list(space="top", lines=TRUE,points=FALSE))
This question already has answers here:
Get values and positions to label a ggplot histogram
(2 answers)
Closed 2 years ago.
Is it possible to add number of points (as text in top right) inside each plot in R.
Thanks
library(tidyverse)
a <- rnorm(100, mean = 0, sd = 1)
b <- rnorm(100, mean = 0, sd = 1)
p <- tibble(a, b) %>%
pivot_longer(cols = everything(), names_to = "category", values_to = "values") %>%
ggplot() +
geom_histogram(aes(x = values),
bins = 20,
color = "black",
fill = "blue",
alpha = 0.7) +
theme_bw() +
facet_grid(category ~ .)
ggsave("sample.png", p)
Try this:
library(tidyverse)
#Data
a <- rnorm(100, mean = 0, sd = 1)
b <- rnorm(100, mean = 0, sd = 1)
#Plot
tibble(a, b) %>%
pivot_longer(cols = everything(), names_to = "category", values_to = "values") %>%
ggplot() +
geom_histogram(aes(x = values),
bins = 20,
color = "black",
fill = "blue",
alpha = 0.7) +
theme_bw() +
geom_text(data=tibble(a, b) %>%
pivot_longer(cols = everything(),
names_to = "category",
values_to = "values") %>%
group_by(category) %>% summarise(N=n()),
aes(x=0,y=20,label=N))+
facet_grid(category ~ .)
Output:
I have a technical question for you please.
read_sf("map.shp") %>% mutate(Groups = as.factor(Groups)) %>%
mutate(Groups = factor(Groups, levels = c(paste0(1:23)))) %>%
left_join(data, by = "cities_code") %>%
# Show map with cities border
ggplot() +
geom_sf(aes(fill = Groups), size = 0.4) +
# Color the different Groups, here 23 colors
stat_sf_coordinates(aes(size = observation)) +
# Put point with the size of my number of observations
scale_radius(range = c(1, 6)) +
geom_sf(fill = "transparent", color = "gray20", size = 1, data = . %>% group_by(Groups) %>% summarise()) +
# Show the border of my Groups
theme_bw()
This map represents exactly what I want. It represent cities of one state subdivided by district ("Groups"). But between my map.shp and my data I have a difference of 50 cities, because there is no observation in these cities (so no point of "stat_sf_coordinates(aes(size = observation))").
I can find the difference with anti_join(data, by = "cities_code").
I would like to have the same map but with the missing cities colored in red please please.
Thank you
It was simple :
read_sf("map.shp") %>% mutate(Groups = as.factor(Groups)) %>%
mutate(Groups = factor(Groups, levels = c(paste0(1:23)))) %>%
left_join(data, by = "cities_code") %>%
ggplot() +
geom_sf(aes(fill = Groups), size = 0.4) +
stat_sf_coordinates(aes(size = observation)) +
scale_radius(range = c(1, 6)) +
##
geom_sf(fill = "red", color = "gray40", size = 0.4, data = . %>% anti_join(data, by = "cities_code")) +
##
geom_sf(fill = "transparent", color = "gray20", size = 1, data = . %>% group_by(Groups) %>% summarise()) +
theme_bw()