How can I change the order of the segments in the following stacked barplot
so that they run from -- (left) to ++ (right), with NA at the far right?
library(dplyr)
library(ggplot2)
library(tidyr)
# Simulate `size` answers for each of five items. Every item draws with
# replacement from its own pool of codes; a code that is repeated in a pool
# is proportionally more likely to be drawn.
size <- 100
df <- as.data.frame(lapply(
  list(
    item1 = c(1:5, 1:5, 99),
    item2 = c(1:5, 1:4, 99),
    item3 = c(1:5, 1:3, 99),
    item4 = c(1:5, 1:5, 99),
    item5 = c(1:5, 1:4, 99)
  ),
  function(pool) sample(pool, size = size, replace = TRUE)
))
# Stacked barplot: percentage of each answer code within every item
df %>%
  pivot_longer(cols = starts_with("item")) %>%
  # One row per item/answer combination with its frequency `n`
  count(name, value) %>%
  group_by(name) %>%
  mutate(perc = 100 / sum(n) * n) %>%
  ungroup() %>%
  mutate(
    value = factor(
      value,
      levels = c(1:5, 99),
      labels = c("--", "-", "0", "+", "++", "NA")
    )
  ) %>%
  ggplot(aes(x = name, y = perc, fill = value)) +
  geom_col() +
  # Swap the axes so the items are listed along the vertical axis
  coord_flip()
Created on 2022-12-12 with reprex v2.0.2
You can reverse the order of the stack by using position_stack(reverse = TRUE):
set.seed(123)
library(tidyr)
library(ggplot2)
library(dplyr)
# Same percentages as before; only the stacking order of the segments is
# reversed, via position_stack(reverse = TRUE).
df %>%
  pivot_longer(cols = starts_with("item")) %>%
  count(name, value) %>%
  group_by(name) %>%
  mutate(perc = 100 / sum(n) * n) %>%
  ungroup() %>%
  mutate(
    value = factor(
      value,
      levels = c(1:5, 99),
      labels = c("--", "-", "0", "+", "++", "NA")
    )
  ) %>%
  ggplot(aes(x = name, y = perc, fill = value)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  coord_flip()
Related
In the plot below: 1) How can I align category 'A' with the x axis (so that it starts at y = 0)?
2) How can I change the sub_category fill colors? I want to change them to 'pink' (a color similar to the category color). Can anyone help?
library(tidyverse)
# Example data: category A has two sub-categories, B and C have one each
plot_data <- data.frame(
  category     = c("A", "A", "B", "C"),
  sub_category = c("a1", "a2", "b1", "c1"),
  value        = c(6, 12, 3, 2)
)
# Give B and C their category name as sub_category, stack the two label
# columns into `title`/`cat_region`, and plot the summed value per label.
plot_data %>%
  mutate(sub_category = if_else(category == "A", sub_category, category)) %>%
  pivot_longer(cols = -value, names_to = "title", values_to = "cat_region") %>%
  # Drop the B and C rows from the "sub_category" bar
  filter(title != "sub_category" | !cat_region %in% c("B", "C")) %>%
  group_by(title, cat_region) %>%
  summarise(value_sum = sum(value)) %>%
  ggplot(aes(
    x = title,
    y = value_sum,
    fill = cat_region,
    group = interaction(title, cat_region)
  )) +
  geom_col()
I think you're looking for scale_fill_manual to select the fill colors, and position_stack(reverse = TRUE) to reverse the stacking order:
# Same data preparation as in the question; the stack order is reversed and
# every fill level gets an explicitly chosen colour.
plot_data %>%
  mutate(sub_category = if_else(category == "A", sub_category, category)) %>%
  pivot_longer(cols = -value, names_to = "title", values_to = "cat_region") %>%
  filter(title != "sub_category" | !cat_region %in% c("B", "C")) %>%
  group_by(title, cat_region) %>%
  summarise(value_sum = sum(value)) %>%
  ggplot(aes(
    x = title,
    y = value_sum,
    fill = cat_region,
    group = interaction(title, cat_region)
  )) +
  geom_col(position = position_stack(reverse = TRUE)) +
  scale_fill_manual(
    values = c(
      A = "#f8766d",
      a1 = "#ffa8a3",
      a2 = "#ffe2e0",
      B = "#00b0f6",
      C = "#74d67f"
    )
  ) +
  theme_bw()
I want to separate the hashtags in one column into different columns. After I use "separate" function, I have a lot of NA's when I do a ggplot. How can I remove the NA's in my ggplot? my code is like this:
# Split `terms` into five fixed columns t1..t5 (rows with fewer than five
# hashtags are padded with NA), reshape to long form, and plot the count of
# each value per year as dodged bars.
df %>%
  separate(terms, into = paste0("t", 1:5), sep = ";") %>%
  pivot_longer(cols = -year) %>%
  count(year, value) %>%
  ggplot(aes(x = factor(year), y = n, fill = value, label = NA)) +
  geom_col(position = position_dodge()) +
  geom_text(position = position_dodge(1))
my data is like this:
terms year
1 #A;#B;#C;#D;E 2017
2 #B;#C;#D 2016
3 #C;#D;#E#G 2021
4 #D;#E;#F 2020
...
Try tidyr::separate_rows instead:
library(tidyverse)
# Put every ";"-separated hashtag on its own row, count hashtags per year,
# and draw dodged bars labelled with the hashtag.
df %>%
  separate_rows(terms, sep = ";") %>%
  count(year, terms) %>%
  ggplot(aes(x = factor(year), y = n, fill = terms)) +
  # Bars and labels share one dodge width so each label sits over its bar
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(aes(label = terms), position = position_dodge(width = 0.9))
You might also want to include tidyr::complete:
# As above, but complete() adds the missing year/hashtag combinations with
# n = 0, so every year has a slot for every hashtag.
df %>%
  separate_rows(terms, sep = ";") %>%
  count(year, terms) %>%
  complete(year, terms, fill = list(n = 0)) %>%
  ggplot(aes(x = factor(year), y = n, fill = terms)) +
  geom_col(position = position_dodge(width = 0.9, preserve = "single")) +
  scale_fill_discrete(drop = FALSE) +
  scale_x_discrete(drop = FALSE) +
  # Same dodge width as the bars so each count is centred over its bar
  geom_text(
    aes(label = n),
    size = 3,
    position = position_dodge(width = 0.9)
  )
Or with only the top 3 terms labeled:
# Count hashtags per year and add the missing year/hashtag combinations
# with n = 0.
new_df <- df %>%
  separate_rows(terms, sep = ";") %>%
  count(year, terms) %>%
  complete(year, terms, fill = list(n = 0))

ggplot(new_df, aes(x = factor(year), y = n, fill = terms)) +
  geom_col(position = position_dodge(width = 0.9, preserve = "single")) +
  scale_fill_discrete(drop = FALSE) +
  scale_x_discrete(drop = FALSE) +
  geom_text(
    # Within each year keep `n` only for the three highest counts and set it
    # to NA for the rest. The rows are kept rather than filtered out,
    # presumably so the text layer dodges over the same set of terms as the
    # bars. replace() keeps the type of `n`, and ties.method = "first" makes
    # the choice among tied counts reproducible.
    data = new_df %>%
      group_by(year) %>%
      mutate(n = replace(n, rank(-n, ties.method = "first") > 3, NA)) %>%
      ungroup(),
    aes(label = terms),
    size = 3,
    # Same dodge width as the bars so each label is centred over its bar
    position = position_dodge(width = 0.9)
  )
Sample Data:
# Sample data: one row per record, with ";"-separated hashtags and a year
df <- structure(
  list(
    terms = c(
      "#A;#B;#C;#D;#E",
      "#C;#D;#E",
      "#B;#C;#D",
      "#A",
      "#C;#D;#E;#G",
      "#D;#E;#F",
      "#D"
    ),
    year = c(2017L, 2017L, 2016L, 2016L, 2021L, 2020L, 2020L)
  ),
  class = "data.frame",
  row.names = as.character(1:7)
)
I have a point plot with two different points on each category and I want to create a line segment joining the two points on each row.
# Average buy and sell value per category, reshaped to one row per
# category/measure and drawn as one coloured point per measure.
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = -category,
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, y = category, color = measure)) +
  geom_point(size = 3)
For creating a line segment, you need to have start and endpoints for the segment. Thus, you can stay with the wide format, so no pivot_longer needed.
Then create individual geom_point for sell and buy value and a geom_segment combining both points.
This code will work:
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
# Example data: one buy and one sell value per category
items <- tibble(
  category = c("Wallpaper", "Usables", "Umbrellas"),
  buy_value = c(2000, 500, 200),
  sell_value = c(5200, 12500, 1800)
)
# Stay in wide format: each category row holds both endpoints, so the
# segment can be drawn from the buy value to the sell value.
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  ggplot() +
  geom_point(
    aes(x = med_buy_price, y = category),
    size = 3, color = "red"
  ) +
  geom_point(
    aes(x = med_sell_price, y = category),
    size = 3, color = "green"
  ) +
  geom_segment(
    aes(
      x = med_buy_price, xend = med_sell_price,
      y = category, yend = category
    )
  )
If you do not insist on using geom_point, you could try geom_errorbar, which simplifies things a little:
# One horizontal error bar per category, spanning the buy to the sell value
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  ggplot(aes(
    xmin = med_buy_price,
    xmax = med_sell_price,
    y = category
  )) +
  geom_errorbar(width = 0.1)
For a single dataframe that has multiple variables, I used the following code to compute statistics and for plotting.
library(tidyverse)
# Three years of daily random temperatures (1095 days), tagged with the
# name of the data frame in column `DF`.
DF1 <- data.frame(
  Date = seq(as.Date("2001-01-01"), to = as.Date("2003-12-31"), by = "day"),
  MaxTemp = runif(1095, -5, 20),
  MinTemp = runif(1095, -8, 15),
  MeanTemp = runif(1095, -10, 10),
  DF = rep("DF1", 1095)
)
# Average each temperature variable per day of year (JDay) across all years.
# yday() is namespace-qualified because library(tidyverse) only attaches
# lubridate from tidyverse 2.0.0 onwards.
DF_1 <- DF1 %>%
  mutate(JDay = lubridate::yday(Date)) %>%
  group_by(JDay) %>%
  summarise(
    AveMaxTemp = mean(MaxTemp, na.rm = TRUE),
    AveMinTemp = mean(MinTemp, na.rm = TRUE),
    AveMeanTemp = mean(MeanTemp, na.rm = TRUE)
  )

# Long format (one row per day/variable), one coloured line per variable
DF_1 %>%
  pivot_longer(cols = -JDay, names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = JDay, y = Value, col = Variable)) +
  geom_line()
Now suppose I have multiple data frames (see the example below). Is there a way to combine the two data frames (i.e., DF1 and DF2), compute the statistics, and then plot them using facet_wrap?
# Second data set on the same dates as DF1, with different temperature ranges
DF2 <- data.frame(
  Date = DF1[["Date"]],
  MaxTemp = runif(1095, -2, 15),
  MinTemp = runif(1095, -6, 15),
  MeanTemp = runif(1095, -5, 10),
  DF = rep("DF2", 1095)
)
I am looking for a plot like below
Try this:
# Stack both data frames; the `DF` column records where each row came from
DF3 <- bind_rows(DF1, DF2)

# Average each temperature variable per day of year (JDay) and source.
# yday() is namespace-qualified because library(tidyverse) only attaches
# lubridate from tidyverse 2.0.0 onwards; .groups = "drop" returns an
# ungrouped result.
DF_3 <- DF3 %>%
  mutate(JDay = lubridate::yday(Date)) %>%
  group_by(JDay, DF) %>%
  summarise(
    AveMaxTemp = mean(MaxTemp, na.rm = TRUE),
    AveMinTemp = mean(MinTemp, na.rm = TRUE),
    AveMeanTemp = mean(MeanTemp, na.rm = TRUE),
    .groups = "drop"
  )

# Long format (one row per day/source/variable), one panel per source
DF_3 %>%
  pivot_longer(
    cols = -c(JDay, DF),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = JDay, y = Value, col = Variable)) +
  geom_line() +
  facet_wrap(~ DF)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
# Generate data: 100 items, each with a random line size and three
# activities (x1, x2, x3) that have a start and an end date.
df_durations <- data.frame(x = as.character(1:100)) %>%
  mutate(
    # Rescale the random sizes so that the smallest one equals 1
    linesize = runif(n = n()),
    linesize = linesize / sum(linesize),
    linesize = linesize / min(linesize)
  ) %>%
  mutate(
    start_x1 = as.Date(
      sample(
        as.Date("2018-01-01"):as.Date("2018-04-01"),
        size = n(),
        replace = TRUE
      ),
      origin = "1970-01-01"
    ),
    end_x1 = start_x1 + 20 + sample(-5:5, size = n(), replace = TRUE),
    start_x2 = end_x1 + sample(-5:10, size = n(), replace = TRUE),
    end_x2 = start_x2 + 15 + sample(-5:5, size = n(), replace = TRUE),
    start_x3 = end_x2 + sample(-5:10, size = n(), replace = TRUE),
    # NOTE(review): end_x3 is offset from start_x1, whereas end_x1 and end_x2
    # are offset from their own start; this can put end_x3 before start_x3.
    # Confirm whether start_x3 was intended.
    end_x3 = start_x1 + 30 + sample(-10:10, size = n(), replace = TRUE)
  ) %>%
  arrange(start_x1) %>%
  mutate(
    # Ordered factor whose levels follow the start_x1 sort order
    x = factor(x, levels = x, ordered = TRUE),
    fontsize = round(runif(n(), 5, 12)),
    colour = sample(c("black", "red", "blue"), n(), replace = TRUE),
    # Axis position of each item: the running total of the font sizes
    location = cumsum(fontsize)
  )
# Pivot data for plotting: one row per item and activity (x1, x2, x3) with
# its `start` and `end` date. The part of each column name before "_"
# becomes the output column (".value"), the part after it the activity.
df_durations_long <- df_durations %>%
  pivot_longer(
    cols = c(contains("start"), contains("end")),
    names_to = c(".value", "activity"),
    names_sep = "_"
  )
# Plot: one line range per item and activity, positioned at `location` and
# drawn horizontally after coord_flip().
ggplot(df_durations_long, aes(x = location)) +
  geom_linerange(
    aes(ymin = start, ymax = end, colour = activity, size = linesize),
    alpha = .5
  ) +
  scale_y_date(date_labels = '%b-%Y', date_breaks = '2 month') +
  # Label every position with the item it belongs to
  scale_x_continuous(
    breaks = df_durations$location,
    labels = df_durations$x
  ) +
  # Per-item label colour and font size taken from df_durations
  theme(
    axis.text.y = element_text(
      colour = df_durations$colour,
      size = df_durations$fontsize
    )
  ) +
  coord_flip()
The code above generates the following chart.
As you can see, we have both overlapping and non-overlapping horizontal bars (intended).
However there is also overlap between vertically stacked bars (not intended).
What I am looking for is a way to introduce an equal amount of spacing between each of the bars while keeping the relative widths of all the bars intact.
I tried working with scale_size_identity, however I haven't managed to get the formatting right.