I am trying to create a stacked bar chart that shows both counts and percentages in the same graph. With help from "Showing data values on stacked bar chart in ggplot2", I added the group totals and plotted my data
using the following code:
### to plot stacked bar graph with total on the top and
### distribution of the frequency;
library(ggplot2);
library(plyr);
library(dplyr);
# Example data: four categories (A-D) observed in each of four years.
Year <- rep(c("2006-07", "2007-08", "2008-09", "2009-10"), each = 4)
Category <- rep(c("A", "B", "C", "D"), times = 4)
Frequency <- c(168, 259, 226, 340, 216, 431, 319, 368,
               423, 645, 234, 685, 166, 467, 274, 251)
Data <- data.frame(Year, Category, Frequency)

# Per-year totals; max_pos is also the y position of the total label
# drawn just above each bar.
sum_count <- Data %>%
  group_by(Year) %>%
  summarise(max_pos = sum(Frequency))
sum_count

# Mid-height of every segment, used as the y position of its count label.
# ggplot2 (>= 2.2.0) stacks the first fill level on top, so heights are
# accumulated from the last category upwards; a plain cumsum() would put
# A's label inside D's segment.
Data <- ddply(Data, .(Year), transform,
              pos = rev(cumsum(rev(Frequency))) - 0.5 * Frequency)
Data

# plot bars and add text
p <- ggplot(Data, aes(x = Year, y = Frequency)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  # count inside each segment
  geom_text(aes(label = Frequency, y = pos), size = 3) +
  # year total above each bar
  geom_text(data = sum_count,
            aes(y = max_pos, label = max_pos),
            size = 4, vjust = -0.5)
print(p)
/Now I want to overlay the percentage of each group together with the counts. This is my approach: merge the data in such a way that we can calculate
the % for each of the groups you are dealing with./
# Attach each year's total (max_pos) to every row so that a share of the
# year can be computed row by row.
MergeData <- merge(Data, sum_count, by = "Year")

# Share of the year's total held by each category, in whole percent.
# The share is based on Frequency (the segment height); pos is only the
# label position and must not be used here.
MergeData <- transform(MergeData,
                       per_cent = round((Frequency / max_pos) * 100, 0))

# Running mid-point of each share within its year, on the 0-100 scale
# (accumulated in row order).
MergeData <- ddply(MergeData, .(Year), transform,
                   per_pos = cumsum(per_cent) - (0.5 * per_cent))

# Turn the numeric share into its display label, e.g. 17 -> "17%".
MergeData <- transform(MergeData, per_cent = paste0(per_cent, "%"))

# Data only with percents
Percent_Data <- subset(MergeData,
                       select = c("Year", "Category", "per_cent", "per_pos"))
/I am wondering if it is possible to overlay percent data to the image I created using previous code so that number and percent can be presented together./
I think you are almost there.
Use MergeData as the source for the data frame and add one more call to geom_text
# Stacked bars from MergeData with three text layers: the count of each
# segment, the year total above the bar, and the percent label.
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  # count, hanging just below the segment's label position
  geom_text(aes(y = pos, label = Frequency), vjust = 1, size = 3) +
  # year total, nudged above the top of the bar
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.5, size = 4) +
  # percent, sitting above the same label position as the count
  geom_text(aes(y = pos, label = per_cent), size = 4, vjust = -1)
print(p)
You may need to fiddle with hjust and vjust to get the text just how you like it.
Thank you for your response. I think it is very good.
# Same three label layers as before, now with axis titles, a plot title,
# a white panel and the legend placed underneath the plot.
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  geom_text(aes(y = pos, label = Frequency),
            size = 2, hjust = 0.5, vjust = 1) +
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.1, size = 3) +
  geom_text(aes(y = pos, label = per_cent), size = 2, vjust = -0.4) +
  # axis labels and title
  labs(
    x = "Year",
    y = " Number of People",
    title = "Distribution by Category over Year"
  ) +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.position = "bottom",
    legend.title = element_text(color = "black", size = 7),
    legend.key.width = unit(1, "inch")
  )
print(p)
Now my % is on top of the number; in other words, it shows "17%" and then "168", but I want "168" and then "17%". I tried switching the order of the geom_text() calls, but it did not work. I am wondering if you know how to fix it.
Yes, it helped. I fixed the numbers so that they sit at the center of each stack; therefore I needed to change the position of the percents. The code below fixed my issue. Thank you so much for your help.
# Final version: the percent label is pushed below the count (larger vjust)
# and shifted slightly sideways with hjust.
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  # count of each segment
  geom_text(aes(y = pos, label = Frequency),
            size = 2, hjust = 0.5, vjust = 1) +
  # year total above the bar
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.1, size = 3) +
  # percent, drawn underneath the count
  geom_text(aes(y = pos, label = per_cent),
            size = 2, hjust = 0.3, vjust = 1.95) +
  # axis labels and title
  labs(
    x = "Year",
    y = " Number of People",
    title = "Distribution by Category over Year"
  ) +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.position = "bottom",
    legend.title = element_text(color = "black", size = 7)
  )
print(p)
Related
library(ggplot2)
library(ggthemes)
# Read the input file; the plot below uses its columns x, value1 and value2.
data <- read.csv("/Users/zbhay/Documents/r-data.csv", header = 1)

# One segment per row from value1 to value2, with a point at each end,
# drawn horizontally via coord_flip().
zb <- ggplot(data) +
  geom_segment(
    aes(x = x, xend = x, y = value1, yend = value2),
    color = "black"
  ) +
  geom_point(aes(x = x, y = value1), size = 4, color = rgb(0.2, 0.7, 0.1, 1)) +
  geom_point(aes(x = x, y = value2), size = 4, color = rgb(0.7, 0.2, 0.1, 1)) +
  coord_flip() +
  theme_solarized() +
  scale_y_continuous(breaks = seq(0, 10000, by = 500))

# Titles and axis labels; the result is printed at top level.
zb +
  labs(
    title = "Title",
    subtitle = "subtitle",
    x = "Business Functions",
    y = "# of hours"
  )

# NOTE(review): legend() is a base-graphics call; it does not add a legend
# to the ggplot object above, and the colours are set outside aes(), so
# ggplot2 has nothing to build a legend from.
legend("left", c("Starting", "Ending"), box.col = "darkgreen")
Hello, here is the code. The CSV file is structured as follows; column A = names, column b = starting number, column c = final number.
I am trying to set up a legend that calls out the final number vs starting number. I have tried and tried but cannot seem to be able to crack it. If anyone knows a fix, I would appreciate it if you could let me know.
As a general rule when using ggplot2 you have to map on aesthetics if you want to get a legend, i.e. instead of setting the colors for your points as arguments map a value on the color aes, e.g. in my code below I map the constant value or category start on the color aes inside aes() for the first geom_point. Afterwards you could use scale_color_manual to assign your desired colors and labels to these "categories" or "values". Finally, the color of the legend box could be set via the theme option legend.background. However, the legend keys themselves have a background color too, which I set to NA via legend.key.
Using some fake random example data:
library(ggplot2)
library(ggthemes)
# Reproducible fake data: five labelled rows with a start and an end value.
set.seed(123)
data <- data.frame(
  x = letters[1:5],
  value1 = runif(5, 0, 10000),
  value2 = runif(5, 0, 10000)
)

# The constants "start" and "end" are mapped to colour inside aes() so that
# ggplot2 builds a legend; scale_color_manual() then assigns the actual
# colours and the legend labels.
ggplot(data) +
  geom_segment(
    aes(x = x, xend = x, y = value1, yend = value2),
    color = "black"
  ) +
  geom_point(aes(x = x, y = value1, color = "start"), size = 4) +
  geom_point(aes(x = x, y = value2, color = "end"), size = 4) +
  scale_color_manual(
    values = c(start = rgb(0.2, 0.7, 0.1, 1), end = rgb(0.7, 0.2, 0.1, 1)),
    labels = c(start = "Starting", end = "Ending")
  ) +
  scale_y_continuous(breaks = seq(0, 10000, by = 500)) +
  coord_flip() +
  labs(
    title = "Title",
    subtitle = "subtitle",
    x = "Business Functions",
    y = "# of hours",
    color = NULL
  ) +
  theme_solarized() +
  theme(
    # legend box colour; the keys have their own background, cleared here
    legend.background = element_rect(fill = "darkgreen"),
    legend.key = element_rect(fill = NA)
  )
I am trying to create an animation or GIF that shows the evolution of an environmental condition over time. Basically, I have a dataset (example below) with year, value of the environmental condition, unit, and coordinates.
year
condition
unit
Lat
Long
1945
-0.120148
TSS
41.36531
41.67889
1948
0.274646
TSS
30.45368
-87.99042
1948
0.074794
TSS
30.45368
-87.99042
1975
-0.102050
TSS
38.10541
-122.06782
1979
-0.169886
NTU
29.77048
-84.91630
Complete dataset: https://drive.google.com/file/d/1XQ95KP_x-kbq_wdmpfpCiOonF-RoFsU1/view?usp=sharing
I am using ggplot2 to create the plots comprising year gaps. Here is the code I am using to plot the variation from 1945 to 1980:
`ggplot() +
  # basemap drawn from the `world` polygons
  geom_map(
    data = world, map = world,
    aes(long, lat, map_id = region),
    color = "seashell2", fill = "seashell", size = 0.3, alpha = 0.9
  ) +
  # one jittered point per observation, coloured by med_turb
  geom_point(
    data = mapa_variacao_anual_45_80,
    aes(Long, Lat, color = med_turb),
    size = 2, shape = 16, position = position_jitter(width = 8)
  ) +
  scale_colour_gradient(low = "darkgreen", high = "red") +
  labs(title = "1945 to 1980", x = "Longitude", y = "Latitude") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    panel.background = element_rect(fill = "aliceblue", colour = "gray")
  )`
My plan is to have several plots with determined year ranges and in the end combine all of them in sequence to show temporal variation.
Is there an easy way to combine the plots? I have been looking for solutions online but they seem not to suit my goal or are just too complicated.
Thanks in advance for any help.
You could get gganimate to handle the animation for you:
library(ggplot2)
library(gganimate)
# Basemap polygons used by the geom_map() layer below.
world <- map_data("world")
# Turn each distinct year into a consecutive integer (1, 2, ...) following
# the sorted factor levels; this index is what the transition animates over.
mapa_variacao_anual_45_80$frames <- as.numeric(
factor(mapa_variacao_anual_45_80$year))
# Same static map as in the question, plus a year label and a transition.
p <- ggplot() +
geom_map(data = world, map = world,
aes(long, lat, map_id = region),
color = "seashell2", fill = "seashell", size = 0.3, alpha = 0.9)+
geom_point(data = mapa_variacao_anual_45_80,
aes(Long, Lat, color = med_turb),
size = 2, shape = 16, position = position_jitter(width = 8)) +
labs(title = "1945 to 1980")+
theme(plot.title = element_text(hjust = 0.5))+
scale_colour_gradient( low = "darkgreen", high = "red") +
# Year label pinned at x = -180, y = 65; check_overlap = TRUE presumably
# keeps the same year from being drawn once per data row.
geom_text(data = mapa_variacao_anual_45_80,
aes(x = -180, y = 65, label = year), hjust = 0, size = 8,
check_overlap = TRUE) +
xlab("Longitude") +
ylab("Latitude")+
theme(legend.title= element_blank())+
theme(panel.background = element_rect(fill = 'aliceblue', colour = 'gray')) +
# Each row appears at its integer frame index.
# NOTE(review): the meaning/units of enter_length and exit_length follow
# gganimate::transition_events - confirm against its documentation.
transition_events(mapa_variacao_anual_45_80$frames,
enter_length = 1, exit_length = 1)
# Render the animation and write it to map.gif; the remaining arguments
# (device, duration, fps, width, height) are presumably forwarded to the
# renderer - confirm against gganimate::anim_save.
anim_save("map.gif", p, device = "ragg_png", duration = 20, fps = 30,
width = 900, height = 450)
You can create a series of png files and assemble them into an animation with the gifski package:
library(ggplot2)
library(gifski)
# Template: write one PNG per frame, then assemble the frames into a GIF.
for(i in 1:30){
# Placeholder - replace ggplot(......) with the plot for frame i.
gg <- ggplot(......)
# Zero-padded index (myplot001.png, myplot002.png, ...), presumably so the
# glob below returns the frames in order.
ggsave(sprintf("myplot%03d.png", i), gg)
}
# Collect every file matching the frame pattern in the working directory.
png_files <- Sys.glob("myplot*.png")
# Combine the frames into myanimation.gif.
gifski(
png_files,
"myanimation.gif",
width = 400, height = 400,
delay = 1/5 # 5 images per second
)
# Remove the intermediate frame files once the GIF has been written.
file.remove(png_files)
I am trying to create a stacked bar graph in ggplot2 with percent shares on top of them. After looking at many other posts, I couldn't figure out a solution, so here are some sample data:
# load some libraries
library(ggplot2)
# make basic data frame: one row per type and year
df <- data.frame(
  type = c("A", "A", "B", "B"),
  year = c(2001, 2002, 2001, 2002),
  count_A = c(3, 2, NA, NA),
  count_B = c(NA, NA, 8, 1),
  sum_year_AB = c(11, 3, 11, 3),
  total_count_with_irrelevant_types = c(13, 14, 19, 23)
)

# create single percentage variable for top of bar:
# share of A + B in the yearly total, rounded to one decimal
df$percent_AB_year <- (df$sum_year_AB / df$total_count_with_irrelevant_types) * 100
# full column name spelled out; `df$percent_AB` only worked through `$`
# partial matching and would break as soon as a second column shared the prefix
df$percent_AB_year <- round(df$percent_AB_year, 1)
df$percent_final <- paste0(df$percent_AB_year, "%")

# keep the percentage on the type "A" rows only, so that each year carries
# a single label
df$percent_AB_year[df$type == "B"] <- NA
df$percent_final[df$type == "B"] <- NA
Here is my code for the bar graph:
# Stacked bars of sum_year_AB per year, filled by type, with the
# percent_final label drawn at the unstacked y value of each row.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  geom_text(aes(label = percent_final), size = 3) +
  scale_x_continuous(breaks = seq(1999, 2003, 1)) +
  scale_fill_grey(start = 0.4, end = 0.6) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  )
And here is the output of the graph:
How do I put the percents on top?
Use position = "stack" inside geom_text too:
# Same chart, but the text layer is stacked like the bars, so the label
# follows the top of its segment; vjust lifts it slightly above.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  geom_text(
    aes(label = percent_final),
    position = "stack",
    vjust = -0.2,
    size = 3
  ) +
  scale_x_continuous(breaks = seq(1999, 2003, 1)) +
  scale_fill_grey(start = 0.4, end = 0.6) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  )
To avoid confusion, I would include it in the bar plot rather than placing it at the top (because the other percent_final values in your df are NA).
Sample code:
library(ggplot2)
library(ggthemes)
# Variant that centres the label inside its segment
# (position_stack(vjust = 0.5)) and applies the Economist theme.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  geom_text(
    aes(label = percent_final),
    position = position_stack(vjust = 0.5),
    size = 9,
    colour = "blue"
  ) +
  scale_x_continuous(breaks = seq(1999, 2003, 1)) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  ) +
  theme_economist()
plot:
I have a stacked bar plot like this:
library(ggplot2)
# Example data in which the first year has very small counts (1, 1, 8, 32)
# next to values in the hundreds for the other years.
Year <- rep(c("2006-07", "2007-08", "2008-09", "2009-10"), each = 4)
Category <- rep(c("A", "B", "C", "D"), times = 4)
Frequency <- c(
  1, 1, 8, 32,
  216, 431, 319, 368,
  423, 645, 234, 685,
  166, 467, 274, 251
)
Data <- data.frame(Year, Category, Frequency)

# Stacked bars with each count centred in its own segment.
ggplot(Data, aes(x = Year, y = Frequency, fill = Category, label = Frequency)) +
  geom_bar(stat = "identity") +
  geom_text(position = position_stack(vjust = 0.5), size = 3)
Is there any automated option to make the bars show their results in a clear way even when a frequency is as small as 1, as in 2006-07?
There's no ideal solution to showing this tidily on a plot. You could use geom_label_repel from ggrepel:
library(ggplot2)
library(ggrepel)
# Option 1: repelled label boxes (ggrepel) placed from the stacked mid-points.
ggplot(
  Data,
  aes(x = Year, y = Frequency, fill = Category, label = Frequency)
) +
  geom_bar(stat = "identity") +
  geom_label_repel(position = position_stack(vjust = 0.5), size = 3)
or facet with free scales:
# Option 2: one facet per year with free scales, so the small bar gets its
# own y axis; the facet strips are hidden.
ggplot(
  Data,
  aes(x = Year, y = Frequency, fill = Category, label = Frequency)
) +
  geom_bar(stat = "identity") +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(. ~ Year, nrow = 1, scales = "free", drop = TRUE) +
  theme(
    strip.text = element_blank(),
    strip.background = element_blank()
  )
Or perhaps a facet_zoom from ggforce:
# Option 3: add a zoomed panel restricted to y values between 0 and 50.
ggplot(
  Data,
  aes(x = Year, y = Frequency, fill = Category, label = Frequency)
) +
  geom_bar(stat = "identity") +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  ggforce::facet_zoom(ylim = c(0, 50))
Or have floating labels:
# Option 4: floating labels for the year whose segments are too small.
ggplot(Data, aes(x = Year, y = Frequency, fill = Category, label = Frequency)) +
  geom_bar(stat = "identity") +
  # In-bar labels for every year except 2006-07, whose values are set to NA
  # in the copy of the data given to this layer.
  geom_text(data = within(Data, Frequency[Year == "2006-07"] <- NA), size = 3,
            position = position_stack(vjust = 0.5)) +
  # Label boxes for the first four rows of Data, given explicit y values
  # (100, 200, 300, 400) that are then stacked.
  geom_label(data = Data[1:4, ], aes(y = 1:4 * 100),
             position = "stack")
Personally, I think I'd go with a table here...
I'm plotting a stacked bar graph and using geom_text to insert the value and name of each stack. The problem is that some stacks are very small/narrow, so the text of two stacks overlaps and is hence not very readable. How can I modify the code to solve this issue?
# Example data: 15 rows, each giving a Type, a Category and a Frequency.
Type <- c(
  "ddddddddddd", "ddddddddddd", "bbbbbbbbbbbbb",
  "ddddddddddd", "eeeeeeeeeeeeee", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "ddddddddddd",
  "eeeeeeeeeeeeee", "mmmmmmmmmmmmmmmmmmm", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "eeeeeeeeeeeeee"
)
Category <- c(
  "mmmmm", "mmmmm", "gggggggggggggggggg",
  "ffffffffffff", "ffffffffffff", "ffffffffffff",
  "sanddddddddd", "sanddddddddd", "yyyyyyyyyyy",
  "yyyyyyyyyyy", "yyyyyyyyyyy", "sssssssssssssss",
  "sssssssssssssss", "sssssssssssssss", "ttttttttttttt"
)
Frequency <- c(4, 1, 30, 7, 127, 11, 1, 1, 6, 9, 1, 200, 3, 4, 5)
Data <- data.frame(Type, Category, Frequency)
# Stacked bars per Type (legend hidden) with two text layers, one showing
# the Frequency and one the Category, both at the row's own y value.
p <- ggplot(Data, aes(x = Type, y = Frequency)) +
  geom_bar(
    aes(fill = Category),
    stat = "identity",
    show.legend = FALSE
  ) +
  geom_text(aes(label = Frequency), size = 3) +
  geom_text(aes(label = Category), size = 3)
Considering your data, a facetted plot might be a better approach:
# summarise your data
library(dplyr)
d1 <- Data %>%
  # shorten both label columns to their first two characters
  # (plain mutate() replaces the deprecated mutate_each(funs(...)))
  mutate(Type = substr(Type, 1, 2),
         Category = substr(Category, 1, 2)) %>%
  group_by(Type, Category) %>%
  summarise(Freq = sum(Frequency)) %>%
  # drop the grouping that summarise() leaves behind
  ungroup() %>%
  # create a label by pasting the 'Category' and the 'Freq' variables together
  mutate(lbl = paste(Category, Freq))

# plot: one bar per Category, one facet per Type, label rotated above the bar
ggplot(d1, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(aes(label = lbl), angle = 90, size = 5, hjust = -0.1,
            position = position_dodge(0.8)) +
  # head-room above the tallest bar for the rotated labels
  scale_y_continuous(limits = c(0, 240)) +
  # hide the fill legend ("none" replaces the deprecated FALSE)
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14)
which gives:
In the above plot I shortened the labels on purpose. If you don't want to do that, you could consider this:
# Same summary as before, but keeping the full Type and Category names.
d2 <- Data %>%
  group_by(Type, Category) %>%
  summarise(Freq = sum(Frequency)) %>%
  # drop the grouping that summarise() leaves behind
  ungroup() %>%
  mutate(lbl = paste(Category, Freq))

# Labels are written inside the bars, starting at y = 5 and running upwards,
# so the x-axis text and ticks are no longer needed.
ggplot(d2, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(aes(y = 5, label = lbl), alpha = 0.6, angle = 90, size = 5,
            hjust = 0, position = position_dodge(0.8)) +
  scale_y_continuous(limits = c(0, 240)) +
  # hide the fill legend ("none" replaces the deprecated FALSE)
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
which gives: