Creating the pie chart according to the dataframe - r

# Load the per-country COVID-19 report into a data frame.
# The address is assembled with paste0() so that no line break ends up
# inside the string literal (a newline inside the quotes becomes part of
# the URL and the download fails).
df <- read.csv(
  paste0(
    "https://raw.githubusercontent.com/ulklc/covid19-timeseries/",
    "master/countryReport/raw/rawReport.csv"
  ),
  stringsAsFactors = FALSE
)
How can I create pie charts of the death, confirmed and recovered fields in this data set, broken down by region?

This is a perfect job for the tidyverse:
library(tidyverse)

# Draw one pie per measure (confirmed / death / recovered), sliced by region.
df %>%
  as_tibble() %>%
  select(region, confirmed, recovered, death) %>%
  # pivot_longer() supersedes gather(): the three measure columns become
  # a `type` / `value` pair while `region` is kept as the identifier.
  pivot_longer(cols = -region, names_to = "type", values_to = "value") %>%
  group_by(region, type) %>%
  # Total per region and measure; drop the grouping so nothing downstream
  # inherits it (and summarise() does not emit its regrouping message).
  summarise(value = sum(value), .groups = "drop") %>%
  ggplot(aes(x = "", y = value, fill = region)) +
  # position_fill() rescales each facet's stack to 100%, which becomes a
  # full circle once the y axis is bent by coord_polar().
  geom_col(position = position_fill(), color = "white") +
  # vjust = 0.5 places each label in the middle of its slice.
  ggrepel::geom_text_repel(
    aes(label = region),
    direction = "y",
    position = position_fill(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  scale_fill_discrete("") +
  facet_wrap(~type) +
  theme_void() +
  theme(legend.position = "bottom")
For the labels I used the geom_text_repel() function from the ggrepel package, which makes it easy to avoid overplotting.

Related

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
# Percentage of all referrals that each ethnic group accounts for.
freq <- df %>%
  group_by(ethnicity) %>%
  summarise(n = n()) %>%
  mutate(f = round(n / sum(n) * 100, 1))

# Stacked bars of referral counts per ethnicity, split by pathway.
df %>%
  group_by(pathway) %>%
  count(ethnicity) %>%
  ggplot(aes(x = ethnicity, y = n, fill = pathway)) +
  geom_bar(stat = "identity", position = "stack") +
  # The labels are positioned at y = f (the percentage), while the bar
  # heights are counts (n).
  geom_text(
    data = freq,
    aes(x = ethnicity, y = f, label = f),
    inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  scale_fill_manual(
    name = "",
    values = c("light blue", "deepskyblue4"),
    # Both legend labels must be passed as one vector.
    labels = c("a", "b")
  ) +
  xlab("") +
  ylab("Number of Referrals") +
  scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0, 0))
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)

# Number of cars per carburettor count and the matching percentage,
# used as the text shown above each bar.
frq <- mtcars %>%
  group_by(carb) %>%
  summarise(n = n()) %>%
  mutate(f = round(n / sum(n) * 100, 1))

# Counts per gear/carb combination: the segments of the stacked bars.
df <- mtcars %>%
  group_by(gear) %>%
  count(carb)

# Stacked bars with the percentage label anchored at the total bar height
# (y = n from frq) and lifted slightly above it by vjust = -1.
df %>%
  ggplot(aes(x = carb, y = n, fill = gear)) +
  geom_col(position = "stack") +
  geom_text(
    data = frq,
    mapping = aes(x = carb, y = n, label = f),
    vjust = -1,
    inherit.aes = FALSE
  )
Created on 2022-10-31 by the reprex package (v2.0.1)

npdb dataset and displaying results in charts R

I'm working with the npdb dataset from the UsingR package. Here is some information about it:
How could I calculate the amount for each year and display it in a pie chart?
How could I determine which state and which ID has the maximum amount (for each state)?
How could I create a histogram of the amounts?
I was able to calculate some parts of these, but I don't know how to turn them into charts.
Did you have this in mind?
# Penalise scientific notation so large amounts are printed in full.
options(scipen = 999)
library(UsingR)
library(dplyr)
# ggplot() is called below, so attach ggplot2 explicitly rather than
# relying on another package attaching it as a side effect.
library(ggplot2)
data(npdb)
df <- npdb

# Average amount per year, each year's share of the summed averages (in %),
# and the mid-point of each slice along the stacked axis for its label.
q1 <- df %>%
  group_by(year) %>%
  summarise(Ave_am = mean(amount)) %>%
  arrange(desc(year)) %>%
  mutate(
    prop = Ave_am / sum(Ave_am) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart: a single stacked bar bent into a circle by coord_polar().
ggplot(q1, aes(x = "", y = prop, fill = year)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = year), color = "white", size = 6)

# For every state keep the single row with the largest amount
# (which.max() returns the first maximum), then drop the grouping.
q2 <- df %>%
  group_by(state) %>%
  slice(which.max(amount)) %>%
  ungroup()

ggplot(q2, aes(x = state, y = amount)) +
  geom_bar(stat = "identity") +
  ylab("Max amount")

Order grouped scatterplot by mean

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
# Points per location plus a black "-" marker at each location's mean Size.
ggplot(data, aes(x = Loc, y = Size, color = Loc)) +
  geom_point() +
  geom_point(
    data = data %>%
      group_by(Loc) %>%
      summarise(Size.Mean = mean(Size)),
    mapping = aes(x = reorder(Loc, Size.Mean), y = Size.Mean),
    color = "black",
    shape = "-"
  ) +
  theme_pubr(base_size = 8) +
  # log10-transformed y axis
  scale_y_log10() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)

# Mean sepal length per species, largest first; this row order defines
# the order of the x axis below.
iris_means <- iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length)) %>%
  arrange(desc(mean))

# Re-level Species to follow iris_means, then draw the points and a red
# zero-height crossbar at each species mean.
iris %>%
  mutate(Species = factor(Species, levels = iris_means$Species)) %>%
  ggplot(aes(Species, Sepal.Length)) +
  geom_point() +
  geom_crossbar(
    data = iris_means,
    mapping = aes(y = mean, ymin = mean, ymax = mean),
    color = "red"
  )
Created on 2021-09-10 by the reprex package (v2.0.1)

How to do a bar graphic with multiple columns out of an excel archive?

How can I make a bar chart using barplot() or ggplot() from an Excel file that has 83 columns?
I need to plot every column that has a value > 0 in each row (each column represents a gene function, and I need to know how many functions there are in each cluster).
I was trying this, but it didn't work:
# Attempted dodged bar chart of `value` per cluster, coloured by `variable`.
ggplot(x, aes(x = Cluster, y = value, fill = variable)) +
  geom_bar(stat = "bin", position = "dodge") +
  theme_bw() +
  labs(x = "Cluster", y = "Funções no cluster") +
  scale_fill_brewer(palette = "Blues")
Link to the excel:
https://github.com/annabmarques/GenesCorazon/blob/master/AllclusPathwayEDIT.xlsx
What about a heatmap? A rough example:
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)

# Read the workbook, number the rows, and reshape to long form: one row per
# (cluster row, column) pair, with the cell value stored as a factor so it
# can be mapped to a discrete Brewer palette.
data <- read.xlsx("AllclusPathwayEDIT.xlsx") %>%
  mutate(cluster_nr = row_number()) %>%
  pivot_longer(
    cols = -c(Cluster, cluster_nr),
    names_to = "observations",
    values_to = "value"
  ) %>%
  mutate(value = as.factor(value))

# Heatmap: one tile per row number / column name, shaded by cell value.
ggplot(data, aes(x = cluster_nr, y = observations, fill = value)) +
  geom_tile() +
  scale_fill_brewer(palette = "Blues")
Given the large number of observations consider breaking this up into multiple charts.
It's difficult to understand exactly what you're trying to do. Is this what you're trying to achieve?
# install.packages("readxl")
library(tidyverse)
library(readxl)

# Reshape every column except Cluster into name/count pairs and stack the
# counts per cluster, one fill colour per original column.
read_excel("AllclusPathwayEDIT.xlsx") %>%
  pivot_longer(
    cols = !Cluster,
    names_to = "gene_counts",
    values_to = "count"
  ) %>%
  mutate(Cluster = as.factor(Cluster)) %>%
  ggplot(aes(x = Cluster, y = count, fill = gene_counts)) +
  geom_col(position = "stack") +
  # Compact single-column legend without a title, placed on the right.
  theme(
    legend.position = "right",
    legend.key.size = unit(0.4, "line"),
    legend.text = element_text(size = 7),
    legend.title = element_blank()
  ) +
  guides(fill = guide_legend(ncol = 1))

# Save the plot built above as a 35 x 20 cm PDF.
ggsave(filename = "example.pdf", height = 20, width = 35, units = "cm")

Adding proportions to a bar chart

I've used my df to create a filled bar chart (Code used below). I want to have the proportions of each "race" printed within the bar chart.
# Reshape Race_17 to long form (every column except State becomes a
# Race/num pair) and draw bars scaled to 100% per state.
Demo_17 <- tidyr::pivot_longer(
  Race_17,
  -State,
  names_to = "Race",
  values_to = "num"
) %>%
  ggplot(aes(x = State, y = num, fill = Race)) +
  geom_bar(position = "fill", stat = "identity")

# State is mapped to x and the population share to y, so the axis titles
# are assigned in that order.
Demo_17 +
  labs(x = "State", y = "Population", title = "US State Demographics 2017")
This is the df I'm using: US Demographic Data
I've looked at other similar questions but the code is long and hard to follow, particularly if it doesn't relate to your own data.
Can anyone lead me in the right direction?
Try this. Simply compute shares before plotting. Use scales::percent for nice formatting:
# Reshape Race_17 to long form, compute each race's share within its state,
# and draw 100%-scaled bars with the share printed inside every segment.
Demo_17 <- tidyr::pivot_longer(
  Race_17,
  -State,
  names_to = "Race",
  values_to = "num"
) %>%
  # compute pct share of race by state
  group_by(State) %>%
  mutate(pct = num / sum(num)) %>%
  ggplot(aes(x = State, y = num, fill = Race)) +
  geom_bar(position = "fill", stat = "identity") +
  # position_fill(vjust = 0.5) puts each label in the middle of its segment
  # instead of on the boundary between two segments.
  geom_text(
    aes(label = scales::percent(pct)),
    position = position_fill(vjust = 0.5)
  )

# State is mapped to x and the population share to y, so the axis titles
# are assigned in that order.
Demo_17 +
  labs(
    x = "State",
    y = "Population",
    title = "US State Demographics 2017"
  )
An example of this approach using mtcars:
library(ggplot2)
library(dplyr)

# Count cars per cyl/gear pair, turn the counts into within-cyl shares,
# and print each share as a white percentage inside its bar segment.
mtcars %>%
  count(cyl, gear, name = "num") %>%
  group_by(cyl) %>%
  mutate(pct = num / sum(num)) %>%
  ggplot(aes(x = cyl, y = num, fill = gear)) +
  geom_col(position = "fill") +
  geom_text(
    mapping = aes(label = scales::percent(pct)),
    position = "fill",
    vjust = 1.5,
    color = "white"
  )
Created on 2020-04-20 by the reprex package (v0.3.0)
ADDITIONALLY: If you prefer to only show a label for shares over 10% (just an example, adjust as wished) then you add an ifelse() inside the label argument of geom_text:
# Same chart, but a segment only gets a label when its share exceeds 10%;
# smaller segments receive an empty string.
mtcars %>%
  count(cyl, gear, name = "num") %>%
  group_by(cyl) %>%
  mutate(pct = num / sum(num)) %>%
  ggplot(aes(x = cyl, y = num, fill = gear)) +
  geom_col(position = "fill") +
  geom_text(
    mapping = aes(label = ifelse(pct > 0.10, scales::percent(pct), "")),
    position = "fill",
    vjust = 1.5,
    color = "white"
  )
As you notice the 9% label is not showing anymore.
What adds the labels to your charts is the geom_text(). Maybe something like this:
# Reshape Race_17 to long form (every column except State becomes a
# Race/num pair) and draw bars scaled to 100% per state.
Demo_17 <- tidyr::pivot_longer(
  Race_17,
  -State,
  names_to = "Race",
  values_to = "num"
) %>%
  ggplot(aes(x = State, y = num, fill = Race)) +
  geom_bar(position = "fill", stat = "identity")

Demo_17 +
  # State is mapped to x and the population share to y.
  labs(x = "State", y = "Population", title = "US State Demographics 2017") +
  # geom_text() needs the `label` aesthetic (not `labels`); x, y and fill
  # are inherited from the plot. The text is positioned with
  # position_fill() so it is rescaled like the bars, and vjust = 0.5
  # centres each value inside its segment.
  geom_text(aes(label = num), position = position_fill(vjust = 0.5))
Can't test if it works great like this or if it needs some modifications since you have only supplied a screenshot of your dataset instead of a reproducible example of it. Let me know if it works but if it needs more attention read here so that people can effectively help you.

Resources