I have a dataset that looks something like this:
a b a_total b_total
dog 3 5 10 8
cat 6 2 12 13
pig 9 3 15 9
I'm trying to make a stacked barplot using ggplot, with "a_total" on the bottom and "a" on top for each animal. I tried this, but it doesn't work.
ggplot(df, aes(x = "", y = c("a", "a_total")) + geom_bar(stat= "identity")
How should I go about this?
We create a column from the row names (rownames_to_column), select the 'a' columns along with the new column, reshape to 'long' format (pivot_longer) and do the plotting
library(dplyr)
library(tidyr)
library(ggplot2)
# Stacked bars of `a` and `a_total` for each animal.
# rownames_to_column() is exported by tibble, which is not attached by the
# library() calls of this snippet, so it is called with an explicit namespace.
df %>%
  tibble::rownames_to_column("animal") %>%
  select(animal, a, a_total) %>%
  # long format: one row per animal and measure, in columns `name` / `value`
  pivot_longer(cols = -animal) %>%
  ggplot(aes(x = animal, y = value, fill = name)) +
  geom_bar(stat = "identity") +
  theme_bw()
-output
Also, this can be done for both 'a' and 'b' in a facet_wrap
# Same stacked bars for both the `a` and the `b` columns, one facet per letter.
# rownames_to_column() is exported by tibble, which is not attached by the
# library() calls of this answer, so it is called with an explicit namespace.
df %>%
  tibble::rownames_to_column("animal") %>%
  pivot_longer(cols = -animal) %>%
  # the first character of the column name ("a" or "b") picks the facet
  mutate(abgrp = substr(name, 1, 1)) %>%
  ggplot(aes(x = animal, y = value, fill = name)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  facet_wrap(~ abgrp)
In base R, we can use barplot
# barplot() stacks the rows of the matrix from the bottom up, so `a_total` is
# listed first to sit at the bottom with `a` on top, as asked in the question.
# The colours are swapped accordingly, so `a` stays red and `a_total` blue.
# `legend.text` is spelled out instead of relying on partial argument matching.
barplot(t(df[c("a_total", "a")]), col = c("blue", "red"), legend.text = TRUE)
data
df <- structure(list(a = c(3L, 6L, 9L), b = c(5L, 2L, 3L), a_total = c(10L,
12L, 15L), b_total = c(8L, 13L, 9L)), class = "data.frame", row.names = c("dog",
"cat", "pig"))
Related
I am trying to produce a graph for a categorical variable with three sub-groups, but I would like to strictly present the results for two groups. In Stata, this can be done while producing a graph by adding something like, but I am not sure if there is an R equivalent?
drop if sentiment== "neutral"
Here is a data example:
dput(head(sample_graph, 5))
(list(sentiment = structure(c(3L, 2L, 4L, NA, 2L), .Label = c("meg",
"negative", "neutral", "positive"), class = "factor"), treatment_announcement = c("pre",
"pre", "pre", "pre", "post"), n = c(78L, 150L, 87L, 1L, 829L),
sentiment_percentage = c(0.246835443037975, 0.474683544303797,
0.275316455696203, 0.00316455696202532, 0.490822972172883
), am = structure(c(2L, 2L, 2L, 2L, 1L), .Label = c("post",
"pre"), class = "factor")), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L), groups = structure(list(
treatment_announcement = c("post", "pre"), .rows = structure(list(
5L, 1:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))
I have used this code before, which works well but it drops all observations under this category, but I only want to drop them for visualization purposes, not all rows in the df itself.
For instance, after running the code below, my observations declined from 8000 to 6323.
sample_graph<- sample_graph %>%
drop_na() %>%
filter(sentiment != "neutral")
Therefore, I have attempted dropping the specific subgroup within the ggplot itself, but I am facing an error: "Problem with filter() input ..2.
i Input ..2 is aes(x = treatment_announcement, fill = sentiment, y = sentiment_percentage)."
ggplot(sample_graph %>% filter(sentiment != "neutral", aes(x = treatment_announcement, fill = sentiment, y = sentiment_percentage))) +
geom_bar(stat = "identity", position=position_dodge()) +
scale_fill_grey() +
ylab("percentage") +
theme(text=element_text(size=20)) +
scale_fill_manual(values = c("positive" = "green",
"negative" = "red")) +
theme(plot.title = element_text(size = 18, face = "bold")) +
scale_x_discrete(limits = c("pre", "post")) +
theme_bw()
Following Allen's advice below, I tried the following:
twitter_posts |>
drop_na() |>
filter(sentiment != "neutral") |>
select(sentiment, treatment_announcement) |> # we're only interested in sentiment & treatment_announcement
group_by(sentiment) %>% # group data and
add_count(treatment_announcement) |> # add count of treatment_announcement
unique() |> # remove duplicates
ungroup() |> # remove grouping
group_by(treatment_announcement) |> # group by treatment_announcement
mutate(sentiment_percentage = n/sum(n)) |> # ...calculating percentage
mutate(sentiment = as.factor(sentiment)) |> # change to factors so that ggplot treats...
mutate(am = as.factor(treatment_announcement)) |>
twitter_posts (data = teacher_posts, aes(x = treatment_announcement, fill = sentiment, y = sentiment_percentage)) +
geom_bar(stat = "identity", position=position_dodge()) +
scale_fill_grey() +
xlab("Treatment refers to the implementation of the wage subsidy program targeted at jobless teachers") +
ylab("percentage") +
theme(text=element_text(size=20)) +
scale_fill_manual(values = c("positive" = "green",
"negative" = "red")) +
theme(plot.title = element_text(size = 18, face = "bold")) +
scale_x_discrete(limits = c("pre", "post")) +
theme_bw()
And I am receiving this error "Mapping should be created with aes() or aes_()." although I have the aes mapping for the plot.
You can do some version of this via piping to ggplot or using filter in the data argument
library(tidyverse)
library(palmerpenguins)
penguins <- penguins
# Approach 1: clean and filter in a pipeline, then pipe the result straight
# into ggplot(), so no `data` argument is needed.
penguins |>
  drop_na() |>
  filter(species != "Adelie") |>
  ggplot(mapping = aes(x = bill_length_mm, y = body_mass_g)) +
  geom_point()

# Approach 2: filter inline in the `data` argument. Missing values are not
# dropped beforehand here, so ggplot warns about the rows it cannot draw.
ggplot(
  data = filter(penguins, species != "Adelie"),
  mapping = aes(x = bill_length_mm, y = body_mass_g)
) +
  geom_point()
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2022-07-18 by the reprex package (v2.0.1)
So taking the code you provided it would look something like this
# Drop missing rows and the "neutral" group for this plot only. The pipeline
# is never assigned back, so `twitter_posts` itself keeps all of its rows.
twitter_posts |>
  drop_na() |>
  filter(sentiment != "neutral") |>
  # one row per sentiment x treatment_announcement, with its row count in `n`
  count(sentiment, treatment_announcement) |>
  # share of each sentiment within its treatment_announcement group
  group_by(treatment_announcement) |>
  mutate(sentiment_percentage = n / sum(n)) |>
  ungroup() |>
  # factor so that ggplot treats sentiment as a discrete fill variable
  mutate(sentiment = as.factor(sentiment)) |>
  ggplot(aes(x = treatment_announcement, fill = sentiment, y = sentiment_percentage)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  # a single fill scale: a second scale_fill_*() call would replace the first
  scale_fill_manual(values = c("positive" = "green", "negative" = "red")) +
  scale_x_discrete(limits = c("pre", "post")) +
  xlab("Treatment refers to the implementation of the wage subsidy program targeted at jobless teachers") +
  ylab("percentage") +
  # theme_bw() is a complete theme, so it has to come BEFORE the theme()
  # tweaks; placed after them it would reset the text and title settings
  theme_bw() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(size = 18, face = "bold")
  )
So you would be doing your data cleaning and then plotting it. Because you are piping it you do not need to include the data argument.
If I were you, I would just create a new dataframe by filtering your original one with
# Plot from a filtered copy so the original data frame keeps all of its rows.
# `variable` and "value_to_drop" are placeholders for your own column name and
# the level you want to leave out of the plot.
newdataframe <- originaldataframe %>%
  filter(variable != "value_to_drop")
or something in this style.
From there generating the new graph should be trivial if you already have a working code.
Maybe it is not the most polished way to do it, but it's fast and effective.
Hope it helps.
I have dataset below and would like to have the results in the picture attached using ggplot.
bar_df
class boys girls
a 20 30
b 40 30
c 50 50
d 30 70
I tried code below
bar_df <- melt(bar_df, id.vars = 'class')
ggplot(bar_df,aes(x = variable,y = value)) +
geom_bar(aes(fill = variable),stat = "identity",position = "dodge")
Another option is to use pivot_longer from tidyverse.
library(tidyverse)
# Reshape to long format (columns `name` and `value`), then draw the bars for
# each class side by side, filled by the original column name.
df %>%
  pivot_longer(cols = -class) %>%
  ggplot(mapping = aes(x = class, y = value, fill = name)) +
  geom_col(position = "dodge")
Or with melt (as shown by @r2evans):
# melt() is not provided by the tidyverse attached above, so it is called
# through the reshape2 namespace. It yields the columns `variable` and `value`.
reshape2::melt(df, id.vars = "class") %>%
  ggplot(aes(x = class, y = value)) +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge")
Output
Data
df <-
structure(list(
class = c("a", "b", "c", "d"),
boys = c(20L, 40L,
50L, 30L),
girls = c(30L, 30L, 50L, 70L)
),
class = "data.frame",
row.names = c(NA,-4L))
I have data with BMI and HTN at 3 time points, where BMI is continuous and HTN is categorical. I need to show the changes in BMI and HTN over these three time points with something like a line graph (continuous values for BMI and numbers for HTN). Is there any way to do this in R? And could I have both graphs in one figure? Thanks.
ID
BMI_1
BMI_2
BMI_3
HTN_1
HTN_2
HTN_3
A1
35
37
40
0
0
1
A2
29
32
35
0
1
1
A3
38
39
42
1
1
1
A4
33
34
34
0
0
0
We could reshape to 'long' format with pivot_longer and use ggplot
library(dplyr)
library(tidyr)
library(ggplot2)
# One row per ID and time point: `.value` turns the name prefixes (BMI, HTN)
# into their own columns, and the numeric suffix becomes `grp`.
df1 %>%
  pivot_longer(
    cols = -ID,
    names_to = c(".value", "grp"),
    names_sep = "_"
  ) %>%
  mutate(grp = as.integer(grp)) %>%
  ggplot() +
  # both layers share x, group and colour; only the y column differs
  geom_line(mapping = aes(y = BMI, x = grp, color = ID, group = ID)) +
  geom_line(mapping = aes(y = HTN, x = grp, color = ID, group = ID)) +
  theme_bw()
If we want two plots, can use facet_wrap
# Fully long format: the column name is split at "_" into the measure (`name`)
# and the time point (`grp`, converted to integer); one facet per measure.
df1 %>%
  pivot_longer(
    cols = -ID,
    names_to = c("name", "grp"),
    names_sep = "_",
    names_transform = list(grp = as.integer)
  ) %>%
  ggplot(mapping = aes(x = grp, y = value, group = ID, color = ID)) +
  geom_line() +
  facet_wrap(~ name) +
  theme_bw()
-output
scales can be modified in facet_wrap
...
facet_wrap(~ name, scales = 'free_y') +
...
data
df1 <- structure(list(ID = c("A1", "A2", "A3", "A4"), BMI_1 = c(35L,
29L, 38L, 33L), BMI_2 = c(37L, 32L, 39L, 34L), BMI_3 = c(40L,
35L, 42L, 34L), HTN_1 = c(0L, 0L, 1L, 0L), HTN_2 = c(0L, 1L,
1L, 0L), HTN_3 = c(1L, 1L, 1L, 0L)), class = "data.frame", row.names = c(NA,
-4L))
Is this what you're looking for?
library(tidyverse)
# Average every measurement column over all IDs, then draw one line per
# measure in its own facet with an independent y axis that includes zero.
df1 %>%
  select(-ID) %>%
  summarise(across(everything(), mean)) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("stat", "time"),
    names_sep = "_"
  ) %>%
  ggplot(mapping = aes(x = time, y = value, group = stat)) +
  geom_line() +
  facet_wrap(~stat, scales = "free_y") +
  expand_limits(y = 0)
Or perhaps:
# Average every measurement column over all IDs, put mean BMI and mean HTN
# side by side per time point, and trace the path between the labelled points.
df1 %>%
  select(-ID) %>%
  summarise(across(everything(), mean)) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("stat", "time"),
    names_sep = "_"
  ) %>%
  pivot_wider(names_from = stat, values_from = value) %>%
  ggplot(mapping = aes(x = BMI, y = HTN, label = time)) +
  geom_path() +
  geom_label()
I have time series data with multiple variables measured in different units. It is daily data, as shown below (example data).
structure(list(date = structure(18324:18329, class = "Date"),
x = c(-1805605.65336663, -217934.802608961, -1032002.23625031, 234816.624919304, 1321982.20108174, 104251.623282941), y = c(0.633729348424822, 0.244916933588684, 0.873351667076349, 0.552934182109311, 0.348864572821185, 0.197756679030135), z = c(3L, 5L, 5L, 6L, 5L, 6L)), class = "data.frame", row.names = c(NA, -6L
))
Suppose X is measured in Rs Billion, Y is a ratio between 0 and 1, and Z is a count variable. I want to plot all these variables over the time period in multiple graphs ( preferably using facet_wrap)
You can use the following code
library(tidyverse)
library(lubridate)
# Reshape to long format (one row per date and variable) and draw each
# variable in its own facet with free scales.
# pivot_longer() replaces the superseded gather().
df %>%
  dplyr::mutate(date = ymd(date)) %>%
  pivot_longer(cols = -date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(~key, scales = "free")
Update
# Same plot stacked in a single column, with the facet strips moved to the
# left so they act as per-panel y-axis titles carrying the units.
# pivot_longer() replaces the superseded gather().
df %>%
  dplyr::mutate(date = ymd(date)) %>%
  pivot_longer(cols = -date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  theme_bw() +
  facet_wrap(
    ~key,
    scales = "free_y",
    ncol = 1,
    strip.position = "left",
    labeller = as_labeller(
      c(x = "Rs Billion", y = "Ratio", z = "Count variable (n)")
    )
  ) +
  # the strips already name each panel, so the shared y title is removed
  ylab(NULL) +
  xlab("Date") +
  theme(
    strip.background = element_blank(),
    strip.placement = "outside"
  )
My data frame looks as follows:
> df
person step start end
1 sam A 0 4
2 sam B 4 6
3 greg A 2 7
4 greg B 7 11
And I plot the following chart:
library(ggplot2)
library(dplyr)
library(tidyr)
ggplot(df, aes(colour=step)) +
geom_segment(aes(x=start, xend=end, y=person, yend=person), size=3) +
xlab("Duration")
Now, I want to connect each start and end between sam and greg and label it with the difference.
The expected graphic should look something like this:
The problem is that I don't know how I can draw the lines without passing x and y coordinates. The y coordinates are determined by the grouping (person).
I followed your way and wrote the following code. The key thing is that you need to transform your data so that you can draw lines and labels. For the lines, I used geom_segment(). The function requires x, xend, y, and yend. So I generated these values. For the labels, I used geom_text(), and I needed to generate x, y, and label.
library(tidyverse)
# Data for the connecting lines between Greg and Sam: collect each person's
# distinct start/end values, number them in ascending order within the person,
# and put the two people side by side (columns `greg` and `sam`).
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
df2 <- df %>%
  pivot_longer(
    cols = -c(person, step),
    names_to = "whatever",
    values_to = "value"
  ) %>%
  distinct(person, value) %>%
  arrange(person, value) %>%
  group_by(person) %>%
  mutate(group = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = person, values_from = value) %>%
  # y positions of the two ends of each connector (sam's end at 2, greg's at 1)
  mutate(y = 2, yend = 1)

# Data for the labels: placed halfway between the two rows, slightly to the
# right of the connector's midpoint, showing the difference greg - sam.
df3 <- mutate(
  df2,
  x = (greg + sam) / 2 + 0.4,
  y = 1.5,
  label = greg - sam
)

ggplot(df, aes(colour = step)) +
  geom_segment(aes(x = start, xend = end, y = person, yend = person), size = 3) +
  # connectors and labels use their own data, so the plot-level colour
  # mapping must not be inherited
  geom_segment(
    data = df2, aes(x = sam, xend = greg, y = y, yend = yend),
    size = 2, lineend = "round", inherit.aes = FALSE
  ) +
  geom_text(data = df3, aes(x = x, y = y, label = label), inherit.aes = FALSE) +
  xlab("Duration")
DATA
df <- structure(list(person = c("sam", "sam", "greg", "greg"), step = c("A",
"B", "A", "B"), start = c(0L, 4L, 2L, 7L), end = c(4L, 6L, 7L,
11L)), .Names = c("person", "step", "start", "end"), class = "data.frame", row.names = c("1",
"2", "3", "4"))
Not a complete answer but this should get you to your goal:
your Data:
# fread() comes from data.table, which is not attached in this answer, so it
# is called with an explicit namespace. It parses the inline text as a table.
df <- data.table::fread("person step start end
sam A 0 4
sam B 4 6
greg A 2 7
greg B 7 11")
Your main task is to find/calculate data1:
data1 <- data.frame(x=2:4,xend=1:3,y=rep(1,3),yend=rep(2,3),textx=1:3+.5,texty=rep(1.5,3),textlabel=letters[1:3])
add the info of data1:
# Base duration plot plus the extra layers driven by `data1`: black connector
# segments and green text labels (shifted 0.5 to the right of `textx`).
ggplot(data = df, mapping = aes(colour = step)) +
  geom_segment(
    mapping = aes(x = start, xend = end, y = person, yend = person),
    size = 3
  ) +
  xlab("Duration") +
  geom_segment(
    data = data1,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    colour = "black",
    size = 3
  ) +
  geom_text(
    data = data1,
    mapping = aes(x = textx + 0.5, y = texty, label = textlabel),
    colour = "green",
    size = 7
  )
plot:
please note:
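sam and greg are values of a discrete variable, so ggplot places them at integer positions from 1 to n on the y axis (they do not need to be stored as factors for this).
this means greg is 1 and sam is 2. (by default discrete values are sorted alphabetically)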
this means the y position between greg and sam is 1.5