How to create ggplot graphs with the three groups into one plot? - r

My code is:
# Density of stress in df2, one filled curve per JP_Gender value.
ggplot(df2, aes(stress, fill = as.factor(JP_Gender))) +
  geom_density(alpha = 0.3)
# Density of CGstress in df1, drawn as a separate plot.
ggplot(df1, aes(CGstress)) +
  geom_density(alpha = 0.3)
My dataset 1:
structure(list(CGstress = c(4, 1, 10, 8, 9.5, 5, 5, 6, 6, 6,
7, 3, 4.5, 8, 9, 1, 5, 1, 5.5, 4, 1, 7, 9, 8, 3, NA, 10, 9, 5,
3, NA, 10, 6, NA, 10, 7)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
My dataset 2:
structure(list(stress = c(7, 2, 5, 6, 7, 1, 6, 10, 9, 10, 10,
10, 10, 8, 9, 4, 7, 6, 4, 9, 4, 8, 3.5, 7, 6, 6, 1, 7, 9, 8,
10, 6, 3, 1, 1, 1, 9, 6, 4), JP_Gender = structure(c(1, 2, 1,
2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), label = "What is your gender?", format.stata = "%12.0g", labels = c(Male = 1,
Female = 2, Transgender = 3, Other = 4), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -39L), class = c("tbl_df",
"tbl", "data.frame"))
The code above gives me two separate graphs. How can I combine the two graphs into one plot, and how do I label the legend?

You can try combining the two datasets and then plotting them:
library(dplyr)
library(ggplot2)

# Stack both data sets into one long table tagged with a group id, then draw
# one density curve per group on a shared axis.
# NOTE(review): code 3 is reused for the df1 rows although the JP_Gender value
# labels also define 3 (Transgender); the posted data only contains 1 and 2.
df1 %>%
  mutate(id = 3) %>%
  rename(stress = CGstress) %>%
  bind_rows(
    df2 %>%
      mutate(id = as.integer(JP_Gender)) %>%
      select(stress, id)
  ) %>%
  # Convert to a factor once so every code gets its own fill colour.
  mutate(id = factor(id)) %>%
  ggplot(aes(x = stress, fill = id)) +
  geom_density(alpha = 0.3) +
  # Name the legend and give each code a readable label (1 = Male and
  # 2 = Female come from the JP_Gender value labels; 3 marks the df1 rows).
  scale_fill_discrete(
    name = "Group",
    labels = c("1" = "Male", "2" = "Female", "3" = "CGstress")
  )

Related

geom_bar(), Y-axis goes way above data value

I am trying to visualize a data frame from a survey. I'm currently trying to plot a barplot with geom_bar(), that takes in "Life Satisfaction" as the y-axis, and "Family Values" as the x-axis. Note that the survey answer for Life Satisfaction is 1(very unsatisfied) to 10(very satisfied).
But for some reason when I try to plot this barplot, the y-axis goes way above 10, and I don't understand why.
This is my code:
# NOTE(review): geom_bar(stat = "identity") stacks the Q49 value of every row
# that shares the same Q1, so each bar shows a sum rather than a single answer.
df1 %>%
  # Refer to the column directly; df1$... bypasses the data flowing down the
  # pipe and breaks as soon as an earlier step changes the rows.
  filter(B_COUNTRY_ALPHA == "PAK") %>%
  drop_na(Q49) %>%
  ggplot(aes(x = Q1, y = Q49, fill = B_COUNTRY_ALPHA)) +
  geom_bar(stat = "identity") +
  labs(x = "Family Value", y = "Life Satisfaction")
This is the graph that I get when I run it:
This is the first 20 rows of data that I want to work with:
On a side note: I was thinking of finding the mean of the Life Satisfaction data, which might make the plot make sense, but I am not sure how to do that.
@GregorThomas I followed your instructions and I got this:
structure(list(B_COUNTRY_ALPHA = c("PAK", "PAK", "PAK", "PAK",
"PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK",
"PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK"), Q49 = c(7,
10, 10, 5, 1, 6, 6, 10, 10, 10, 4, 4, 8, 10, 10, 10, 10, 9, 10,
8), Q1 = c(1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), Q2 = c(1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1,
4, 1, 2, 2, 2), Q3 = c(2, 2, 1, 1, 3, 1, 2, 2, 2, NA, 2, 4, 1,
1, 2, 2, 4, 2, 4, 2), Q4 = c(3, 4, 2, 4, 2, 3, 4, 2, 1, 4, 4,
4, 4, 1, 3, 4, 3, 4, 4, 2), Q5 = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 2, 1, 1, 1, 4, 1, 1, 4), Q6 = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4), Q57 = c(2, 2, 2, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1), Q106 = c(7, 5,
10, 4, 10, 7, 1, 10, 10, 10, 1, 10, 1, 10, 10, 10, 9, 4, 10,
6), Q107 = c(7, 6, 5, 5, 10, 3, 1, 10, 10, NA, 1, 1, 1, 10, 3,
10, 10, 8, 10, 4), Q108 = c(7, 9, 1, 4, 1, 1, 10, 10, 5, 10,
10, 10, 1, 10, 10, 10, 10, 10, 1, 3), Q109 = c(6, 4, 1, 4, 1,
1, 1, 10, 10, 1, 6, 2, 10, 5, 10, 1, 10, 9, 1, 4), Q110 = c(6,
3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 10, 1, 10, 3, 1, 3), Q112 =
c(8,
8, 10, 6, 10, 5, 10, 10, 10, 10, NA, 10, 10, 10, 10, 10, 10,
10, 10, 7), Q163 = c(6, 2, 10, 7, 9, 10, 10, 10, 10, NA, 10,
10, 6, 10, 3, NA, 8, 7, NA, 9), Q164 = c(4, 9, 10, 8, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, NA, 8, 10, 10, 10), Q222 = c(2,
1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 4, NA, 1, NA, 2, 3, NA, 3),
Q260 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 1), Q262 = c(33, 21, 60, 18, 60, 50, 45, 29, 62,
46, 35, 40, 30, NA, 45, NA, 30, 50, 36, 34), Q273 = c(1,
6, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
Q275 = c(0, 2, 3, 3, 3, 2, 3, 2, 4, 0, 0, 0, 1, NA, 3, NA,
1, 1, 0, 1), Q281 = c(8, 0, 3, 0, 10, 3, 4, 6, 3, 8, 4, 4,
4, 0, 5, 0, 0, 0, 9, 0)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L))
Here's a couple ideas using your sample data:
Use a dodged bar plot:
# Dodged bars: within each Family Value (Q1), one bar per Life Satisfaction
# score (Q49) counting the responses.
ggplot(sample_data, aes(x = factor(Q1), fill = factor(Q49))) +
  geom_bar(position = position_dodge(preserve = "single")) +
  labs(
    x = "Family Value",
    y = "Count of Responses",
    fill = "Life Satisfaction"
  )
Use facets:
# Facets: one panel per Family Value (Q1), with bars counting the responses
# for each Life Satisfaction score (Q49).
ggplot(sample_data, aes(x = factor(Q49), fill = factor(Q49))) +
  geom_bar() +
  facet_wrap(vars(paste("Family Value", Q1))) +
  labs(
    x = "Life Satisfaction",
    y = "Count of Responses",
    fill = "Life Satisfaction"
  )
Use a heat map:
# Heat map: tile colour shows how many responses fall into each combination of
# Family Value (Q1) and Life Satisfaction (Q49).
ggplot(sample_data, aes(x = factor(Q1), y = factor(Q49))) +
  geom_bin2d() +
  labs(x = "Family Value", y = "Life Satisfaction") +
  coord_fixed()

Custom legend with collapsed factor values in ggplot2

This might have been asked before but I cannot find it after searching for a while.
I have the following data.frame.
structure(list(genotype = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), treatment = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2), group_val = c(1.57837321136062, 1.76334487045417,
1.73586017158848, 2.04109599956349, 1.80010448171344, 2.07090618591467,
1.07574792716769, 1.18397923178828, 1.21889101529495, 1.20248500773822,
1.3808338457315, 1.42210495550068, 1.64573799027085, 1.55264650622629,
1.70883543195709, 1.50659245289343, 0.90200663935181, 0.881584819347461,
0.954018876774318, 0.930280832877143, 1.85156683945601, 1.84753564786241,
1.96298425756247, 1.97329138022375, 1.89502726316024, 1.88250460242058,
1.12763625255165, 0.849376374224505, 1.04073813233643, 1.00903241221572,
1.58053330474755, 1.60670456352336, 2.02389070564365, 1.88873097588837,
2.05477131909231, 1.9945072156688, 1.25082256791521, 1.19811638234775,
1.06975634816231, 1.20976663827858, 2.10380372095596, 2.14921911265538,
2.18892848376085, 2.15381486434453, 1.82607480270083, 1.98677173426624,
0.954242509439325, 1.26717172840301, 1.02118929906994, 0.8750612633917,
0.602059991327962, 0.751757501701102, 1.62038696281561, 1.20836885846782,
1.32651612490137, 1.13698195289592, 1.6421025338509, 1.41206291695827,
1.6101194399672, 1.6712113404111, 2.11429641123473, 1.84505371972817,
2.27595666174897, 2.2231986751043, 2.24564757180665, 2.24707729700922,
1.47310327692139, 1.1447387331723, 1.24550565752405, 1.07766801873253,
1.85452622982568, 1.87613186339641, 2.09397999968991, 1.96262712830201,
2.2095435542086, 2.10814923581137, 1.00067107824743, 0.983971241990881,
1.24468845794328, 1.15181012595794)), row.names = c(NA, -80L), groups = structure(list(
genotype = c(1, 1, 2, 2, 3, 3, 4, 4), treatment = c(1, 2,
1, 2, 1, 2, 1, 2), .rows = structure(list(1:10, 11:20, 21:30,
31:40, 41:50, 51:60, 61:70, 71:80), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
My aim is to have the following plot
But with the levels of interaction(genotype, treatment) collapsed. The only relevant information here is that "light" colors correspond to treatment level 1 and "dark" colors correspond to treatment level 2.
I would like a legend to reflect that, meaning only two points, could be "#CFCFCF" and "gray50" to reflect that the values of treatment are 1 and 2 respectively.
Here's the code to make the plot as shown in the image
library(tidyverse)

# Light/dark colour pairs, one pair per genotype (light = treatment 1,
# dark = treatment 2).
target_colors <- c(
  "#FF9BB4", "#FA234C", "#A2D3FF", "#2987FA", "#47C947", "darkgreen",
  "#CFCFCF", "gray50"
)
# Sorted interaction levels ("1.1", "1.2", "2.1", ...) so that each genotype's
# two treatments line up with its colour pair above.
color_order <- interaction(df$genotype, df$treatment) %>%
  levels() %>%
  sort()

df %>%
  ggplot(aes(genotype, group_val,
             color = interaction(genotype, treatment))) +
  # TRUE/FALSE instead of T/F, which are ordinary variables that can be
  # reassigned.
  ggbeeswarm::geom_quasirandom(dodge.width = 1, show.legend = TRUE) +
  # if flipping, the levels of the factor must be modified
  # coord_flip() +
  geom_boxplot(
    position = position_dodge(1),
    width = 0.1, fill = "black", show.legend = FALSE
  ) +
  scale_color_manual(values = setNames(target_colors, color_order))
One option would be to use only four colors, map genotype on color and treatment on alpha:
library(tidyverse)

# One colour per genotype; treatment is encoded through transparency instead
# of a second colour.
target_colors <- c("#FA234C", "#2987FA", "darkgreen", "gray50")

df %>%
  ggplot(aes(genotype, group_val,
             color = factor(genotype),
             alpha = factor(treatment),
             group = interaction(genotype, treatment))) +
  # TRUE/FALSE instead of T/F, which are ordinary variables that can be
  # reassigned.
  ggbeeswarm::geom_quasirandom(dodge.width = 1, show.legend = TRUE) +
  geom_boxplot(
    position = position_dodge(1),
    width = 0.1, fill = "black", show.legend = FALSE
  ) +
  scale_color_manual(values = target_colors) +
  # Treatment 1 is drawn at alpha 0.6, treatment 2 fully opaque.
  scale_alpha_manual(values = c(0.6, 1))

How to fix invalid vertex id error in tidygraph?

Data
network_data <- list(nodes = structure(list(id = c(0, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14), label = c("2892056", "2894543", "2894544",
"2894545", "2894546", "2894547", "2894548", "2894549", "2894550",
"2894551", "2894552", "2894553", "2894554", "2894555", "2894556"
)), row.names = c(NA, -15L), class = "data.frame"), links = structure(list(
from = c(3, 5, 7, 13, 13, 7, 3, 5, 0, 0, 5, 2, 7, 6, 13,
11, 0, 3, 2, 7, 13, 3, 0, 0, 5, 3, 13, 4, 0, 14, 13, 7, 2,
3, 5, 0, 12), to = c(0, 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 7, 11, 12, 12,
12, 13, 13, 13, 13, 13, 14), weight = c(1, 2, 2, 1, 2, 1,
1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1,
2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 1, 1)), row.names = c(NA, -37L
), class = "data.frame"))
I have this list of nodes and links for building a network. Rather than plotting the network, I want to get the network characteristics such as isolates, reciprocity, etc.
Here's the rest of the code that I'm using to obtain these characteristics:
# Keep only the id and label columns of the node table.
network_data$nodes <- network_data$nodes %>% select(id, label)
# NOTE(review): the links posted above already have `from`/`to` columns and no
# `source`/`target`; presumably the real data differs from the sample -- confirm.
network_data$links <- network_data$links %>% rename(from = source, to = target)
print(network_data$nodes)
print(network_data$links)
# Build a directed graph from the node and edge tables.
# NOTE(review): node ids and edge endpoints start at 0 in the posted data;
# tidygraph presumably treats numeric from/to as 1-based row positions in
# `nodes`, which would explain the "Invalid vertex id" error -- verify against
# the tbl_graph() documentation.
SNA <- tidygraph::tbl_graph(
nodes = network_data$nodes,
edges = network_data$links,
directed = T
)
The last line is where it errors out.
Error in (function (edges, n = max(edges), directed = TRUE) :
At structure_generators.c:86 : Invalid (negative) vertex id, Invalid vertex id
I googled the issue and seems like it's pretty prevalent, but none of the methods suggested worked for me. What's different in my data that it's still generating the error, and how can I resolve this error?

How to remove rows from dataframe without rewriting it?

df = structure(list(V1 = c(1, 2, 2, 3, 4, 5, 5, 6, 7), V2 = c(3.5, 3, 2.5, 2, 3, 2, 3, 5, 4), V3 = c(6.5, 8, 9, 5, 7, 4, 3, 6, 7)), row.names = c(NA, 9L), class = "data.frame")
trash = c(2,3)
How to remove the rows having the IDs in trash without rewriting the df?
I don't think there are in-place operations in R; even if you do
df <- structure(list(V1 = c(1, 2, 2, 3, 4, 5, 5, 6, 7), V2 = c(3.5, 3, 2.5, 2, 3, 2, 3, 5, 4), V3 = c(6.5, 8, 9, 5, 7, 4, 3, 6, 7)), row.names = c(NA, 9L), class = "data.frame")
trash <- c(2, 3)
# Drop rows by ID value, not by row position: df[-trash, ] would remove the 2nd
# and 3rd rows and keep the row whose ID is 3.
# NOTE(review): assumes the IDs live in column V1 -- if `trash` holds row
# positions instead, use df[-trash, ].
df <- df[!df$V1 %in% trash, , drop = FALSE]
It should still rewrite df.

Add legend to graph in R

For a sample dataframe:
df <- structure(list(antibiotic = c(0.828080341411847, 1.52002304506738,
1.31925434545302, 1.66681722567074, 1.17791610945551, 0.950096368502059,
1.10507733691997, 1.0568193215304, 1.03853131016669, 1.02313195567946,
0.868629787234043, 0.902126485349154, 1.12005679002801, 1.88261441540084,
0.137845900627507, 1.07040656448604, 1.41496470588235, 1.30978543173373,
1.16931780610558, 1.05894439450366, 1.24805122785724, 1.21318238007025,
0.497310305098053, 0.872362356327429, 0.902584749481137, 0.999731895498823,
0.907560340983954, 1.05930840957587, 1.40457554864091, 1.09747179272879,
0.944219456216072, 1.10363111431903, 0.974649273935516, 0.989983064420841,
1.14784471036171, 1.17232858907798, 1.44675812720393, 0.727078405331282,
1.36341361598635, 1.06120293299474, 1.06920290856811, 0.711007267992205,
1.39034247642439, 0.710873996527168, 1.30529753573398, 0.781191310196629,
0.921788181250106, 0.932214675722466, 0.752289683770589, 0.942392026874501
), year = c(3, 1, 4, 1, 2, 4, 1, 3, 4, 3, 4, 1, 2, 3, 4, 1, 1,
4, 1, 1, 1, 1, 4, 1, 3, 3, 1, 4, 1, 4, 2, 1, 1, 1, 3, 4, 3, 2,
2, 2, 3, 3, 1, 2, 3, 2, 3, 4, 4, 1), imd.decile = c(8, 2, 5,
5, 4, 3, 2, 8, 6, 4, 3, 6, 9, 2, 5, 3, 5, 6, 4, 2, 9, 11, 2,
8, 3, 5, 7, 8, 7, 4, 9, 7, 6, 4, 8, 10, 5, 6, 6, 11, 6, 4, 2,
4, 10, 8, 2, 8, 4, 3)), .Names = c("antibiotic", "year", "imd.decile"
), row.names = c(17510L, 6566L, 24396L, 2732L, 13684L, 28136L,
1113L, 15308L, 28909L, 21845L, 23440L, 1940L, 8475L, 22406L,
27617L, 4432L, 3411L, 27125L, 6891L, 6564L, 1950L, 5683L, 25240L,
5251L, 20058L, 18068L, 5117L, 29066L, 2807L, 24159L, 12309L,
6044L, 7629L, 2336L, 16583L, 23921L, 17465L, 14911L, 8879L, 13929L,
17409L, 19421L, 7239L, 11570L, 15283L, 8283L, 16246L, 27950L,
23723L, 4411L), class = "data.frame")
I am trying to graph imd.decile by antibiotic for each year
library(ggplot2)

# Mean antibiotic value per imd.decile, drawn as one line per year.
p <- ggplot(df, aes(x = imd.decile, y = antibiotic, group = factor(year))) +
  # `fun` replaces the `fun.y` argument, which is deprecated in ggplot2 >= 3.3.0.
  stat_summary(geom = "line", fun = mean)
p
How do I add the wave to colour the corresponding graph and add a legend (I can't seem to use the aes command correctly).

Resources