show 3 factors ggplot geom

show 3 factors ggplot geom - r

I am trying to make a PCA plot using ggplot and geom_point.
I would like to illustrate 3 factors (Diet, Time, Antibiotics).
I thought I could outline the points in black for one factor).
However this isn't showing the third factor (Time) for the Fill color.
Here is a subset of my data:
> dput(dat.pcx.annot.test)
structure(list(PC1 = c(25.296379160162, 1.4703101394886, 11.4138097811008,
1.41798772574591, 23.7253675969881, 15.5683516005535, -34.6012195481675,
-25.7129281491955, -2.97230018393742, 4.83421092719293, -0.0274189140249825,
23.227939504077, 15.2002258785889, -35.2243685702227, -34.2537374460037,
-7.6380794043063), PC2 = c(27.2678813936857, -9.88577494210313,
-6.19394322321806, -8.88953660465497, 33.6791127012231, -13.2912233546802,
7.77877968081575, 2.7371646557436, -8.41929538502921, -11.5151849519265,
-9.40733576034963, 32.3549860618533, -11.2170071727855, 10.0455709347794,
3.05679707335492, -6.66218028060621), Diet = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("RC",
"WD"), class = "factor"), Time = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("ZT14",
"ZT2"), class = "factor"), Antibiotics = structure(c(2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("Antibiotics ",
"None"), class = "factor")), row.names = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 18L, 19L, 20L, 21L, 22L), class = "data.frame")
Here is the plotting command :
ggplot(dat.pcx.annot.test,aes(x=PC1,y=PC2,color=Diet,shape=Antibiotics,Fill=Time))+
geom_point(size=3,alpha=0.5)+
scale_color_manual(values = c("black","white") )
And the plot it produces:
I thought if I had both color and fill specified then they would both show.
I would like black outlines for Antibiotics, and Fill color for Time.
Right now Time is not represented.
Any help on how to simultaneously view the 3 factors.
Thanks

Yes I had a fill typo. And I finally figured out how to get the legends to correspond. Here is my final answer.
ggplot(dat.pcx.annot,aes(x=PC1,y=PC2,color=Diet,shape=Antibiotics,fill=Time))+
geom_point(size=3)+
scale_shape_manual(values = c(21, 22) )+
scale_color_manual(values = c("black","white") )+
scale_fill_manual(values=c("#EC9DAE","#AEDE94"))+
xlab(PC1var)+
ylab(PC2var)+
guides(fill=guide_legend(override.aes=list(shape=21)))+
guides(color=guide_legend(override.aes=list(shape=21)))
guides(fill=guide_legend(override.aes=list(shape=21,fill=c("#EC9DAE","#AEDE94"),color=c("black","white"))))
ggsave("cohort2_pca.pdf")

Related

Modify color and font of emmip plot (emmeans package) in R

Hello :) I am desperately trying to change the colors and font of my emmip plot (plot from the emmeans package in R) but none of my codes are working.
Currently my code for the plot looks like this:
emmip(Model, group ~ gend, CIs=TRUE, nuisance = c("known", "age_dup", "edu"),
xlab = "",
ylab = "Intention to use the platform")
I read in the manueal from the R-package that the code from emmip() can be combined with ggplot2 codes. But when I add the following two codes (that I successfully use in another ggplot) - nothing changes in my plot:
+ theme(text=element_text(family="serif", size=13)
+ scale_fill_brewer(palette="Blues"))
I varied them already, for example "," instead of "+"
Does anyone have an idea how I can make these two modifications work in emmip? Thank you all in advance!
Here is the dput of my data (first 30 rows):
structure(list(dv = c(1, 5, 5, 1, 3, 5, 2, 1, 5, 5, 2, 4, 6,
7, 3, 5, 5, 6, 7, 1, 7, 6, 2, 4, 7, 6, 5, 1, 6, 6), gend = structure(c(1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L), .Label = c("Male",
"Female"), class = "factor"), group = structure(c(5L, 3L, 5L,
3L, 2L, 1L, 3L, 4L, 2L, 1L, 3L, 2L, 3L, 3L, 4L, 4L, 2L, 4L, 5L,
5L, 1L, 4L, 1L, 4L, 2L, 1L, 2L, 3L, 1L, 4L), .Label = c("Default",
"Visual element", "Verbal content", "Visual design", "Combined",
"DesignZH"), class = "factor"), ISFregscores = c(0.984372106429775,
-0.383676865152824, -0.816194838031774, -0.408554787302724, -0.0416530380928891,
0.998088756156888, 0.216609251327447, 0.83416518546863, 1.00178246600492,
-0.496215251116934, -1.34559758838579, NA, 0.707838661016661,
1.05815783619489, -0.314855036376305, 0.617674358967702, -0.56862344822269,
0.0589354712707628, 0.31998903974822, -0.511084756816837, -0.171121724458495,
0.532699047600051, 0.196311893993997, -2.09902298349596, 1.04422334581248,
-0.132687312769232, 1.05733961165571, 0.541606480874359, 0.440296538856025,
0.895064902672922), age_dup = structure(c(2L, 1L, 1L, 2L, 1L,
2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 2L, 3L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("under34", "age35_49",
"over50"), class = "factor"), edu = structure(c(5L, 4L, 5L, 2L,
5L, 5L, 5L, 5L, 5L, 4L, 5L, NA, 5L, 4L, 1L, 5L, 4L, 5L, 3L, 5L,
2L, 5L, 3L, 5L, 6L, 5L, 6L, 1L, 3L, 5L), .Label = c("oblig. Schulzeit",
"Berufsausbildung", "Berufsmatura", "Gymnasiale Matura", "BA/MA",
"Doktorat", "Andere"), class = "factor"), empl = structure(c(1L,
6L, 1L, 2L, 8L, 2L, 5L, 2L, 2L, 6L, 2L, NA, 1L, 1L, 6L, 2L, 6L,
1L, 1L, 4L, 2L, 6L, 1L, 1L, 3L, 6L, 2L, 2L, 4L, 1L), .Label = c("Privatsektor",
"öffentlicher Sektor", "Non-Profit Sektor", "selbstständig",
"Rentner/in", "Student/in", "Hausfrau/Hausmann", "arbeitssuchend"
), class = "factor"), civ_dup = structure(c(2L, 1L, 1L, 3L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 3L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 1L), .Label = c("single", "Partnerschaft",
"keine Angabe"), class = "factor"), kids = structure(c(2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Nein",
"Ja"), class = "factor"), known = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, NA, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("Nein", "Ja"
), class = "factor"), device = structure(c(1L, 2L, 1L, 2L, 2L,
1L, 3L, 1L, 2L, 2L, 1L, NA, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Smartphone / Tablet iOS (iPhone/iPad)",
"Smartphone / Tablet (Android)", "Computer / Laptop"), class = "factor")), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
And this is the code for my regression that I then use for the interaction (graph):
Model <- lm(dv ~ gend * group + ISFregscores + age_dup + edu + empl + civ_dup + kids + known + device, data=)

You've got the right approach to change the font but you also have to make sure the font is actually available to the graphics device. This step can be tricky; I use the showtext package which makes this a bit easier.
To change the color palette, specify the color scale (rather than the fill scale).
library("showtext")
#> Loading required package: sysfonts
#> Loading required package: showtextdb
library("emmeans")
library("tidyverse")
showtext_auto()
# 30 data points are too few to fit the original model, so I drop `device`
model <- lm(
dv ~ gend * group + ISFregscores + age_dup + edu + empl + civ_dup + kids + known,
data = data
)
p <- emmip(
model, group ~ gend,
CIs = TRUE,
nuisance = c("known", "age_dup", "edu"),
xlab = "",
ylab = "Intention to use the platform"
)
p +
scale_color_brewer(
palette = "Blues"
) +
guides(
color = guide_legend(title = "New Legend Title")
) +
theme(
text = element_text(family = "serif", face = "bold.italic", size = 16)
)
Created on 2023-01-11 with reprex v2.0.2

How to get geom_step to play nicely with discrete factors on y-axis?

I'd like to produce a step plot using ggplot2. I have datetime along the x-axis, and four discrete non-numeric factor levels on the y-axis. I'd like a line with steps between the levels where appropriate, but right now the code i've produced in ggplot2 isn't working. Any advice greatly appreciated!
So far I have the following code:
ggplot(s2, aes(x = datetime, y = activity)) + geom_step(colour = "blue")
which produces:
However, what I'd like is something that looks like this (I switched to geom_point, which works fine, and added the step lines by hand):
Here's a subset of my data, as a data.frame with with datetime (POSIXct) and activity (factor):
s2 <- structure(list(`datetime` = structure(c(1496102400L,
1496109600L, 1496116800L, 1496124000L, 1496131200L, 1496138400L,
1496145600L, 1496152800L, 1496160000L, 1496167200L, 1496174400L,
1496181600L, 1496188800L, 1496196000L, 1496203200L, 1496210400L,
1496217600L, 1496224800L, 1496232000L, 1496239200L, 1496246400L,
1496253600L, 1496260800L, 1496268000L, 1496275200L, 1496282400L,
1496289600L, 1496296800L, 1496304000L, 1496311200L, 1496318400L,
1496325600L, 1496332800L, 1496340000L, 1496347200L, 1496354400L,
1496361600L, 1496368800L, 1496376000L, 1496383200L, 1496390400L,
1496397600L, 1496404800L, 1496412000L, 1496419200L, 1496426400L,
1496433600L, 1496440800L, 1496448000L, 1496455200L), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), activity = structure(c(1L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HO",
"TR", "FO", "SE"), class = "factor")), .Names = c("datetime",
"activity"), row.names = c(NA, -50L), class = "data.frame")

Make a table in R that resembles facet_grid in structure

I would like to make a "nested" sort of table in R that mirrors the formatting of a plot I can make with ggplot using facet_wrap.
Here are some data and the code:
tabledata = structure(list(row = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,1L, 2L, 1L, 2L, 1L, 2L),
col = c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L),
grp1 = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
.Label = c("a", "b"), class = "factor"),
grp2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("g", "h"), class = "factor"),
value = c(9L, 9L, 14L, 8L, 10L, 9L, 8L, 15L, 2L, 1L, 3L, 4L, 1L, 5L, 2L, 4L)),
.Names = c("row", "col", "grp1", "grp2", "value"), class = "data.frame",
row.names = c(NA, -16L))
ggplot(tabledata, aes(grp2, value, shape = grp1)) + geom_jitter() + facet_grid(row ~ col)
Which produce this plot:
Here is the table I would like to make (which can easily be done with a pivot table, but obviously that is not ideal):

A nested table can be made using the tabular() function in the tables package using the following code.
tabular(
(Heading()*Factor(row)*Heading()*grp1)~
(Heading()*Factor(col)*Heading()*grp2)*Heading()*value*Heading()*identity,
data = tabledata)
The table can then be saved as a .csv file using write.csv.tabular().

Tidyverse just added a table package that has the nested format built in. It's called "gt" (great tables) https://blog.rstudio.com/2020/04/08/great-looking-tables-gt-0-2/

ggplot add percentage labels based on x-axis variables

I've a ggplot that shows the counts of tweets for some brands as well as a label for the overall percentage. This was done with much help from this link: Show % instead of counts in charts of categorical variables
# plot ggplot of brands
ggplot(data = test, aes(x = brand, fill = brand))
+ geom_bar()
+ stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', vjust = -0.3)
Next, I would like to plot it based on brand and sentiment, with the labels for the bars of each brand totalling up to 100%. However, I have difficulty amending my code to do this. Would you be able to help please? Also, would it be possible to change the colours for neu to blue and pos to green?
# plot ggplot of brands and sentiment
ggplot(data = test, aes(x = brand, fill = factor(sentiment)))
+ geom_bar(position = 'dodge')
+ stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', position = position_dodge(width = 0.9), vjust=-0.3)
Here's a dput of 100 rows of my data's brand and sentiment column
structure(list(brand = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 1L, 2L, 1L, 2L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 5L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 4L, 5L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 1L), .Label = c("apple",
"samsung", "sony", "bb", "htc", "nokia", "huawei"), class = "factor"),
sentiment = structure(c(2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 3L,
1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 3L, 1L, 3L, 2L,
1L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L,
3L), .Label = c("neg", "pos", "neu"), class = "factor")), .Names = c("brand",
"sentiment"), class = c("data.table", "data.frame"), row.names = c(NA,
-100L), .internal.selfref = <pointer: 0x0000000003070788>)

Posting a hack far far far from the ggplot2 idiomatic way to do this, so if someone posts a more ggplot2 way to do this, you should accept the idiomatic method.
So basically I'm creating a dummy data set which will include all the information you've calculated using ..count../sum(..count..)*100 and plotting it on top of your bar plot using geom_text
temp <- as.data.frame(table(test$brand, test$sentiment))
temp <- merge(temp, as.data.frame(table(test$brand)), by = "Var1", all.x = T)
names(temp) <- c("brand", "sentiment", "Freq", "Count")
library(ggplot2)
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
geom_bar(position = 'dodge') +
geom_text(data = temp, aes(x = brand, y = Freq, label = sprintf("%.02f %%", Freq/Count*100)), position = position_dodge(width = 0.9), vjust=-0.3)
This is not exactly same as your plot because you only provided a subset of your data

To choose the colors you would like for sentiment, make use of
scale_fill_manual(value = [and choose your colors by RGB, name, etc.]
You will have to experiment but the three factors will be in alphabetical order (unless you change that) so the colors you pick for the scale will match that order: neg, neu, pos could be "grey", "blue", "green"

Bar chart with several non mutually exclusive characteristics

I have a data set with a variable that has several other characteristics, which are not mutually exclusive. Here's the data.
df <- structure(list(cont1 = structure(c(2L, 2L, 4L, 1L, 2L, 3L, 2L, 4L, 4L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 2L), .Label = c("Africa", "Asia", "Europe", "LAC"), class = "factor"), SIDS = structure(c(2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No", "SIDS"), class = "factor"), LDC = structure(c(2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("LDC", "No"), class = "factor")), .Names = c("cont1",
"SIDS", "LDC"), class = "data.frame", row.names = c(NA, -17L))
So when I put it into long format df.m <- melt(df, id.vars = c("cont1")) I can build the plot with ggplot2 but get all the NAs in the plot. If I exclude them the proportions are distorted because there are more NAs in one of the categories.
ggplot(df.m, aes(x = cont1, fill = value)) + geom_bar()
ggplot(df.m[df.m$value != "No",], aes(x = cont1, fill = value)) + geom_bar()
Is there a way to have a bar plot of the variable cont1 with the value as a fill without the NAs distorting the proportion? That is can I use a different length for the fill in ggplot2?

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

show 3 factors ggplot geom - r

Related

Modify color and font of emmip plot (emmeans package) in R

How to get geom_step to play nicely with discrete factors on y-axis?

Make a table in R that resembles facet_grid in structure

ggplot add percentage labels based on x-axis variables

Bar chart with several non mutually exclusive characteristics

Categories

Resources