Restrict stat_ellipse to certain data points - r

I have a dataset with four categories of vowel, akin to the following:
speaker vowel_category f1 f2
1 a x x
1 b x x
1 c x x
1 d x x
2 a x x
2 b x x...
This geom_point code plots them all on one graph with stat_ellipse and is 90% of what I need:
ggplot(data = topicsubset_ikf, aes(x = F2, y = F1, shape = CATEGORY)) +
geom_point() +
scale_y_reverse() +
scale_x_reverse() +
xlab("F2") +
ylab("F1") +
labs(title = "All speakers with KIT and FLEECE tokens") +
coord_cartesian(xlim = c(1.9, 1.1), ylim = c(0.3, 1.5)) +
facet_wrap(~ SPEAKER) +
scale_color_manual(values = c("#000000", "#FF8F00", "#000000", "#A200FF")) +
stat_ellipse(geom = "polygon", alpha = 1 / 2, aes(fill = CATEGORY))
However, it would be ideal if I could draw ellipses round just two of the four categories (say, a and b), rather than all 4, so I can look at the spread of c & d relative to a and b. I haven't been able to find a way so far - I've tried combining multiple datasets on one graph to no avail. Any suggestions?

I had the same problem, I found that now you can just specify the group in stat_ellipse like this:
stat_ellipse(geom = "polygon", alpha = 1 / 2, aes(fill = CATEGORY, group = CATEGORY))

Related

geom_line() with x as factor and a grouping variable for color

I have the following reproducible data :
d <- data.frame(ATB = rep(c("ATB1", "ATB2"), each = 4),
status = rep(rep(c("S", "R"), each = 2), 2),
season = rep(c("Winter", "summer"), 4),
n = c(239,284,113,120,229,269,127,140)
)
I am trying to draw points for the count n for each line by season, the color being the variable ATB, and to link each point according to the ATB and to the status from one season value to another (ATB1 S winter linked to ATB1 S summer). Here is the plot I am trying to get:
Until now I managed to draw the points but not the lines.
ggplot(d, aes(x=season, y = n)) +
geom_point(aes(color = ATB)) +
geom_line(aes(color = ATB, linetype = status))
I tried group = 1 in each aes, but it didn't work.
Is there a way to obtain the plot ?
You need to group by the interaction of ATB and status, otherwise you are not correctly telling ggplot which points to connect:
ggplot(d, aes(x=season, y = n)) +
geom_point(aes(color = ATB)) +
geom_line(aes(color = ATB, linetype = status,
group = interaction(ATB, status))) +
scale_linetype_manual(values = c(2, 1))

How to overlay scatterplots in ggplot when one plot has colors defined in its dataframe?

I am trying to overlay two scatter plots. Here is the base code:
ggplot() + geom_point(data = df, aes(A, B, color = Cluster), shape=1) +
geom_point(data = as.data.frame(centers), aes(A, B), shape=13, size=7, alpha = 5)
This is what the plot looks like:
But when I attempt to add a color to the overlaid cluster centers (those circles with X inside):
ggplot() + geom_point(data = df, aes(A, B, color = Cluster), shape=1) +
geom_point(data = as.data.frame(centers), aes(A, B, color = "red"), shape=13, size=7, alpha = 5)
I get the following error: "Error: Discrete value supplied to continuous scale"
Here is a portion of the dataframe I am using to plot the first of two overlays:
> df
A B Cluster
1 1.33300195 -1.4524680585 2
2 1.41102294 -0.7889431279 2
3 1.36350553 -1.4437548005 2
4 1.61462300 -0.7145174514 2
5 -0.64722704 0.8449845639 1
6 1.33855918 -0.9161504530 2
7 1.33467865 -2.1513899524 2
8 1.50842550 -0.5170262065 2
9 1.67045671 -0.3644476090 2
10 1.32328373 -1.5496692059 2
My theory is that ggplot is interpreting the "Cluster" column of that dataframe as a continuous variable. Is there a way to change it so it's discrete? Should I instead use a column of colors as factors? For example: 1 becomes "Blue", 2 becomes "Black"?
This should work. No data was provided for centers, so I cannot add that layer to the plot. You are right that the continuous variable is messing up the plot. Instead, convert it with factor() and use scale_color_manual() to change the colors. Here is the code:
library(ggplot2)
#Code
ggplot() + geom_point(data = df, aes(A, B, color = factor(Cluster),
fill = factor(Cluster))) +
geom_point(data = as.data.frame(centers), aes(A, B, color = "red"),
shape=13, size=7, alpha = 5)+
scale_color_manual(values=c('blue','black'))+labs(color='Cluster',fill='Cluster')
Output:
Or keeping the original shape:
#Code 2
ggplot() + geom_point(data = df, aes(A, B, color = factor(Cluster)),shape=1) +
geom_point(data = as.data.frame(centers), aes(A, B, color = "red"),
shape=13, size=7, alpha = 5)+
scale_color_manual(values=c('blue','black'))+labs(color='Cluster')
Output:

How to create two barplots with different x and y axes in the same plot in R?

I need to plot two grouped barplots from two data frames that have different numbers of rows: 6 and 5.
I tried many codes in R but I don't know how to fix it
Here are my data frames. The Freq column must be on the y axis, and the inter and intra columns must be on the x axis.
> freqinter
inter Freq
1 0.293040975264367 17
2 0.296736775990729 2
3 0.297619926364764 4
4 0.587377012109561 1
5 0.595245125315916 4
6 0.597022018595893 2
> freqintra
intra Freq
1 0 3
2 0.293040975264367 15
3 0.597022018595893 4
4 0.598809552335782 2
5 0.898227748764939 6
I want to draw both barplots in the same plot and distinguish the inter and intra values by colour.
I want a picture like this one:
You probably want a histogram. Use the raw data if possible. For example:
library(tidyverse)
freqinter <- data.frame(x = c(
0.293040975264367,
0.296736775990729,
0.297619926364764,
0.587377012109561,
0.595245125315916,
0.597022018595893), Freq = c(17,2,4,1,4,2))
freqintra <- data.frame(x = c(
0 ,
0.293040975264367,
0.597022018595893,
0.598809552335782,
0.898227748764939), Freq = c(3,15,4,2,6))
df <- bind_rows(freqinter, freqintra, .id = "id") %>%
uncount(Freq)
ggplot(df, aes(x, fill = id)) +
geom_histogram(binwidth = 0.1, position = 'dodge', col = 1) +
scale_fill_grey() +
theme_minimal()
With the data you posted, I don't think you can make this graph look good. You can't have bars thin enough to differentiate 0.293 and 0.296 when your data ranges from 0 to 0.9.
Maybe you could try to treat it as a factor just to illustrate what you want to do:
freqinter <- data.frame(x = c(
0.293040975264367,
0.296736775990729,
0.297619926364764,
0.587377012109561,
0.595245125315916,
0.597022018595893), Freq = c(17,2,4,1,4,2))
freqintra <- data.frame(x = c(
0 ,
0.293040975264367,
0.597022018595893,
0.598809552335782,
0.898227748764939), Freq = c(3,15,4,2,6))
df <- bind_rows(freqinter, freqintra, .id = "id")
ggplot(df, aes(x = as.factor(x), y = Freq, fill = id)) +
geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
theme(axis.text.x = element_text(angle = 90)) +
scale_fill_discrete(labels = c("inter", "intra"))
You can also check the problem by not treating your x variable as a factor:
ggplot(df, aes(x = x, y = Freq, fill = id)) +
geom_bar(stat = "identity", width = 0.05, position = "dodge") +
theme(axis.text.x = element_text(angle = 90)) +
scale_fill_discrete(labels = c("inter", "intra"))
Either the bars must be very thin (small width), or you'll get overlapping x intervals breaking the plot.

Implementing paired lines into boxplot.ggplot2

I have a set of paired data, and I'm using ggplot2.boxplot (of the easyGgplot2 package) with added (jittered) individual data points:
ggplot2.boxplot(data=INdata,xName='condition',yName='vicarious_pain',groupName='condition',showLegend=FALSE,
position="dodge",
addDot=TRUE,dotSize=3,dotPosition=c("jitter", "jitter"),jitter=0.2,
ylim=c(0,100),
backgroundColor="white",xtitle="",ytitle="Pain intenstity",mainTitle="Pain intensity",
brewerPalette="Paired")
INdata:
ID,condition,pain
1,Treatment,4.5
3,Treatment,12.5
4,Treatment,16
5,Treatment,61.75
6,Treatment,23.25
7,Treatment,5.75
8,Treatment,5.75
9,Treatment,5.75
10,Treatment,44.5
11,Treatment,7.25
12,Treatment,40.75
13,Treatment,17.25
14,Treatment,2.75
15,Treatment,15.5
16,Treatment,15
17,Treatment,25.75
18,Treatment,17
19,Treatment,26.5
20,Treatment,27
21,Treatment,37.75
22,Treatment,26.5
23,Treatment,15.5
25,Treatment,1.25
26,Treatment,5.75
27,Treatment,25
29,Treatment,7.5
1,No Treatment,34.5
3,No Treatment,46.5
4,No Treatment,34.5
5,No Treatment,34
6,No Treatment,65
7,No Treatment,35.5
8,No Treatment,48.5
9,No Treatment,35.5
10,No Treatment,54.5
11,No Treatment,7
12,No Treatment,39.5
13,No Treatment,23
14,No Treatment,11
15,No Treatment,34
16,No Treatment,15
17,No Treatment,43.5
18,No Treatment,39.5
19,No Treatment,73.5
20,No Treatment,28
21,No Treatment,12
22,No Treatment,30.5
23,No Treatment,33.5
25,No Treatment,20.5
26,No Treatment,14
27,No Treatment,49.5
29,No Treatment,7
The resulting plot looks like this:
However, since this is paired data, I want to represent this in the plot - specifically to add lines between paired datapoints. I've tried adding
... + geom_line(aes(group = ID))
..but I am not able to implement this into the ggplot2.boxplot code. Instead, I get this error:
Error in if (addMean) p <- p + stat_summary(fun.y = mean, geom = "point", :
argument is not interpretable as logical
In addition: Warning message:
In if (addMean) p <- p + stat_summary(fun.y = mean, geom = "point", :
the condition has length > 1 and only the first element will be used
Grateful for any input on this!
I do not know the package that ggplot2.boxplot comes from, but I will show you how to perform the requested operation in ggplot.
The requested output is a bit problematic for ggplot, since you want both the points and the lines connecting them to be jittered by the same amount. One way to do that is to jitter the points prior to making the plot. But since the x axis is discrete, here is a workaround:
b <- runif(nrow(df), -0.1, 0.1)
ggplot(df) +
geom_boxplot(aes(x = as.numeric(condition), y = pain, group = condition))+
geom_point(aes(x = as.numeric(condition) + b, y = pain)) +
geom_line(aes(x = as.numeric(condition) + b, y = pain, group = ID)) +
scale_x_continuous(breaks = c(1,2), labels = c("No Treatment", "Treatment"))+
xlab("condition")
First I made a vector to jitter by, called b, and converted the x axis to numeric so I could add b to the x-axis coordinates. Later I relabeled the x axis.
I do agree with eipi10's comment that the plot works better without jitter:
ggplot(df, aes(condition, pain)) +
geom_boxplot(width=0.3, size=1.5, fatten=1.5, colour="grey70") +
geom_point(colour="red", size=2, alpha=0.5) +
geom_line(aes(group=ID), colour="red", linetype="11") +
theme_classic()
and the updated plot with jittered points eipi10 style:
ggplot(df) +
geom_boxplot(aes(x = as.numeric(condition),
y = pain,
group = condition),
width=0.3,
size=1.5,
fatten=1.5,
colour="grey70")+
geom_point(aes(x = as.numeric(condition) + b,
y = pain),
colour="red",
size=2,
alpha=0.5) +
geom_line(aes(x = as.numeric(condition) + b,
y = pain,
group = ID),
colour="red",
linetype="11") +
scale_x_continuous(breaks = c(1,2),
labels = c("No Treatment", "Treatment"),
expand = c(0.2,0.2))+
xlab("condition") +
theme_classic()
Although I like the old-school way of plotting with ggplot as shown by @missuse's answer, I wanted to check whether this was also possible using your ggplot2.boxplot-based code.
I loaded your data:
'data.frame': 52 obs. of 3 variables:
$ ID : int 1 3 4 5 6 7 8 9 10 11 ...
$ condition: Factor w/ 2 levels "No Treatment",..: 2 2 2 2 2 2 2 2 2 2 ...
$ pain : num 4.5 12.5 16 61.8 23.2 ...
And called your code, adding geom_line at the end as you suggested yourself:
ggplot2.boxplot(data = INdata,xName = 'condition', yName = 'pain', groupName = 'condition',showLegend = FALSE,
position = "dodge",
addDot = TRUE, dotSize = 3, dotPosition = c("jitter", "jitter"), jitter = 0,
ylim = c(0,100),
backgroundColor = "white",xtitle = "",ytitle = "Pain intenstity", mainTitle = "Pain intensity",
brewerPalette = "Paired") + geom_line(aes(group = ID))
Note that I set jitter to 0. The resulting graph looks like this:
If you don't set jitter to 0, the lines still run from the middle of each boxplot, ignoring the horizontal location of the dots.
Not sure why your call gives an error. I thought it might be a factor issue, but I see that my ID variable is not factor class.
I implemented missuse's jitter solution into the ggplot2.boxplot approach in order to align the dots and lines. Instead of using "addDot", I had to instead add dots using geom_point (and lines using geom_line) after, so I could apply the same jitter vector to both dots and lines.
b <- runif(nrow(df), -0.2, 0.2)
ggplot2.boxplot(data=df,xName='condition',yName='pain',groupName='condition',showLegend=FALSE,
ylim=c(0,100),
backgroundColor="white",xtitle="",ytitle="Pain intenstity",mainTitle="Pain intensity",
brewerPalette="Paired") +
geom_point(aes(x=as.numeric(condition) + b, y=pain),colour="black",size=3, alpha=0.7) +
geom_line(aes(x=as.numeric(condition) + b, y=pain, group=ID), colour="grey30", linetype="11", alpha=0.7)

ggplot2 plot two data sets into one picture

this must be a FAQ, but I can't find an exactly similar example in the other answers (feel free to close this if you can point a similar Q&A). I'm still a newbie with ggplot2 and can't seem to wrap my head around it quite so easily.
I have 2 data.frames (that come from separate mixed models) and I'm trying to plot them both into the same graph. The data.frames are:
newdat
id Type pred SE
1 1 15.11285 0.6966029
2 1 13.68750 0.9756909
3 1 13.87565 0.6140860
4 1 14.61304 0.6187750
5 1 16.33315 0.6140860
6 1 16.19740 0.6140860
1 2 14.88805 0.6966029
2 2 13.46270 0.9756909
3 2 13.65085 0.6140860
4 2 14.38824 0.6187750
5 2 16.10835 0.6140860
6 2 15.97260 0.6140860
and
newdat2
id pred SE
1 14.98300 0.6960460
2 13.25893 0.9872502
3 13.67650 0.6150701
4 14.39590 0.6178266
5 16.37662 0.6171588
6 16.08426 0.6152017
As you can see, the second data.frame doesn't have Type, whereas the first does, and therefore has 2 values for each id.
What I can do with ggplot, is plot either one, like this:
fig1
fig2
As you can see, in fig 1 ids are stacked by Type on the x-axis to form two groups of 6 ids. However, in fig 2 there is no Type, but instead just the 6 ids.
What I would like to accomplish is to plot fig2 to the left/right of fig1 with similar grouping. So the resulting plot would look like fig 1 but with 3 groups of 6 ids.
The problem is also, that I need to label and organize the resulting figure so that for newdat the x-axis would include a label for "model1" and for newdat2 a label for "model2", or some similar indicator that they are from different models. And to make things even worse, I need some labels for Type in newdat.
My (hopefully) reproducible (but obviously very bad) code for fig 1:
library(ggplot2)
pd <- position_dodge(width=0.6)
ggplot(newdat,aes(x=Type,y=newdat$pred,colour=id))+
geom_point(position=pd, size=5)
geom_linerange(aes(ymin=newdat$pred-1.96*SE,ymax=newdat$pred+1.96*SE), position=pd, size=1.5, linetype=1) +
theme_bw() +
scale_colour_grey(start = 0, end = .8, name="id") +
coord_cartesian(ylim=c(11, 18)) +
scale_y_continuous(breaks=seq(10, 20, 1)) +
scale_x_discrete(name="Type", limits=c("1","2"))
Code for fig 2 is identical, but without the limits in the last line and with id defined for x-axis in ggplot(aes())
As I understand it, defining stuff at ggplot() makes that stuff "standard" along the whole graph, and I've tried to remove the common stuff and separately define geom_point and geom_linerange for both newdat and newdat2, but no luck so far... Any help is much appreciated, as I'm completely stuck.
How about first adding some new variables to each dataset and then combining them:
newdat$model <- "model1"
newdat2$model <- "model2"
newdat2$Type <- 3
df <- rbind(newdat, newdat2)
# head(df)
Then we can plot with:
library(ggplot2)
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE),
position = position_dodge(width = 0.6),
size = 1.5, linetype = 1)
Alternatively, you can pass an additional aesthetic to geom_linerange to further delineate the model type:
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE, linetype = model),
position = position_dodge(width = 0.6),
size = 1.5)
Finally, you may want to consider facets:
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE),
position = position_dodge(width = 0.6),
size = 1.5) +
facet_wrap(~ id)

Resources