I am trying to produce some charts of the dummy data at the bottom of this message and have a few questions.
Would it be recommended to generate a new dataframe with summary stats so that the Year column becomes unique and the second column provides the total count or can I work with the data as is?
Related to this, if I do want to create a new dataframe, what is the best way to make it so that it has: Year, TotalCount, Counts per Term, Counts per Society?
My dummyyearcount dataframe has been created using:
dummyyearcount <- count(dummydata, 'Year')
Is there a way to do multiple counts within the one line of code? If so, how?
Regarding the plots, I am looking to plot a cumulative line plot; however, when running the code below, it is looking for a y-axis value. Is there a way to make it count the number of publications within each year and then split that out by society or term, as opposed to me having to output a summary table and feed in the Total Count as the y-axis?
The code below is what I have for the line plot, which complains with:
"Error: geom_line requires the following missing aesthetics: y"
Also, how can I make this cumulative so in years of no publications it will just flat line?
ggplot() + aes(dummydata$Year, group=dummydata$Term, color=dummydata$Term) + geom_line(show.legend = TRUE) +
theme(axis.ticks=element_line(colour = 'black'), panel.background = element_rect('white'),
panel.grid.major = element_line(colour = 'gray85'), panel.border = element_rect(colour = 'black', fill = FALSE)) +
scale_y_continuous(expand = c(0,0), limits = c(0,5)) + scale_x_continuous(expand = c(0,0))
Output from dput():
structure(list(Year = c(2017L, 2011L, 2012L, 2010L, 2011L, 2015L,
2011L, 2011L, 2012L, 1994L, 2005L, 2009L, 1976L, 2007L, 2014L,
2013L, 2007L), Title = structure(1:17, .Label = c("Title of paper A",
"Title of paper B", "Title of paper C", "Title of paper D", "Title of paper E",
"Title of paper F", "Title of paper G", "Title of paper H", "Title of paper I",
"Title of paper J", "Title of paper K", "Title of paper L", "Title of paper M",
"Title of paper N", "Title of paper O", "Title of paper P", "Title of paper Q"
), class = "factor"), Authors = structure(c(1L, 1L, 2L, 1L, 3L,
4L, 7L, 1L, 8L, 5L, 4L, 6L, 10L, 10L, 9L, 4L, 2L), .Label = c("Bloggs",
"Jones", "Jones and Bloggs", "Smith", "Smith and Jones", "Smith, Jones and Wilson",
"White", "White and Bloggs", "Wilson", "Wilson and Jones"), class = "factor"),
Society = structure(c(4L, 4L, 1L, 1L, 4L, 4L, 2L, 3L, 4L,
1L, 1L, 4L, 4L, 2L, 4L, 4L, 4L), .Label = c("ABC", "MNO",
"N", "XYZ"), class = "factor"), Term = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L
), .Label = c("A", "B"), class = "factor")), .Names = c("Year",
"Title", "Authors", "Society", "Term"), class = "data.frame", row.names = c(NA,
-17L))
An example plot of the look I am eventually wanting to achieve:
I am still very new to R so any help would be appreciated.
I like doing it like this using data.table package because it is quite tractable to me (but this is not the only way):
library(data.table)
library(ggplot2)
# Turn data.frame into a data.table (in place) with term and year as keys
setDT(dummydata, key = c("Term", "Year"))
# One row per Term/Year holding the number of records in that group;
# keyby sorts by Term then Year so the running total accumulates in year order
yearly_counts <- dummydata[, .N, keyby = .(Term, Year)]
# Running total within each Term. Calling cumsum() inside aes() instead would
# accumulate over every row of the data, ignoring the Term grouping.
yearly_counts[, CumulativeN := cumsum(N), by = Term]
# Plot; geom_step() keeps each line flat across years without publications
ggplot(yearly_counts, aes(x = Year, y = CumulativeN, colour = Term)) +
  geom_step()
Using count function from plyr package to Count the number of occurrences.
# dummy data: 200 randomly sampled publication records
n_records <- 200
df <- data.frame(
  Year = sample(1984:2014, n_records, replace = TRUE),
  Title = sample(
    c("Paper A", "Paper B", "Paper C", "Paper D", "Paper E", "Paper F", "Paper G"),
    n_records,
    replace = TRUE
  ),
  Authors = sample(
    c("Stuart", "Jerry", "Kevin", "Phil", "Gru", "Nefario", "Phil", "Josh"),
    n_records,
    replace = TRUE
  ),
  Society = sample(c("lab1", "lab2", "lab3", "lab4", "lab5"), n_records, replace = TRUE),
  Term = sample(c("1st", "2nd", "3rd", "4th"), n_records, replace = TRUE)
)
# tally the records for every Society/Year combination
library(plyr)
df.1 <- count(df, vars = c("Society", "Year"))
# one line per society, with a marker and an x-axis break at each counted year
library(ggplot2)
p <- ggplot(df.1, aes(x = Year, y = freq, color = Society, group = Society)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = df.1$Year)
p
Output Plot :
Additionally, if you want to add Term factor also in graph :
# tally the records for every Society/Year/Term combination
df.2 <- count(df, vars = c("Society", "Year", "Term"))
# lines are still drawn per society; Term is shown through point shape and size
p2 <- ggplot(
  df.2,
  aes(x = Year, y = freq, color = Society, group = Society, shape = Term)
) +
  geom_line() +
  geom_point(aes(size = Term)) +
  scale_x_continuous(breaks = df.2$Year)
p2
Related
I need to space the dates according to the days between sampling. Between some sampling there is 5 days and some 4 days.
data looks like this (also need to add to the labels BBCH):
structure(list(Time = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L), .Label = c("06.05.2016 BBCH 50–51", "09.05.2016 BBCH 51–53",
"13.05.2016 BBCH 55–59", "16.05.2016 BBCH 59–61", "20.05.2016 BBCH 61–64"
), class = "factor"), Mean1 = c(0.9133333, 0.4366667, 0.313333,
0.176, 0.4, 0.1533333, 0.2066667, 0.29, 0.4633333, 0.4833333),
sd = c(2.704973, 1.639598, 0.8780997, 0.5158375, 1.1213943,
0.5203121, 0.5461531, 0.6587969, 0.823153, 0.9965101), n = c(300L,
300L, 300L, 250L, 300L, 300L, 300L, 300L, 300L, 300L), Mean2 = c(0.15617168,
0.09466226, 0.05069711, 0.03262443, 0.06474373, 0.03004023,
0.03153216, 0.03803566, 0.04752476, 0.05753354), SNH = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("OC", "OF"
), class = "factor"), Round = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L), .Label = c("Round 1", "Round 2",
"Round 3", "Round 4", "Round 5"), class = "factor")), class = "data.frame", row.names = c(NA,
-10L))
and my script:
Pan_16<-qplot(x= Time,
y= Mean1,
group= SNH,
data = Plant) +
geom_errorbar(aes(ymin = Mean1- Mean2,
ymax = Mean1 + Mean2),
width=0.2, size=1)+
coord_cartesian(xlim=c(), ylim=c(0,2))+
geom_line(size=1,aes(linetype = SNH)) +
scale_x_discrete(labels=function(x){sub("\\s", "\n", x)})+
scale_color_manual("Field type", values=c("#gray20", "#gray46"))+
labs(title = "", x = "", y = "")+
annotate("text", x = 1 , y = 1.3, label = c("* * * "), color="black", size=5 , fontface="bold")+
annotate("text", x = 2 , y = 0.8, label = c(" * * ") , color="black", size=5 , fontface="bold")+
annotate("text", x = 3 , y = 0.8, label = c("* * * "), color="black", size=5 , fontface="bold")+
theme(axis.line = element_line(size = 1, colour = "grey80"))+
theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text = element_text(colour = "black"))+
theme(
plot.background = element_rect(fill = "white"),
panel.background = element_rect(fill = "white", colour="white"))
Sisi, to get you going ... also check that your Time variable is a factor. Always check the data type, if you do not get expected results or errors.
The praise goes to #Rui who basically gave you the answer.
I stripped off the superfluous stuff from your plot to help you see the major building blocks. You can add these layers for your desired plot/end result.
library(dplyr)
df <- structure(list(Time = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L), .Label = c("06.05.2016 BBCH 50–51", "09.05.2016 BBCH 51–53",
"13.05.2016 BBCH 55–59", "16.05.2016 BBCH 59–61", "20.05.2016 BBCH 61–64"
), class = "factor"), Mean1 = c(0.9133333, 0.4366667, 0.313333,
0.176, 0.4, 0.1533333, 0.2066667, 0.29, 0.4633333, 0.4833333),
sd = c(2.704973, 1.639598, 0.8780997, 0.5158375, 1.1213943,
0.5203121, 0.5461531, 0.6587969, 0.823153, 0.9965101), n = c(300L,
300L, 300L, 250L, 300L, 300L, 300L, 300L, 300L, 300L), Mean2 = c(0.15617168,
0.09466226, 0.05069711, 0.03262443, 0.06474373, 0.03004023,
0.03153216, 0.03803566, 0.04752476, 0.05753354), SNH = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("OC", "OF"
), class = "factor"), Round = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L), .Label = c("Round 1", "Round 2",
"Round 3", "Round 4", "Round 5"), class = "factor")), class = "data.frame", row.names = c(NA,
-10L))
library(ggplot2)  # needed for the plotting calls below
# ---------- coerce Time to character
df <- df %>% mutate(Time = as.character(Time))
# ---------- now make a Date column; as.Date() parses the leading
# "dd.mm.yyyy" part and ignores the trailing "BBCH ..." text
df$Date <- as.Date(df$Time, format = "%d.%m.%Y")
# with the given data frame plot and set time axis
# (ggplot() + geom_point() is used instead of the deprecated qplot())
ggplot(df, aes(x = Date, y = Mean1, group = SNH)) +
  geom_point() +
  geom_errorbar(aes(ymin = Mean1 - Mean2, ymax = Mean1 + Mean2),
                width = 0.2, size = 1) +
  # ------------- set a date scale and "configure" to your liking
  scale_x_date(
    date_labels = "%d %b",        # show day and month
    date_breaks = "2 days",       # have a major break every 2 days
    date_minor_breaks = "1 day"   # show minor breaks in between
  )
Amendment to show-case setting of user-defined axis breaks
Scales support the setting of breaks. This allows you to provide a vector of values or to inject a function returning the desired breaks.
Below we replace the regular, preconfigured break setting of date_breaks by supplying a breaks argument.
library(ggplot2)  # needed for the plotting calls below
# ---------- coerce Time to character
df <- df %>% mutate(Time = as.character(Time))
# ---------- now make a Date column; as.Date() parses the leading
# "dd.mm.yyyy" part and ignores the trailing "BBCH ..." text
df$Date <- as.Date(df$Time, format = "%d.%m.%Y")
# with the given data frame plot and set time axis
# (ggplot() + geom_point() is used instead of the deprecated qplot())
ggplot(df, aes(x = Date, y = Mean1, group = SNH)) +
  geom_point() +
  geom_errorbar(aes(ymin = Mean1 - Mean2, ymax = Mean1 + Mean2),
                width = 0.2, size = 1) +
  # ------------- set a date scale and "configure" to your liking
  scale_x_date(
    breaks = unique(df$Date),   # setting user defined breaks: the sampling dates
    minor_breaks = "1 day",     # keep minor breaks evenly spaced
    date_labels = "%d %b"       # show day and month
  )                             # closing parenthesis was missing
This yields:
I am trying to plot a line chart using Date-time and no of tweets at that period of date and time in R.
library(ggplot2)
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Label = c("2020-03-12",
"2020-03-13"), class = "factor"), Time = structure(c(1L, 1L, 2L,
3L, 4L, 5L), .Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z",
"00:25:12Z", "01:00:02Z"), class = "factor"), Text = structure(c(5L,
3L, 6L, 4L, 2L, 1L), .Label = c("The images of demonstrations and gathering", "Premium policy get activate by company abc",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings",
"Weather forecasting by xyz"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
ggplot(df1, aes(x = Date, y = text(count)) + geom_line(aes(color = variable), size = 1)
I tried the above code to plot the desired result but got an error. The dataset is given in CSV format, like this:
Date Time Text
2020-03-12 00:00:00Z The images of demonstrations and gatherings
2020-03-12 00:00:00Z Premium policy get activate by company abc
2020-03-12 00:00:01Z Weather forecasting by xyz
2020-03-12 00:10:04Z Technology makes trend
2020-03-12 00:25:12Z Launches of rocket
2020-03-12 01:00:02Z Government launch new policy to different sector improvement
I have a dataset of nearly 15 days and want to plot the line chart to visualize the number of tweets (given in text column) to see the trend of tweets on different time and date.
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Label = c("3/12/2020",
"3/13/2020"), class = "factor"), Time = structure(c(1L, 1L, 2L,
3L, 4L, 5L), .Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z",
"00:25:12Z", "01:00:02Z"), class = "factor"), Text = structure(c(5L,
3L, 6L, 4L, 2L, 1L), .Label = c("Government launch new policy to different sector",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings",
"Weather forecasting by xyz"), class = "factor"), X = structure(c(1L,
1L, 1L, 1L, 1L, 2L), .Label = c("", "improvement"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
Creating the dataset df1 as above and then running this gives you the required plot by hour:
library(tidyverse)
library(lubridate)
# Parse the time and date columns, then count the rows falling in each hour
tweets_per_hour <- df1 %>%
  mutate(
    Time = hms(Time),
    Date = mdy(Date),
    hour = hour(Time)
  ) %>%
  count(hour)

# A single line (group = 1) through the hourly counts, with a marker at each
ggplot(tweets_per_hour, aes(hour, n, group = 1)) +
  geom_line() +
  geom_point()
Is this what you are after?
library(dplyr)
library(lubridate)
library(stringr)
library(ggplot2)
Answer with your data
To demonstrate data wrangling.
# your data;
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L),
.Label = c("2020-03-12","2020-03-13"),
class = "factor"),
Time = structure(c(1L, 1L, 2L,3L, 4L, 5L),
.Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z","00:25:12Z", "01:00:02Z"),
class = "factor"),
Text = structure(c(5L,3L, 6L, 4L, 2L, 1L),
.Label = c("The images of demonstrations and gathering", "Premium policy get activate by company abc",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings", "Weather forecasting by xyz"), class = "factor")),
class = "data.frame", row.names = c(NA,-6L))
# data wrangle: build one row per hour holding the number of tweets
df2 <- df1 %>%
  # factors -> character so the string helpers below work on plain text
  mutate_all(as.character) %>%
  # drop the trailing 'Z' from the Time values
  mutate(Time = str_remove(Time, "Z$")) %>%
  # glue date and time together and parse with lubridate::ymd_hms
  mutate(dt = ymd_hms(paste(Date, Time, sep = " "))) %>%
  # round each timestamp up to an hour boundary (kept as its own step for clarity)
  mutate(dt = ceiling_date(dt, unit = "hour")) %>%
  group_by(dt) %>%
  summarise(nr_tweets = n())

# plot: tweets per hour over time, x axis labelled once per day as dd/mm
p1 <- ggplot(df2, aes(dt, nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%d/%m") +
  ggtitle("Data from question `df1`")
Answer with made up large dataset
# Made-up data: 10,000 timestamps sampled (with replacement) from every second
# between ISOdate(2020, 5, 1) and ISOdate(2020, 5, 15), each with a random letter
tib <- tibble(
  dt = sample(seq(ISOdate(2020, 05, 01), ISOdate(2020, 05, 15), by = "sec"),
              10000, replace = TRUE),
  text = sample(c(letters[1:26], LETTERS[1:26]), 10000, replace = TRUE)
)
# Tweets per hour: round each timestamp to the nearest hour and count the rows
tib1 <-
  tib %>%
  mutate(dt = round_date(dt, unit = "hour")) %>%
  group_by(dt) %>%
  summarise(nr_tweets = n())
p2 <- ggplot(tib1, aes(dt, nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%d/%m") +
  ggtitle("Result using `tib` data made up to answer the question")
# Stacking plots with `/` comes from the patchwork package; without it attached
# `p1 / p2` fails because `/` is not defined for ggplot objects
library(patchwork)
p1 / p2
Created on 2020-05-13 by the reprex package (v0.3.0)
I am a newbie to R and have been struggling like crazy to visualize a 3 way table as a heat map using geom_tile in R. I can easily do this in Excel, but cannot find any examples of how to do this in R. I have looked at using Mosaics but this is not what I want and I have found hundreds of examples of two way tables, but seems there are no examples of three way tables.
I want the output to look like this:
My data set looks like this (it's a small snapshot of 30,000 records):
xxx <- structure(list(rfm_score = c(111, 112, 113, 114, 115, 121), n = c(2624L,
160L, 270L, 23L, 5L, 650L), rec = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
freq = structure(c(1L, 1L, 1L, 1L, 1L, 2L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), mon = structure(c(1L,
2L, 3L, 4L, 5L, 1L), .Label = c("1", "2", "3", "4", "5"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
It is essentially an RFM analysis of customer shopping behavior (Recency, Frequency and Monetary). The output heat map (that I want) should be the count of customers in each RFM segment. In the heat map I supplied, you will see there are two variables on the left (R = Recency, quintile ranges 1 to 5, and F = Frequency, quintile ranges 1 to 5), and at the top of the heat map is the M = Monetary variable (quintile ranges 1 to 5). So, for instance, the segment RFM = 555 has a count of 2511 customers.
I have tried the following code and variations of it, but just get errors
library(ggplot2)
library(RColorBrewer)
library(dplyr)
cols <- rev(brewer.pal(11, 'RdYlBu'))
ols <- brewer.pal(9, 'RdYlGn')
ggplot(xxx)+ geom_tile(aes(x= mon, y = reorder(freq, desc(freq)), fill = n)) +
theme_change +
facet_grid(rec~.) +
# geom_text(aes(label=n)) +
# scale_fill_gradient2(midpoint = (max(xxx$n)/2), low = "red", mid = "yellow", high = "darkgreen") +
# scale_fill_gradient(low = "red", high = "blue") + scale_fill_gradientn(colours = cols) +
# scale_fill_brewer() +
labs(x = "monetary", y= "frequency") +
scale_x_discrete(expand = c(0,0)) + scale_y_discrete(expand = c(0,0)) +
coord_fixed(ratio= 0.5)
I have no idea how to create this heat map in R. Can anyone please help me?
Kind regards
Heinrich
You can use DT and formattable package to make table with conditional colour formatting:
library(DT)
library(formattable)
# Sample RFM data: one row per rfm_score with its customer count and R/F/M parts
xxx <- data.frame(
  rfm_score = c(111, 112, 113, 114, 115, 121),
  n = c(2624L, 160L, 270L, 23L, 5L, 650L),
  rec = c(1L, 1L, 1L, 1L, 1L, 1L),
  freq = c(1L, 1L, 1L, 1L, 1L, 2L),
  mon = c(1L, 2L, 3L, 4L, 5L, 1L)
)
# Every column gets the same pink -> light blue tile formatter, so build the
# named formatter list from the column names instead of spelling each one out
tile_formatters <- lapply(
  stats::setNames(names(xxx), names(xxx)),
  function(column) color_tile("pink", "light blue")
)
xxx_dt <- formattable(xxx, tile_formatters)
# Render the formatted table as a DT datatable widget
as.datatable(xxx_dt)
Output:
I am trying to reproduce this graph in R without success:
But for more years
This is the data:
title 2016 phased 2017 phased 2018 phased 2019 fully loaded
Pillar 1 minimum requirement (p1min) 4,50% 4,50% 4,50% 4,50%
Pillar 2 requirement (P2R) 4,63% 1,75% 1,75% 1,75%
Conservation Buffer 0,63% 1,25% 1,88% 2,50%
O-SII buffer 0,50% 1,00% 1,50% 1,50%
Countercyclical Buffer 0,00% 0,15% 0,25% 0,35%
Ideally, the colours would take the 'title' column as labels (pillar1, 2 etc.)
Here is my code so far
library(ggplot2)
library(xlsx)
library(reshape2)
mydata <- read.xlsx("C:/Users/ken/Desktop/donnees regulation kbc.xlsx", sheetName = "Feuil4", encoding = "UTF-8", stringsAsFactors = F)
years<-c('2015 phased','2016 phased','2017 phased','2018 phased','2019 fully loaded')
df<-data.frame(years,mydata)
df<-melt(df, id.vars="years")
ggplot(df, aes(x= years, y=value, fill=variable)) +
geom_bar(stat = "identity")
This is my graph so far (complete mess)
dput(df)
structure(list(years = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L), .Label = c("2015 phased", "2016 phased", "2017 phased",
"2018 phased", "2019 fully loaded"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("title", "X2016.phased",
"X2017.phased", "X2018.phased", "X2019.fully.loaded"), class = "factor"),
value = c("Pillar 1 minimum requirement (p1min) ", "Pillar 2 requirement (P2R)",
"Conservation Buffer", "O-SII buffer", "Countercyclical Buffer",
"0.045", "0.04625", "0.00625", "0.005", "0", "0.045", "0.0175",
"0.0125", "0.01", "0.0015", "0.045", "0.0175", "0.01875",
"0.015", "0.0025", "0.045", "0.0175", "0.025", "0.015", "0.0035"
)), row.names = c(NA, -25L), .Names = c("years", "variable",
"value"), class = "data.frame")
Using the original data that you provided.
library(ggplot2)
library(reshape2)
# Parse the inline table. Passing it through `text =` lets read.table() open
# and close its own connection, instead of leaving a textConnection() open.
# Note: values such as "4,50%" are read as text, and the header names are
# made syntactic (e.g. "X2016.phased").
df <- read.table(text = "title '2016 phased' '2017 phased' '2018 phased' '2019 fully loaded'
'Pillar 1 minimum requirement (p1min)' 4,50% 4,50% 4,50% 4,50%
'Pillar 2 requirement (P2R)' 4,63% 1,75% 1,75% 1,75%
'Conservation Buffer' 0,63% 1,25% 1,88% 2,50%
'O-SII buffer' 0,50% 1,00% 1,50% 1,50%
'Countercyclical Buffer' 0,00% 0,15% 0,25% 0,35%", header = TRUE)
melt data.
df<-melt(df, id.vars="title", variable.name = "year")
Replace commas from values.
# Turn "4,50%"-style text into numbers (4.5): use "." as the decimal mark and
# drop the percent sign. Left as character, ggplot would treat y as discrete
# and the bars could not be stacked by value.
df$value <- as.numeric(
  sub("%", "", gsub(",", ".", df$value, fixed = TRUE), fixed = TRUE)
)
And adapting the answer provided here:
Showing data values on stacked bar chart in ggplot2
ggplot(df, aes(x = year, y = value, fill = title, label = value)) +
geom_bar(stat = "identity") +
geom_text(size = 3, position = position_stack(vjust = 0.5)) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank()
)
Provides you with this.
Read the data in the first instance including these arguments:
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned
read.xlsx(..., header = TRUE, check.names = FALSE)
This will stop your headers being included in the long format data frame and also stop R appending the X's and .'s into your legend labels. Hopefully this will fix your y-axis tick marks by making all values numeric (it currently contains strings, making it character type).
If this doesn't help, you could move the titles out of the dataframe into a legend_labs vector. You can then use this to add custom labels to the legend:
legend_labs <- c("Pillar 1", "Pillar 2", ...)
# The `+` has to end the line; at the start of a new line R parses it as a
# separate expression. The bars are coloured through `fill`, so the labels go
# on the fill scale rather than the colour scale.
ggplot(...) +
  scale_fill_discrete(labels = legend_labs)
Then you could use this to label your x, y and legend titles:
+ labs(x = "X Title", y = "Y title", fill = "Legend Title")
I'd like to add in ellipses around my three groups (based on the variable "outcome") on the following plot. Note that vsd is a DESeq2 object with the factors outcome and batch:
pcaData <- plotPCA(vsd, intgroup=c("outcome", "batch"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=outcome, shape=batch)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
geom_text(aes(label=rownames(coldata_WM_D56C)),hjust=.5, vjust=-.8, size=3) +
geom_density2d(alpha=.5) +
coord_fixed()
I tried adding an ellipse, thinking it would inherit aesthetics from the top but it tried to make an ellipse for each point.
stat_ellipse() +
Too few points to calculate an ellipse
geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?
Computation failed in stat_density2d(): missing value where TRUE/FALSE needed
Suggestions? Thanks in advance.
> dput(pcaData)
structure(list(PC1 = c(-15.646673151638, -4.21111051849254, 13.1215703467274,
-6.5477433859415, -3.22129766721873, 4.59321517871152, 1.84089686598042,
37.8415172383233, 40.9996810499267, 37.6089348653721, -24.5520575763498,
-46.5840253031228, -4.01498554781508, -31.227922394463), PC2 = c(31.2712754127142,
5.89621557021357, -10.2425538634254, -3.44497747426626, 2.21504480008043,
0.315695833259479, -4.66467589267529, -4.27504355920903, -1.08666029542243,
-2.69753368235982, 5.89767436709778, -24.2836532766506, 4.43980653642228,
0.659385524221137), group = structure(c(4L, 5L, 6L, 7L, 8L, 5L,
8L, 1L, 2L, 3L, 6L, 9L, 9L, 9L), .Label = c("ctrl : 1", "ctrl : 2",
"ctrl : 3", "non : 1", "non : 2", "non : 3", "preg : 1", "preg : 2",
"preg : 3"), class = "factor"), outcome = structure(c(2L, 2L,
2L, 1L, 1L, 2L, 1L, 3L, 3L, 3L, 2L, 1L, 1L, 1L), .Label = c("preg",
"non", "ctrl"), class = "factor"), batch = structure(c(1L, 2L,
3L, 1L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3"), class = "factor"), name = structure(1:14, .Label = c("D5-R-N-1",
"D5-R-N-2", "D5-R-N-3", "D5-R-P-1", "D5-R-P-2", "D5-Z-N-1", "D5-Z-P-1",
"D6-C-T-1", "D6-C-T-2", "D6-C-T-3", "D6-Z-N-1", "D6-Z-P-1", "D6-Z-P-2",
"D6-Z-P-3"), class = "factor")), .Names = c("PC1", "PC2", "group",
"outcome", "batch", "name"), row.names = c("D5-R-N-1", "D5-R-N-2",
"D5-R-N-3", "D5-R-P-1", "D5-R-P-2", "D5-Z-N-1", "D5-Z-P-1", "D6-C-T-1",
"D6-C-T-2", "D6-C-T-3", "D6-Z-N-1", "D6-Z-P-1", "D6-Z-P-2", "D6-Z-P-3"
), class = "data.frame", percentVar = c(0.47709343625754, 0.0990361123451665
))
As Maurits Evers suggests, I've added a group aes, which only drew ellipses for 2 of 3 outcome types.
Since you don't provide any sample data, here is an example using the faithful data.
The key is to add a group aesthetic.
# library() stops with an error if ggplot2 is missing; require() only returns FALSE
library(ggplot2)
# Generate sample data: the first 10 rows of faithful plus a 5-level batch factor
df <- faithful[1:10, ]
df$batch <- as.factor(rep(1:5, each = 2))
# This will throw a similar error/warning to yours
# ggplot(df, aes(waiting, eruptions, color = eruptions > 3, shape = batch)) +
#   geom_point() +
#   stat_ellipse()
# Add a group aesthetic and it works: the ellipses are then computed per
# `eruptions > 3` group rather than per colour/shape combination
ggplot(
  df,
  aes(waiting, eruptions, color = eruptions > 3, shape = batch, group = eruptions > 3)
) +
  geom_point() +
  stat_ellipse()
So in your case, try adding aes(..., group = outcome).