I have produced two different plots based on two different models: model and model1. Please find enclosed My Data below. I have attached the two plots:
Model
Model1
I wish to merge the two plots and keep the confidence bands at the same time. I have tried several solutions, e.g. rbind, but that does not seem to work - please see below.
I have used the following scripts to produce the two plots
model <- cph(Surv(os.neck,mors)~rcs(test),data=n)
model1 <- cph(Surv(os.neck,mors)~rcs(test),data=n1)
j <- ggplot(Predict(model, fun=exp), colfill = "blue")
k <- ggplot(Predict(model1, fun=exp), colfill = "yellow")
I have tried rbind:
e <- Predict(model, fun=exp, conf.int = TRUE)
f <- Predict(model1, fun=exp, conf.int = TRUE)
j <- ggplot(rbind(e,f))
Which gave this:
rbind()
My data:
n <- subset(w, w$stadie %in% 1:2)
n1 <- subset(w, w$stadie %in% 3:5)
The requested dput(out) from the comments
w <- structure(list(model = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2"), test = c(0.0438735177865613, 0.0465676207122569,
0.0492617236379526, 0.0519558265636483, 0.0546499294893439, 0.0573440324150396,
0.0600381353407353, 0.062732238266431, 0.0654263411921266, 0.0681204441178223,
0.070814547043518, 0.0735086499692136, 0.0762027528949093, 0.078896855820605,
0.0815909587463007, 0.0842850616719963, 0.086979164597692, 0.0896732675233877,
0.0923673704490833, 0.095061473374779, 0.05, 0.0530569514237856,
0.0561139028475712, 0.0591708542713568, 0.0622278056951424, 0.065284757118928,
0.0683417085427136, 0.0713986599664992, 0.0744556113902848, 0.0775125628140703,
0.0805695142378559, 0.0836264656616415, 0.0866834170854271, 0.0897403685092127,
0.0927973199329983, 0.0958542713567839, 0.0989112227805695, 0.101968174204355,
0.105025125628141, 0.108082077051926), yhat = c(0.715524721809984,
0.72420520893997, 0.732895287854242, 0.741495950465592, 0.749903690905934,
0.758010700841758, 0.765705214141122, 0.772872009692537, 0.779393079520142,
0.785148467039571, 0.79001727733411, 0.793878857700365, 0.796614142441177,
0.798107151024956, 0.798246668871875, 0.796979824770716, 0.794412433838086,
0.790683064226291, 0.785933397797749, 0.780306386213083, 1.24887346414771,
1.12142387236568, 1.00744333341272, 0.906978784944319, 0.819807522848923,
0.745379660125369, 0.682977886151413, 0.631846830283734, 0.591296955987878,
0.560790614744859, 0.53975355731851, 0.52685030147002, 0.520878199524915,
0.520957917193064, 0.526437601275528, 0.53682068603444, 0.551708849922178,
0.570754454105439, 0.593618741429514, 0.619933518450193), lower = c(0.445870969928758,
0.472487603995491, 0.498645159577579, 0.523317755828918, 0.545270747924011,
0.563214260495099, 0.576107648755599, 0.583517928079882, 0.585795811114823,
0.583918701876133, 0.579131268180072, 0.572630973080174, 0.565412209767786,
0.558237952034289, 0.551671245622871, 0.546072898734981, 0.541548416151744,
0.538098574671309, 0.535672640626991, 0.534183860233478, 0.613882362074539,
0.611611984419279, 0.601234738035742, 0.579326232945668, 0.543582975437934,
0.496000647093785, 0.443637816386947, 0.39437687025085, 0.353159479619957,
0.321944706132161, 0.30083406381699, 0.288326373517578, 0.282948308375769,
0.283624310505754, 0.289563062775844, 0.300128054614955, 0.314709399887597,
0.332603569457389, 0.352917102130059, 0.374528152852913), upper = c(1.14825961332055,
1.11002527943736, 1.07718984661152, 1.05063556210888, 1.03133268706487,
1.02018052967182, 1.01769951541058, 1.02367230657634, 1.03697151956046,
1.05572593121937, 1.07769573631852, 1.10061046351294, 1.12235654089946,
1.14104571750444, 1.1550316414364, 1.16317224781343, 1.16534569433533,
1.16183119131315, 1.15311341092747, 1.13982862772903, 2.54069024589915,
2.05619172538896, 1.68809618910841, 1.4199434956646, 1.23639702655924,
1.1201413566373, 1.05144055745915, 1.01230687460364, 0.990011907755607,
0.976832690818709, 0.968420593537629, 0.962698059052612, 0.958882208194717,
0.956889594556209, 0.957085290437296, 0.96017831230139, 0.967186411308867,
0.979426190201882, 0.998487202942342, 1.02613799355416), .predictor. = c("test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test"), .set. = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor")), .Names = c("model",
"test", "yhat", "lower", "upper", ".predictor.", ".set."), row.names = c("1.1",
"1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", "1.10",
"1.11", "1.12", "1.13", "1.14", "1.15", "1.16", "1.17", "1.18",
"1.19", "1.20", "2.201", "2.202", "2.203", "2.204", "2.205",
"2.206", "2.207", "2.208", "2.209", "2.210", "2.211", "2.212",
"2.213", "2.214", "2.215", "2.216", "2.217", "2.218", "2.219",
"2.220"), class = c("Predict", "data.frame"), info = structure(list(
Design = structure(list(label = structure("Set", .Names = ".set."),
units = structure("", .Names = ".set.")), .Names = c("label",
"units")), varying = ".set.", adjust = structure(list(`1` = NULL,
`2` = NULL), .Names = c("1", "2"))), .Names = c("Design",
"varying", "adjust")))
Thank you in advance,
C.
Here is a basic plot
ggplot(as.data.frame(out), aes(x = test)) +
geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .3) +
geom_line(aes(y = yhat, col = model))
We need as.data.frame(out) because out is of class Predict.
You could add another theme, change the fill and color, or add a meaningful title, subtitle, etc. SO is full of examples.
We can use the JCO palette from the ggsci package
library(ggsci)
ggplot(as.data.frame(out), aes(x = test)) +
geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .3) +
geom_line(aes(y = yhat, col = model)) +
scale_color_jco() +
scale_fill_jco()
To change legend labels do
... +
scale_color_jco(labels = c("A", "B")) +
scale_fill_jco(labels = c("A", "B"))
Related
I am hoping someone can help me.
I am performing Cramer's V tests on categorical data in R. Here's an example of the code:
# Cramer's V for SEVERITY vs ATMOSPH_COND
df1 <- subset(ACCIDENT_MASTER_single, select = c("SEVERITY", "ATMOSPH_COND"))
# Cross-tabulate the two variables. cramerV() expects a contingency table
# (or two vectors of observations); passing data.matrix(df1) would make it
# treat the n x 2 matrix of raw codes as the table itself, which is why that
# result differs from the table()-based value computed in the Shiny app.
df3 <- table(df1$SEVERITY, df1$ATMOSPH_COND)
# Calculate Cramer's V
cramerV(df3)
I am using Shiny so that a user can select the categorical variables via dropdown menus and then the result of the Cramer's V is displayed. My code works, but interestingly, the results I am getting are completely different, even though I am using the same dataframe. Can anyone tell me why?
Here is an example of the R code using the Shiny package:
library(shinydashboard)
library(shiny)
library(dplyr)
library(DT)
library(rcompanion)
df <- data.frame(ACCIDENT_MASTER_single)
Cat1.Variables <- c("SEVERITY", "ATMOSPH_COND", "DAY_OF_WEEK")
Cat2.Variables <- c("SEVERITY", "ATMOSPH_COND", "DAY_OF_WEEK")
# UI: two dropdowns pick the categorical variables; the main panel shows the
# printed Cramer's V. The output is filled by renderPrint() in the server, so
# it is declared with verbatimTextOutput() (the matching text output) rather
# than tableOutput(), which is meant for renderTable().
ui <- fluidPage(
  titlePanel("Calculate the strength of the relationship between categorical variables"),
  sidebarLayout(
    sidebarPanel(
      selectInput("cat1", choices = Cat1.Variables, label = "Select a Categorical Variable:"),
      selectInput("cat2", choices = Cat2.Variables, label = "Select a Categorical Variable:")
    ),
    mainPanel(
      verbatimTextOutput("results")
    )
  )
)
# Server: cross-tabulates the two selected columns of `df` and prints the
# Cramer's V of that table.
server <- shinyServer(function(input, output) {
  # Contingency table of the two chosen variables. req() stops evaluation
  # until both inputs have a value.
  cramerdata <- reactive({
    req(input$cat1, input$cat2)
    table(df[[input$cat1]], df[[input$cat2]])
  })

  # Print a header line followed by the Cramer's V of the current table.
  output$results <- renderPrint({
    cat("\nThe results equal: \n")
    print(cramerV(cramerdata()))
  })
})
shinyApp(ui, server)
Also, I have tested this on a number of different variables and all of my results are different, not just for the two variables in this example. Would love some help please!
EDIT: someone suggested I use dput(head(ACCIDENT_MASTER_single)) so a snippet of my results of that are found below (the dataset is very large). I hope this helps!
> dput(head(ACCIDENT_MASTER_single))
structure(list(ACCIDENT_NO = c("T20150000004", "T20150000017",
"T20150000020", "T20150000028", "T20150000034", "T20150000052"
), ACCIDENTDATE = c("2015-01-01", "2015-01-01", "2015-01-01",
"2015-01-01", "2015-01-01", "2015-01-01"), ACCIDENTTIME = c("02:10:00",
"07:20:00", "06:51:00", "07:55:00", "17:10:00", "01:20:00"),
ACCIDENT_TYPE = c(2L, 1L, 4L, 1L, 4L, 1L), DAY_OF_WEEK = c(5L,
5L, 5L, 4L, 5L, 5L), DCA_CODE = c(108L, 130L, 173L, 135L,
171L, 121L), DIRECTORY = c("MEL", "MEL", "MEL", "MEL", "MEL",
"MEL"), LIGHT_CONDITION = c(3L, 1L, 2L, 1L, 1L, 3L), ROAD_GEOMETRY = c(5L,
4L, 1L, 5L, 5L, 1L), SEVERITY = c(3L, 2L, 1L, 3L, 3L, 2L),
SPEED_ZONE = c(60L, 70L, 70L, 100L, 60L, 60L), ROAD_TYPE = c("ROAD",
"ROAD", "ROAD", "ROAD", "ROAD", "DRIVE"), ATMOSPH_COND = c("1",
"1", "1", "1", "1", "1"), ATMOSPH_COND_SEQ = c("1", "1",
"1", "0", "1", "1"), LGA_NAME = c("MOONEE VALLEY", "MONASH",
"BAYSIDE", "BRIMBANK", "MELTON", "BRIMBANK"), DEG_URBAN_NAME = c("MELB_URBAN",
"MELB_URBAN", "MELB_URBAN", "MELB_URBAN", "MELB_URBAN", "MELB_URBAN"
), Lat = c(-37.77922923, -37.88240078, -37.92909811, -37.76758102,
-37.72427767, -37.76316596), Long = c(144.9309415, 145.0903658,
145.0028103, 144.8002374, 144.7529804, 144.7897546), POSTCODE_NO = c(3032L,
3148L, 3186L, 3022L, 3023L, 3023L), Surface.Cond.Desc = c("Dry",
"Dry", "Dry", "Dry", "Dry", "Dry"), SURFACE_COND = c("1",
"1", "1", "1", "1", "1"), SURFACE_COND_SEQ = c("1", "1",
"1", "0", "1", "1"), ROAD_SURFACE_TYPE = c("1", "1,1", "1",
"1,1", "1", "1,1"), VEHICLE_TYPE = c("99", "5,2", "1", "1,62",
"1", "1,1"), TRAFFIC_CONTROL = c("0", "1,1", "0", "0,0",
"0", "1,1"), EVENT_TYPE = c("C", "C", "3,C", "C,3,C,3,C",
"3,C", "C"), SEX = c("M,U", "M,M", "M", "F,U", "M", "M,M,M,F"
), AGE = c("32,NA", "56,43", "28", "54,NA", "23", "17,16,19,41"
), Age.Group = c("30-39,unknown", "50-59,40-49", "26-29",
"50-59,unknown", "22-25", "16-17,16-17,17-21,40-49"), INJ_LEVEL = c("3,4",
"2,3", "1", "3,4", "3", "2,4,4,3"), ROAD_USER_TYPE = c("1,9",
"2,2", "2", "2,2", "2", "3,3,2,2")), row.names = c(NA, 6L
), class = "data.frame")
Thanks
The result is working for me... Try setting the seed also: set.seed(1)
# Contingency table of the two selected variables. cramerV() treats a matrix
# argument as the table itself, so the raw columns are cross-tabulated here
# instead of being passed through data.matrix().
cramerdata <- reactive({
  req(input$cat1, input$cat2)
  selected <- ACCIDENT_MASTER_single[c(input$cat1, input$cat2)]
  table(selected[[1]], selected[[2]])
})
# Print a header line followed by the Cramer's V of the current table.
output$results <- renderPrint({
  cat("\nThe results equal: \n")
  print(cramerV(cramerdata()))
})
Getting the following error:
Error in knn(train.x, test.x, train.y, k = 1) :
NA/NaN/Inf in foreign function call (arg 6)
In addition: Warning messages:
1: In knn(train.x, test.x, train.y, k = 1) : NAs introduced by coercion
2: In knn(train.x, test.x, train.y, k = 1) : NAs introduced by coercion
the code used that produced this error is as follows:
# Rows 1..200 form the test set; every other row is used for training.
test <- seq_len(200)
test.x <- absentknn[test, ]
train.x <- absentknn[-test, ]
test.y <- Target[test]
train.y <- Target[-test]
# Fix the RNG state before fitting.
# NOTE(review): presumably for knn()'s random tie-breaking - confirm.
set.seed(1)
# 1-nearest-neighbour prediction for the test rows.
# NOTE(review): the "NAs introduced by coercion" warnings suggest absentknn
# still holds non-numeric (factor/character) columns - confirm, and encode
# them numerically before calling knn().
knn.pred <- knn(train.x, test.x, train.y, k = 1)
Dataframe is 740 rows by 20 , with target as the Dependent Variable.
where test = 1:200
Also, possibly helpful: when I run the code below and call dim() on the results, I get NULL.
train.y <- Target[-test]
test.y = Target[test]
OUTPUT AS BELOW:
structure(list(Reason.for.absence = structure(c(3L, 1L), .Label = c("Diseases",
"Other", "W/o ICD"), class = "factor"), Month.of.absence = structure(c(8L,
8L), .Label = c("0", "1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12"), class = "factor"), Day.of.the.week = structure(c(2L,
2L), .Label = c("2", "3", "4", "5", "6"), class = "factor"),
Seasons = structure(c(1L, 1L), .Label = c("1", "2", "3",
"4"), class = "factor"), Disciplinary.failure = structure(1:2, .Label = c("0",
"1"), class = "factor"), Education = structure(c(1L, 1L), .Label = c("1",
"2", "3", "4"), class = c("ordered", "factor")), Son = structure(c(3L,
2L), .Label = c("0", "1", "2", "3", "4"), class = "factor"),
Social.drinker = structure(c(2L, 2L), .Label = c("0", "1"
), class = "factor"), Social.smoker = structure(c(1L, 1L), .Label = c("0",
"1"), class = "factor"), Pet = structure(c(2L, 1L), .Label = c("0",
"1", "2", "4", "5", "8"), class = "factor"), Target = structure(c(1L,
1L), .Label = c("Below THreshold", "Above Threshold"), class = "factor"),
Transportation.expense = c(1.01072476745574, -1.54333530271458
), Distance.from.Residence.to.Work = c(0.429265332324082,
-1.12093537978198), Service.time = c(0.101700985294323, 1.24198475980643
), Age = c(-0.532508283408938, 2.09144557686699), Work.load.Average.day = c(-0.817659381760693,
-0.817659381760693), Hit.target = c(0.638254115594372, 0.638254115594372
), Weight = c(0.851097236884886, 1.4720604661625), Height = c(-0.019033134875066,
0.975168263304808), Body.mass.index = c(0.775407774938871,
1.00875537699449)), .Names = c("Reason.for.absence", "Month.of.absence",
"Day.of.the.week", "Seasons", "Disciplinary.failure", "Education",
"Son", "Social.drinker", "Social.smoker", "Pet", "Target", "Transportation.expense",
"Distance.from.Residence.to.Work", "Service.time", "Age", "Work.load.Average.day",
"Hit.target", "Weight", "Height", "Body.mass.index"), row.names = 1:2, class = "data.frame")
I have two data sets like below
df1<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(177.72258835,
0, 74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187,
25865.25, 28.67291878), x = c("1", "2", "3", "1", "2", "3", "1",
"2", "3"), y = c(177.72258835, 0, 74.438539625, 134.3410045,
48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878)), .Names = c("time",
"place", "key", "value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
df2<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(58.852340736,
0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268, 38347.81965, 10.342042262244), x = c("1", "2",
"3", "1", "2", "3", "1", "2", "3"), y = c(58.852340736, 0, 21.291893740908,
42.92051958201, 72521.52726, 16.309811239722, 32.403556124268,
38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
I want to plot them together with df2 as the standard deviation for df1
when I plot df1, I do the following
library(ggplot2)
# Scatter plot of y against x, coloured by key, one facet row per time.
# x is a character column, so the discrete scale's limits must be given as
# the matching strings; numeric limits do not correspond to any value of x.
ggplot(df1, aes(x, y, col = key)) +
  geom_point() +
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  facet_grid(time ~ .)
but now I want to have the second df as the standard deviation (i.e., the first y-value in df1 is 177.72259, so its standard deviation is the corresponding y-value in df2, which is 58.85234).
If I understand your question correctly, it sounds like you want to include error bars in your plot. This can be accomplished using only a single data frame, if you just add the standard deviation as an additional variable like so:
df <- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
.Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"), class = "factor"),
key = c("boy1", "boy2", "boy3", "boy1", "boy2", "boy3", "boy1", "boy2", "boy3"),
value = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726,
16.309811239722, 32.403556124268, 38347.81965, 10.342042262244),
x = c("1", "2", "3", "1", "2", "3", "1", "2", "3"), y = c(177.72258835, 0,
74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878),
sd = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268,38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y", "sd"), row.names = c(NA, -9L), class = "data.frame")
Then you can add error bars to the plot using geom_errorbar(), as follows (I am borrowing the "free_y" scale trick from @jazzurro's answer above):
# Points with +/- one sd error bars, coloured by key, one facet row per time.
ggplot(df, aes(x, y, col = key)) +
  geom_point() +
  geom_errorbar(aes(ymin = y - sd, ymax = y + sd)) +
  # x is a character column, so limits are the matching strings, not numbers.
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  # One facet specification only: a plot keeps just the last facet_*() added,
  # so an earlier fixed-scale facet_grid() would have no effect. `scales` is
  # spelled out rather than relying on partial matching of `scale`.
  facet_grid(time ~ ., scales = "free_y")
Unfortunately your data is a little skewed, in that some measurements are way larger in magnitude than others (especially at time=48 and time=72); you may want to consider a log transformation so that the error bars for the smaller observations do not appear so negligible.
Here is one way for you. I changed the shape of the sd in the second geom_point(). Since the y-scale has a wide range for two of the plots, you see points overlapping.
# df1 values drawn with the default point shape and the df2 values (the sd)
# drawn semi-transparent with shape 22, one facet row per time with
# independent y axes.
ggplot() +
  geom_point(data = df1, aes(x, y, col = key)) +
  geom_point(data = df2, aes(x, y, col = key), shape = 22, alpha = 0.3) +
  # x is a character column, so limits are the matching strings, not numbers.
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  # `scales` spelled out rather than relying on partial matching of `scale`.
  facet_grid(time ~ ., scales = "free_y")
I am trying to make a ggplot. When I had shape in aesthetics, the code was working just fine. However, I need to put shape in geom_point() because I'm trying to reproduce a figure. And when I added shape to geom_point() it gave me the following error:
Aesthetics must be either length 1 or the same as the data (6): shape
I've looked for other answers here but apparently, nothing seems to be working for me. Above I've provided an image of what my data looks like. There are 17000 entries.
Below is my code:
# Per-group medians of RPKM and dNdS. The grouping expressions are passed as
# strings, so the resulting columns are literally named `mammals$chr`,
# `mammals$Species` and `mammals$chrMark` (hence the backticks below).
summarised_data <-ddply(mammals,c('mammals$chr','mammals$Species','mammals$chrMark'),
function (x) c(median_rpkm = median(x$RPKM), median = median(x$dNdS)))
# Median dNdS against median RPKM, coloured by species, with a linear fit.
# shape is passed to geom_point() outside aes() here; this is the call that
# raises "Aesthetics must be either length 1 or the same as the data".
ggplot(summarised_data,aes(x = summarised_data$median_rpkm, y = summarised_data$median,
color = summarised_data$`mammals$Species`)) + geom_smooth(se = FALSE, method = "lm") +
geom_point(shape = summarised_data$`mammals$chrMark`) + xlab("median RPKM") + ylab("dNdS")
"ENSG00000213221", "ENSG00000213341", "ENSG00000213380", "ENSG00000213424",
"ENSG00000213533", "ENSG00000213551", "ENSG00000213619", "ENSG00000213626",
"ENSG00000213699", "ENSG00000213782", "ENSG00000213949", "ENSG00000214013",
"ENSG00000214338", "ENSG00000214357", "ENSG00000214367", "ENSG00000214517",
"ENSG00000214814", "ENSG00000215203", "ENSG00000215305", "ENSG00000215367",
"ENSG00000215440", "ENSG00000215897", "ENSG00000221947", "ENSG00000222011",
"ENSG00000224051", "ENSG00000225830", "ENSG00000225921", "ENSG00000239305",
"ENSG00000239474", "ENSG00000239900", "ENSG00000241058", "ENSG00000242247",
"ENSG00000242612", "ENSG00000243646", "ENSG00000244038", "ENSG00000244045"),
class = "factor"), Species = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Chimp", "Gori", "Human", "Maca",
"Mouse", "Oran"), class = "factor"), labs = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Chimp-A", "Chimp-X",
"Gori-A", "Gori-X", "Human-A", "Human-X", "Maca-A", "Maca-X",
"Mouse-A", "Mouse-X", "Oran-A", "Oran-X"), class = "factor"),
chrMark = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "X"), class = "factor"), chr = structure(c(27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "2a", "2A", "2b", "2B", "3", "4",
"5", "6", "7", "8", "9", "X"), class = "factor"), dN = c(3.00669,
3.27182, 7.02044, 1.01784, 3.0363, 2.32786, 4.92959, 3.03753,
3.0776, 1.02147), dS = c(3.15631, 5.87147, 3.13716, 2.05438,
4.10205, 5.24764, 4.2014, 3.18086, 5.4942, 3.02169), dNdS = c(0.9525965447,
0.5572403504, 2.2378329444, 0.4954487485, 0.7401908802, 0.4436013141,
1.1733207978, 0.954939859, 0.5601543446, 0.3380459279), RPKM = c(31.6,
13.9, 26.3, 9.02, 11.3, 137, 242, 1.05, 59.4, 10.1), Tau = c(0.7113820598,
0.8391023102, 0.3185943152, 0.6887167806, 0.9120531859, 0.6254200542,
0.7165302682, 0.7257435312, 0.2586613298, 0.6493567251),
GC3 = c(0.615502, 0.622543, 0.393064, 0.490141, 0.461592,
0.626407, 0.490305, 0.482853, 0.346424, 0.466484)), .Names = c("gene",
"Species", "labs", "chrMark", "chr", "dN", "dS", "dNdS", "RPKM",
"Tau", "GC3"), row.names = c(NA, 10L), class = "data.frame")
There are a few things wrong with your code with respect to how ggplot handles non-standard evaluation; I'd recommend reading a ggplot tutorial or the docs. Having columns within summarised_data called 'mammals$Species' and 'mammals$chrMark' is going to cause lots of problems.
If we change these to something more sensible...
# Rename the awkward ddply() column names to syntactic ones. The match is
# case-sensitive: the species column is "mammals$Species" (capital S), so
# comparing against "mammals$species" would silently rename nothing.
names(summarised_data)[names(summarised_data) == "mammals$Species"] <- "mammals_species"
names(summarised_data)[names(summarised_data) == "mammals$chrMark"] <- "mammals_chrMark"
We can make the ggplot code more friendly. Note that shape has to be within aes, as you're mapping it to your data.
# Median dNdS against median RPKM. Colour is mapped in the top-level aes() so
# that geom_smooth() inherits it and fits one line per species, as in the
# original plot; shape is mapped inside aes() because it varies with the data.
ggplot(summarised_data, aes(x = median_rpkm, y = median, color = mammals_species)) +
  geom_smooth(se = FALSE, method = "lm") +
  geom_point(aes(shape = mammals_chrMark)) +
  xlab("median RPKM") + ylab("dNdS")
Hopefully this should work, or at least get you somewhere closer to an answer.
I run ggpairs on my data and get this error:
library(GGally)
ggpairs(mix[, c("soc", "neg", "b")])
# Error in structure(list(call = match.call(), aesthetics = aesthetics, :
# object 'ContinuousRange' not found
Here's my data
structure(list(soc = c(-10.31, 14, 4, 1.5, 1.56), neg = c(-1.66,
-10.75, -0.63, -1.24, -0.9), b = c("2", "2", "3", "3", "2")), .Names = c("soc",
"neg", "b"), row.names = c(1L, 2L, 4L, 5L, 6L), class = "data.frame")
The same issue happened when I applied it on a different set of data.