Related
I have this data below called test1.melted. I also have the code to plot my data using package scatterpie, but due to inherent problem of scatterpie (if coordinates are not cartesian,i.e. equal horizontal and vertical distances), you would not get properly formatted plot. Is there a better way to plot this data without using scatterpie?
Data:
test1.melted<-structure(list(Wet_lab_dilution_A = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L), .Label = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L"), class = "factor"), TypeA = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("I", "II"), class = "factor"),
NA12878 = c(100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L, 100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L), NA12877 = c(0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L, 0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L), IBD = c(1.02, 0.619, 0.294,
0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978, 1, 1, 1.02,
0.619, 0.294, 0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978,
1, 1), variableA = c("tEst", "tEst", "tEst", "tEst", "tEst",
"tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "pair",
"pair", "pair", "pair", "pair", "pair", "pair", "pair", "pair",
"pair", "pair", "pair"), valueA = c(0.1, 59.8, 84.6, 89.2,
97.4, 100, 99.6, 56.4, 29.9, 24, 12.1, 0.1, 0.1, 51.08, 75.28,
80.09, 90.16, 100, 100, 48.09, 23.97, 18.81, 9.24, 0.08)), row.names = c(NA,
-24L), .Names = c("Wet_lab_dilution_A", "TypeA", "NA12878", "NA12877",
"IBD", "variableA", "valueA"), class = "data.frame")
code:
p<- ggplot() + geom_scatterpie(aes(x=valueA, y=IBD, group=TypeA), data=test1.melted,
cols=c("NA12878", "NA12877")) + coord_equal()+
facet_grid(TypeA~variableA)
p
Do you have to use a pie chart? (And you might; there's nothing wrong with them.)
Cause something like this could illustrate literally every variable in the dataset:
library(ggplot2)
test1.melted$NA12877 <- as.factor(test1.melted$NA12877)
test1.melted$NA12878 <- as.factor(test1.melted$NA12878)
p <- ggplot(data = test1.melted, aes(x=valueA, y=IBD, group=TypeA))
p <- p + geom_point(aes(colour=NA12877, fill = NA12878), stroke=3, size = 3, shape = 21)
p <- p + geom_text(aes(label = Wet_lab_dilution_A), size = 2)
p + facet_grid(TypeA ~ variableA) + theme_minimal()
I want to reproduce the following drc::plot.drc graphs with ggplot2.
df1 <-
structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142",
"29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942",
"27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942",
"33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf),
Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L,
0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L,
31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L,
55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L,
15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L,
172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L,
0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L,
13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L,
57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L,
33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L,
0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L,
6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV",
"Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")
library(data.table)
dt1 <- data.table(df1)
library(drc)
dt1fm1 <-
drm(
formula = Germinated ~ Start + End
, curveid = TempV
# , pmodels =
# , weights =
, data = dt1
# , subset =
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
# , start =
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
## ----dt1fm1Plot1----
plot(
x = dt1fm1
, xlab = "Time (Hours)"
, ylab = "Proportion Germinated (\\%)"
# , ylab = "Proportion Germinated (%)"
, add = FALSE
, level = NULL
, type = "average" # c("average", "all", "bars", "none", "obs", "confidence")
, broken = FALSE
# , bp
, bcontrol = NULL
, conName = NULL
, axes = TRUE
, gridsize = 100
, log = ""
# , xtsty
, xttrim = TRUE
, xt = NULL
, xtField = NULL
, xField = "Time (Hours)"
, xlim = c(0, 200)
, yt = NULL
, ytField = NULL
, yField = "Proportion Germinated"
, ylim = c(0, 1.05)
, lwd = 1
, cex = 1.2
, cex.axis = 1
, col = TRUE
# , lty
# , pch
, legend = TRUE
# , legendText
, legendPos = c(40, 1.1)
, cex.legend = 0.6
, normal = FALSE
, normRef = 1
, confidence.level = 0.95
)
## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))
dt1fm1Plot2 <-
ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw() +
scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
# scale_y_continuous(expand = c(0, 0), labels = percent) +
expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = element_text(size = 12, angle = 90, vjust = 0.25))
print(dt1fm1Plot2)
Question
There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.
Edited
Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.
As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.
In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.
Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.
Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:
drcmod <- function(dt1){
drm(formula = Germinated ~ Start + End
, curveid = TempV
, data = dt1
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
}
Now we can use this wrapper to fit the drc model to each factor level in turn:
dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
dt <- dt1[TempV==levels(TempV)[i]]
dt[, TempV:=as.character(TempV)]
dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
dt[, cum_Germinated := cumsum(Germ_frac)]
dt[, Pred := c(predict(object=drcmod(dt)), NA)]
dt2 <- rbind(dt2, dt)
}
and plot:
ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw()
Edit
If we run the original code in the question using a subset of the data with fewer factor levels, for example using
dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)
all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.
So I have R program, and am struggling with getting all points in map
library(ggmap)
library(ggplot2)
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# get the map
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
And here is output map
I wish to zoom in enough without cutting out data points, but no matter location I pick on map, the data keeps getting cut, i.e. Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
data("catholic_dioceses")
bbox <- make_bbox(catholic_dioceses$long, catholic_dioceses$lat, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=catholic_dioceses, aes(x = long, y = lat))
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude,
color = Percent_SEP12_Assets),
size = 9, alpha = 0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
I have the following data frame
structure(list(Lightbox = c(84L, 67L, 80L, 63L, 76L, 66L, 79L,
81L, 77L, 82L, 84L, 67L, 80L, 63L, 76L, 66L, 79L, 81L, 77L, 82L,
84L, 67L, 80L, 63L, 76L, 66L, 79L, 81L, 77L, 82L, 84L, 67L, 80L,
63L, 76L, 66L, 79L, 81L, 77L, 82L, 84L, 67L, 80L, 63L, 76L, 66L,
79L, 81L, 77L, 82L), variable = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("S1",
"S2", "S3", "S4", "S5"), class = "factor"), value = c(82L, 65L,
73L, 50L, 50L, 50L, 72L, 56L, 76L, 78L, 88L, 66L, 71L, 60L, 54L,
55L, 63L, 68L, 73L, 75L, 73L, 65L, 76L, 57L, 51L, 57L, 75L, 65L,
69L, 66L, 77L, 67L, 79L, 58L, 55L, 56L, 77L, 66L, 73L, 80L, 78L,
62L, 78L, 52L, 63L, 59L, 71L, 64L, 69L, 89L), mean = c(83, 66,
76.5, 56.5, 63, 58, 75.5, 68.5, 76.5, 80, 86, 66.5, 75.5, 61.5,
65, 60.5, 71, 74.5, 75, 78.5, 78.5, 66, 78, 60, 63.5, 61.5, 77,
73, 73, 74, 80.5, 67, 79.5, 60.5, 65.5, 61, 78, 73.5, 75, 81,
81, 64.5, 79, 57.5, 69.5, 62.5, 75, 72.5, 73, 85.5), diff = c(2L,
2L, 7L, 13L, 26L, 16L, 7L, 25L, 1L, 4L, -4L, 1L, 9L, 3L, 22L,
11L, 16L, 13L, 4L, 7L, 11L, 2L, 4L, 6L, 25L, 9L, 4L, 16L, 8L,
16L, 7L, 0L, 1L, 5L, 21L, 10L, 2L, 15L, 4L, 2L, 6L, 5L, 2L, 11L,
13L, 7L, 8L, 17L, 8L, -7L)), .Names = c("Lightbox", "variable",
"value", "mean", "diff"), row.names = c(NA, -50L), class = "data.frame")
I wish to plot a bland altman graph, difference against mean for 5 facet groups S1->S5 which is easy enough
p <- ggplot(df_melt, aes(mean, diff))+ geom_point(na.rm=TRUE)+ facet_wrap(~variable)
However, I would also like to add some geom_hline to each facet showing the mean for each group and the standard deviations. If I had only one group I would do the following:
yintercepts_mean <- c(mean(df_melt$diff, na.rm = TRUE))
yintercepts_mean_r <- round(yintercepts_mean,3)
yintercepts_sd_p <- c(mean(df_melt$diff, na.rm = TRUE) + c(2) * sd(df_melt$diff, na.rm = TRUE))
yintercepts_sd_n <- c(mean(df_melt$diff, na.rm = TRUE) + c(-2) * sd(df_melt$diff, na.rm = TRUE))
yintercepts_sd_p_r <- round(yintercepts_sd_p,3)
yintercepts_sd_n_r <- round(yintercepts_sd_n,3)
#ylabels <- c("- 2SD", "+ 2SD", "Mean")
ylabels <- c("mean")
ylabels2 <- c("+ 2SD")
ylabels3 <- c("- 2SD")
p + geom_hline(yintercept = yintercepts_mean_r, linetype=1, color='blue') +
geom_hline(yintercept = yintercepts_sd_p_r, linetype=2, color='blue') +
geom_hline(yintercept = yintercepts_sd_n_r, linetype=2, color='blue')
How can I incorporate the above when facetting my data?
library(plyr)
df2 <- ddply(df_melt,.(variable),summarise,mean=mean(diff, na.rm = TRUE),
sd=sd(diff, na.rm = TRUE))
library(ggplot2)
p <- ggplot(df_melt, aes(mean, diff)) +
geom_point(na.rm=TRUE) +
geom_hline(data=df2,aes(yintercept=c(round(mean,3),
round(mean+2*sd,3),
round(mean-2*sd,3))),
linetype=c(1,2,2), color='blue') +
facet_wrap(~variable)
print(p)
I know that this is a very silly question but I cannot work out how to do it.
I want to subtract 300 from the values on each row on the trial1 column if they are larger than 299.
I tried:
sums[sums$trial1 > 299, ][,"trial1"] -= 300
but didn't work. SO far the only way I managed to get it to work is by splitting the data.frame using subset and then modifying it with :
sums$trial1 = sums$trial1 - 300
and then using rbind(). I am pretty sure that using subset and rbind is overkill, but I haven't been able to find a direct way yet...
I used dput to get a sample of my data.frame.
structure(list(part_no = c(10L, 10L, 10L, 10L, 10L, 10L), trial1 = c(294L,
296L, 298L, 300L, 302L, 304L), trial2 = c(295L, 297L, 299L, 301L,
303L, 305L), id1 = c(1.5, 1.5, 1.5, 2, 2, 2), id2 = c(1.5, 1.5,
1.5, 2, 2, 2), dist1 = c(141L, 141L, 115L, 126L, 177L, 141L),
width1 = c(77L, 77L, 63L, 42L, 59L, 47L), dist2 = c(143L,
135L, 146L, 255L, 327L, 369L), width2 = c(78L, 74L, 80L,
85L, 109L, 123L), ttime1 = c(1752L, 1681L, 1664L, 1798L,
1664L, 1697L), ttime2 = c(2563L, 1849L, 2067L, 1933L, 2118L,
2245L), no_clicks1 = c(8L, 8L, 8L, 8L, 8L, 8L), no_clicks2 = c(8L,
8L, 8L, 8L, 8L, 8L), no_ontarget1 = c(7L, 8L, 8L, 8L, 8L,
8L), no_ontarget2 = c(8L, 8L, 8L, 4L, 7L, 8L), e1 = c(1L,
0L, 0L, 0L, 0L, 0L), e2 = c(0L, 0L, 0L, 4L, 1L, 0L), rating = c(252,
252, 252, 252, 252, 252), prat = c(0.8, 0.8, 0.8, 0.8, 0.8,
0.8), ptim = c(-46.2899543378995, -9.9940511600238, -24.21875,
-7.5083426028921, -27.2836538461538, -32.2922804949912),
ptdiff = c(-47.0899543378995, -10.7940511600238, -25.01875,
-8.3083426028921, -28.0836538461538, -33.0922804949912),
pdist = c(-1.41843971631206, 4.25531914893617, -26.9565217391304,
-102.380952380952, -84.7457627118644, -161.702127659574),
pddiff = c(-2.21843971631206, 3.45531914893617, -27.7565217391304,
-103.180952380952, -85.5457627118644, -162.502127659574),
perr = c(100, NaN, NaN, -Inf, -Inf, NaN), pediff = c(99.2,
NaN, NaN, -Inf, -Inf, NaN)), .Names = c("part_no", "trial1",
"trial2", "id1", "id2", "dist1", "width1", "dist2", "width2",
"ttime1", "ttime2", "no_clicks1", "no_clicks2", "no_ontarget1",
"no_ontarget2", "e1", "e2", "rating", "prat", "ptim", "ptdiff",
"pdist", "pddiff", "perr", "pediff"), row.names = 148:153, class = "data.frame")
Thanks!
Your first attempt was close but there is no -= operator in base R, so you need to supply the subset on the right hand side as well.
sums[sums$trial1 > 299,"trial1"] <- sums[sums$trial1 > 299,"trial1"]-300
Why not just use mod(x,300) ?
[charcountfiller]