Related
I have a bar chart presenting the number of samples, with the number of responders and non-responders in multiple datasets. I want to order the bars by the number of samples in each dataset, in ascending order (starting with the dataset with the fewest samples and ending with the one with the most).
the code:
myData <- data.frame(Articles, Samples_number, Response, No_Response)
library(ggplot2)
# Stacked bar chart of Response / No_Response counts per article.
# pivot_longer() stacks the two count columns into `name` (source column) and
# `value` (the count), which are its default output column names.
myData |>
tidyr::pivot_longer(c(Response, No_Response)) |>
# `Articles` is mapped to x as-is, so the bars are not sorted by height here.
ggplot(aes(Articles, value, fill = name)) +
geom_col(position = position_stack()) +
# Rotate the x-axis labels by 60 degrees.
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust=1))
the plot:
The dataframe that contains all the information, myData :
structure(list(Articles = c("Allen.SKCM", "Auslander.SKCM", "Braun.KIRC",
"Cho.NSCLC", "Freeman.SKCM (MGH)", "He.THCA", "Hoffman.BLCA (IMvigor210)",
"Hugo.SKCM", "Liu.SKCM", "Lozano.SKCM", "McDermott.KIRC (IMmotion150)",
"Motzer.KIRC", "Newell.SKCM", "Pender.PAN", "Riaz.SKCM", "Rizos.SKCM",
"Snyder.BLCA"), Samples_number = c(49L, 181L, 44L, 14L, 354L,
165L, 298L, 47L, 39L, 25L, 21L, 53L, 9L, 82L, 165L, 119L, 16L
), Response = c(10L, 57L, 22L, 2L, 197L, 48L, 68L, 27L, 7L, 13L,
7L, 34L, 4L, 17L, 14L, 47L, 4L), No_Response = c(39L, 124L, 22L,
12L, 157L, 117L, 230L, 20L, 32L, 12L, 14L, 19L, 5L, 65L, 24L,
72L, 12L)), class = "data.frame", row.names = c(NA, -17L))
You can use the following code:
myData <- structure(list(Articles = c("Allen.SKCM", "Auslander.SKCM", "Braun.KIRC",
"Cho.NSCLC", "Freeman.SKCM (MGH)", "He.THCA", "Hoffman.BLCA (IMvigor210)",
"Hugo.SKCM", "Liu.SKCM", "Lozano.SKCM", "McDermott.KIRC (IMmotion150)",
"Motzer.KIRC", "Newell.SKCM", "Pender.PAN", "Riaz.SKCM", "Rizos.SKCM",
"Snyder.BLCA"), Samples_number = c(49L, 181L, 44L, 14L, 354L,
165L, 298L, 47L, 39L, 25L, 21L, 53L, 9L, 82L, 165L, 119L, 16L
), Response = c(10L, 57L, 22L, 2L, 197L, 48L, 68L, 27L, 7L, 13L,
7L, 34L, 4L, 17L, 14L, 47L, 4L), No_Response = c(39L, 124L, 22L,
12L, 157L, 117L, 230L, 20L, 32L, 12L, 14L, 19L, 5L, 65L, 24L,
72L, 12L)), class = "data.frame", row.names = c(NA, -17L))
library(ggplot2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
# Reshape to one row per (article, response status), then turn `Articles` into
# a factor whose levels are sorted by the total bar height, so the discrete
# x axis is drawn in ascending order.
myData %>%
  pivot_longer(c(Response, No_Response)) %>%
  # reorder() aggregates `value` per article with FUN = sum, so a separate
  # group_by()/mutate(total = ...) step (and the matching ungroup()) is not
  # needed, and x only has to be mapped once.
  mutate(Articles = reorder(Articles, value, FUN = sum)) %>%
  ggplot(aes(x = Articles, y = value, fill = name)) +
  geom_col(position = position_stack()) +
  # Rotate the x-axis labels by 60 degrees.
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))
Created on 2022-08-28 with reprex v2.0.2
Use the reorder() function:
library(ggplot2)
# Stacked bars ordered by total height. reorder() defaults to the mean of
# `value` per article; FUN = sum states the intended ordering statistic
# explicitly (with two rows per article both give the same order).
myData |>
  tidyr::pivot_longer(c(Response, No_Response)) |>
  ggplot(aes(reorder(Articles, value, FUN = sum), value, fill = name)) +
  geom_col(position = position_stack()) +
  # Without an explicit label the x-axis title would be the reorder() call.
  labs(x = "Articles") +
  # Rotate the x-axis labels by 60 degrees.
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))
I am trying to plot a piecewise growth curve similar to this first plot. I used the separate slopes coding scheme and placed a breakpoint at time 2
| time | 0 | 1 | 2 | 5 | 10 | 15 | 20|
| time1 | 0 | 1 | 2 | 2 | 2 | 2 | 2 |
| time2 | 0 | 0 | 0 | 1 | 2 | 3 | 4 |
I used the following code to create my growth model
# Two-segment growth model: fixed slopes for time1 and time2, a random
# intercept and time1 slope per id, and a separate random time2 slope per id.
# REML = FALSE fits by maximum likelihood rather than REML.
# NOTE(review): the dput() output further down names these columns
# time_seg1/time_seg2 and the object `sdmt` - confirm the names before running.
m1 <- lmer(sdmtwr ~ time1 + time2 + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I'm also exploring an interaction with a 2-level categorical predictor with the following code
# Same random-effects structure as the baseline model, with `edu` and its
# interactions with both time segments added to the fixed effects.
# NOTE(review): the dput() output further down calls the education factor
# `ed_dich`, not `edu` - confirm the column name before running.
m2 <- lmer(sdmtwr ~ (time1 + time2)*edu + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I've attempted to create the plots with the ggplot2, sjPlot, and effects packages to no avail, and I am at a loss due to limited programming experience. I have only ever been able to plot segments separately for both the baseline and interaction models.
If anyone could provide assistance on the appropriate code, I would appreciate it!
Edit: Here is the dput summary (edited for length to show edu, time1, and time2)
> dput(sdmt)
structure(list(id = c(3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 6L,
6L, 6L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 62L, 62L, 62L, 62L,
108L, 108L, 108L, 108L, 119L, 119L, 120L, 120L, 120L, 120L, 132L,
132L, 132L, 132L, 132L, 148L, 148L, 148L, 148L, 148L, 148L, 175L,
175L, 175L, 178L, 178L, 178L, 178L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 253L, 253L, 253L, 253L, 327L, 327L, 327L, 327L, 336L,
336L, 336L, 336L, 336L, 336L, 343L, 343L, 360L, 360L, 360L, 366L,
366L, 366L), time = c(0L, 2L, 10L, 15L, 20L, 5L, 10L, 15L, 2L,
2L, 15L, 20L, 0L, 1L, 2L, 5L, 10L, 15L, 20L, 5L, 10L, 15L, 20L,
0L, 2L, 15L, 20L, 0L, 2L, 0L, 10L, 15L, 20L, 0L, 1L, 5L, 10L,
20L, 1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 2L, 0L, 1L, 2L, 5L, 0L,
1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 5L, 15L, 0L, 1L, 10L, 20L,
0L, 1L, 5L, 10L, 15L, 20L, 0L, 10L, 1L, 5L, 10L, 0L, 10L, 15L
), sdmtwr = c(20L, 24L, 18L, 19L, 9L, 17L, 24L, 17L, 41L, 33L,
27L, 29L, 31L, 29L, 26L, 29L, 32L, 20L, 19L, 40L, 42L, 46L, 38L,
14L, 25L, 24L, 29L, 46L, 45L, 29L, 26L, 34L, 38L, 30L, 33L, 71L,
52L, 51L, 29L, 33L, 50L, 55L, 40L, 39L, 32L, 34L, 35L, 28L, 37L,
37L, 36L, 37L, 29L, 52L, 51L, 50L, 44L, 42L, 30L, 43L, 43L, 41L,
33L, 46L, 49L, 38L, 52L, 50L, 48L, 49L, 49L, 50L, 40L, 39L, 18L,
NA, 3L, 31L, 43L, 47L), time_seg1 = c(0, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2,
0, 2, 2, 2, 0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 1, 2,
2, 0, 1, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 2,
2, 0, 2, 1, 2, 2, 0, 2, 2), time_seg2 = c(0, 0, 2, 3, 4, 1, 2,
3, 0, 0, 3, 4, 0, 0, 0, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 3, 4, 0,
0, 0, 2, 3, 4, 0, 0, 1, 2, 4, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 2, 3, 4, 0, 0, 1, 3, 0, 0, 2, 4, 0, 0, 1, 2,
3, 4, 0, 2, 0, 1, 2, 0, 2, 3), ed_dich = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L), .Label = c("< HS",
">= HS"), class = "factor")), row.names = c(NA, -80L), class = "data.frame")
What I think you want is a piecewise linear spline. You can do this with a truncated power basis function. In your model, you would include time and a function that is time-2 if time is greater than 2 and 0 otherwise. This makes a piecewise linear function whose two segments meet at time=2. You can do this in the model as follows:
library(lme4)
# Piecewise-linear (truncated power basis) growth model with a knot at time = 2:
# `time` carries the slope before the knot, and the ifelse() term is 0 up to
# time = 2 and (time - 2) afterwards, so its coefficient is the change in slope.
# (1 | id) adds a random intercept per id.
# NOTE(review): `tmp` is not defined in this snippet; presumably it is the data
# frame from the question's dput() output - confirm before running.
mod <- lmer(sdmtwr ~ time + I(ifelse(time > 2, time-2, 0)) +
(1 |id), data=tmp, REML=TRUE)
Then, you could use the ggpredict() function from the ggeffects package to produce the plot:
library(ggeffects)
# Model predictions of the response across the values of `time`.
g <- ggpredict(mod, "time")
# Plot the predicted curve.
plot(g)
Note: I couldn't get it to run with random effects on the time variables, but with more data perhaps you'll be able to get it to work.
I have noted that different versions of this question have been asked throughout the years. However, I cannot find a solution that fits my script. I have seen several functions, but they do not quite do the thing.
Please find my data w1 and w2 below.
I want to add text manually below the legend as demonstrated below.
I use the following:
# My script
# Stack both groups into one long data frame:
#   x = stage (as a factor), y = count, f = group label ("N+" for w1, "N0" for w2).
df <- data.frame(
  x = as.factor(c(w1$ny_stadie, w2$ny_stadie)),
  y = c(w1$n.fjernet, w2$n.fjernet),
  f = rep(c("N+", "N0"), times = c(nrow(w1), nrow(w2)))
)
# Drop rows whose stage is missing.
df <- subset(df, !is.na(x))
# Dodged box plots of y by stage (x), filled and outlined by group (f), with
# whisker caps (stat_boxplot errorbars) and jittered raw points on top.
# Box-plot outliers are made transparent because every raw point is drawn by
# geom_point() anyway.
ggplot(df) +
  geom_boxplot(aes(x, y, fill = f, colour = f), outlier.alpha = 0,
               position = position_dodge(width = 0.78)) +
  # `labels` is spelled out in full instead of relying on partial matching
  # of `label`.
  scale_x_discrete(name = "",
                   labels = c("X I\nn=113", "X II\nn=102", "X III\nn=115", "X IV\nn=302")) +
  scale_y_continuous(name = "X", breaks = seq(0, 130, 10), limits = c(-5, 130)) +
  stat_boxplot(aes(x, y, colour = f), geom = "errorbar", width = 0.3,
               position = position_dodge(0.7753), size = 1) +
  geom_point(aes(x, y, fill = f, colour = f), size = 3, shape = 21,
             position = position_jitterdodge(), alpha = 0.7) +
  scale_fill_manual(values = c("#edf1f9", "#fcebeb"), name = "X",
                    labels = c("X", "X")) +
  scale_colour_manual(values = c("#1C73C2", "red"), name = "X",
                      labels = c("X", "X")) +
  # The trailing `+` on the line above is required: without it the plot
  # expression ends at scale_colour_manual() and the theme settings below are
  # evaluated as a separate object and never applied to the plot.
  theme(
    axis.text.x = element_text(color = "grey20", size = 14),
    axis.title.x = element_text(color = "grey20", size = 14, face = "bold",
                                margin = margin(t = 12)),
    axis.text.y = element_text(color = "grey20", size = 11),
    axis.title.y = element_text(color = "grey20", size = 14, face = "bold",
                                margin = margin(r = 12)),
    legend.key = element_rect(fill = "white"),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 14)
  )
And my data
# My Data
w1 <- structure(list(ny_stadie = structure(c(1, 1, 4, 4, 1, 3, 1, 4,
3, 2, 2, 3, 4, 4, 4, 2, 4, 4, 3, 4, 2, 4, 4, 4, 4, 1, 4, 4, 4,
2, 1, 2, 2, 3, 1, 4, 3, 1, 3, 1, 4, 4, 2, 1, 4, 1, 4, 2, 4, 2
), class = "AsIs"), n.fjernet = c(25L, 3L, 27L, 22L, 18L, 9L,
6L, 5L, 25L, 13L, 5L, 56L, 56L, 30L, 27L, 27L, 26L, 24L, 22L,
22L, 20L, 19L, 18L, 17L, 15L, 15L, 13L, 12L, 12L, 11L, 11L, 10L,
9L, 8L, 8L, 8L, 8L, 7L, 7L, 6L, 5L, 4L, 2L, 2L, 2L, 34L, 30L,
28L, 25L, 22L)), row.names = c(1L, 3L, 4L, 33L, 41L, 65L, 74L,
81L, 82L, 84L, 86L, 88L, 89L, 97L, 100L, 101L, 102L, 104L, 107L,
108L, 112L, 114L, 116L, 117L, 119L, 120L, 126L, 128L, 129L, 135L,
136L, 137L, 143L, 144L, 145L, 147L, 148L, 150L, 154L, 157L, 160L,
162L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L), class = "data.frame")
AND
w2 <- structure(list(ny_stadie = structure(c(4, 3, 4, 4, 4, 4, 4, 4,
3, 1, 3, 4, 3, 1, 1, 1, 4, 4, 3, 4, 4, 2, 2, 2, 2, 4, 3, 2, 1,
1, 4, 3, 2, 1, 1, 1, 4, 3, 4, 2, 4, 4, 4, 4, 3, 4, 2, 2, 4, 4
), class = "AsIs"), n.fjernet = c(10L, 13L, 9L, 7L, 7L, 7L, 6L,
6L, 5L, 4L, 3L, 37L, 26L, 19L, 17L, 15L, 9L, 57L, 55L, 33L, 33L,
33L, 28L, 27L, 27L, 26L, 23L, 23L, 23L, 22L, 21L, 21L, 20L, 20L,
19L, 18L, 18L, 18L, 17L, 17L, 16L, 16L, 16L, 15L, 15L, 15L, 14L,
14L, 13L, 13L)), row.names = c(2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L,
53L, 54L, 55L), class = "data.frame")
You can use grobs (graphical objects) from grid package.
See this following code:
library(grid)
library(gridExtra)
# Open a small viewport on the already-drawn page, draw a text label in it,
# then step back up to the parent viewport. Adjust x / y / width / height to
# move the label to the desired spot (e.g. under the legend).
vp <- viewport(
  x = .8725, y = .4,
  width = .15, height = .3,
  just = "left", clip = "off", angle = 0
)
pushViewport(vp)
tbl <- textGrob("Text here", gp = gpar(fontsize = 12, col = "blue"))
grid.draw(tbl)
upViewport()
I have an R program and am struggling to get all of my points to show on the map.
library(ggmap)
library(ggplot2)
# Read the school asset counts from the working directory.
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
# Columns whose name matches "SEP.12".
Sub1 <- sep[grep("SEP.12", names(sep))]
# Row-wise share (in percent) of the SEP.12 columns in the total of columns 4-7.
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# Create a new grouping variable from column 8.
# NOTE(review): column 8 is presumably the newCol added above - confirm against
# the CSV layout. The result is a free-standing vector, not a column of `sep`.
Percent_SEP12_Assets <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# Get the map: a fixed centre and zoom level, independent of the data extent.
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# Plot the map and use the grouping variable for the point colour inside aes().
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
And here is output map
I wish to zoom in as far as possible without cutting out data points, but no matter which location I pick on the map, some of the data keeps getting cut off, i.e. Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
# Load the example dataset.
data("catholic_dioceses")
# Bounding box around every point; `f` is the padding passed to make_bbox().
bbox <- make_bbox(
  lon = catholic_dioceses$long,
  lat = catholic_dioceses$lat,
  f = 0.01
)
# Fetch a map for that box instead of a named location plus zoom level.
map <- get_map(location = bbox)
ggmap(map) +
  geom_point(aes(x = long, y = lat), data = catholic_dioceses)
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
# Bounding box around every school; `f` is the padding passed to make_bbox().
bbox <- make_bbox(lon = sep$Longitude, lat = sep$Latitude, f = 0.01)
# Fetch the map for that box rather than for a fixed centre and zoom level.
map <- get_map(location = bbox)
ggmap(map) +
  geom_point(
    aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
    data = sep,
    size = 9,
    alpha = 0.6
  ) +
  scale_color_manual(
    breaks = c("Over 50", "Under 50"),
    values = c("green", "red")
  )
In the data included below I have three sites (AAA,BBB,CCC) and individuals within each site (7, 12, 7 respectively). For each individual I have observed values (ObsValues) and three sets of predicted values each with a standard error. I have 26 rows (i.e. 26 individuals) and 9 columns.
The data is included here through dput()
help <- structure(list(StudyArea = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"),
Ind = structure(1:26, .Label = c("AAA_F01", "AAA_F17", "AAA_F33",
"AAA_F49", "AAA_F65", "AAA_F81", "AAA_F97", "BBB_P01", "BBB_P02",
"BBB_P03", "BBB_P04", "BBB_P05", "BBB_P06", "BBB_P07", "BBB_P08",
"BBB_P09", "BBB_P10", "BBB_P11", "BBB_P12", "CCC_F02", "CCC_F03",
"CCC_F04", "CCC_F05", "CCC_F06", "CCC_F07", "CCC_F08"), class = "factor"),
ObsValues = c(22L, 50L, 8L, 15L, 54L, 30L, 11L, 90L, 6L,
53L, 9L, 42L, 72L, 40L, 60L, 58L, 1L, 20L, 37L, 2L, 50L,
68L, 20L, 19L, 58L, 5L), AAAPred = c(28L, 52L, 6L, 15L, 35L,
31L, 13L, 79L, 6L, 58L, 5L, 42L, 88L, 49L, 68L, 60L, 1L,
26L, 46L, 0L, 34L, 71L, 20L, 15L, 35L, 5L), AAAPredSE = c(3.5027829,
4.7852191, 1.231803, 2.5244013, 4.873907, 3.8854192, 2.3532752,
6.3444402, 1.7387295, 5.605111, 1.667818, 4.4709107, 7.0437967,
5.447496, 6.0840486, 5.4371275, 0.8156916, 3.5153847, 4.698754,
0, 3.8901103, 5.993616, 3.1720272, 2.6777869, 4.5647313,
1.4864128), BBBPred = c(14L, 43L, 5L, 13L, 26L, 32L, 14L,
80L, 5L, 62L, 4L, 44L, 67L, 44L, 55L, 42L, 1L, 20L, 47L,
0L, 26L, 51L, 15L, 16L, 34L, 6L), BBBPredSE = c(3.1873435,
4.8782831, 1.3739863, 2.5752273, 4.4155679, 3.8102168, 2.3419518,
6.364606, 1.7096028, 5.6333421, 1.5861323, 4.4951428, 6.6046699,
5.302902, 5.9244328, 5.1887055, 0.8268689, 3.4014041, 4.6600598,
0, 3.8510512, 5.5776686, 3.0569531, 2.6358433, 4.5273782,
1.4263518), CCCPred = c(29L, 53L, 7L, 15L, 44L, 32L, 15L,
86L, 8L, 61L, 5L, 46L, 99L, 54L, 74L, 67L, 1L, 30L, 51L,
1L, 37L, 94L, 21L, 17L, 36L, 6L), CCCPredSE = c(3.4634488,
4.7953389, 0.9484051, 2.5207022, 5.053452, 3.8072731, 2.2764727,
6.3605968, 1.6044067, 5.590048, 1.6611899, 4.4183913, 7.0124638,
5.6495918, 6.1091934, 5.4797929, 0.8135164, 3.4353934, 4.6261147,
0.8187396, 3.7936333, 5.6512378, 3.1686123, 2.633179, 4.5841921,
1.3989955)), .Names = c("StudyArea", "Ind", "ObsValues",
"AAAPred", "AAAPredSE", "BBBPred", "BBBPredSE", "CCCPred", "CCCPredSE"
), class = "data.frame", row.names = c(NA, -26L))
The head() and dim() of help are below too
head(help)
StudyArea Ind ObsValues AAAPred AAAPredSE BBBPred BBBPredSE CCCPred CCCPredSE
1 AAA AAA_F01 22 28 3.502783 14 3.187343 29 3.4634488
2 AAA AAA_F17 50 52 4.785219 43 4.878283 53 4.7953389
3 AAA AAA_F33 8 6 1.231803 5 1.373986 7 0.9484051
4 AAA AAA_F49 15 15 2.524401 13 2.575227 15 2.5207022
5 AAA AAA_F65 54 35 4.873907 26 4.415568 44 5.0534520
6 AAA AAA_F81 30 31 3.885419 32 3.810217 32 3.8072731
dim(help)
> dim(help)
[1] 26 9
I am a relative newcomer to ggplot and am trying to make a plot that displays the observed and predicted values for each individual with a different color for each StudyArea. I can manually add points and force the color with the code below, however this feel rather clunky and also does not produce a legend as I have not specified color in aes().
require(ggplot2)
# One x position per individual; observed values drawn as large red asterisks.
ggplot(help, aes(x=Ind, y=ObsValues))+
geom_point(color="red", pch = "*", cex = 10)+
# Each prediction set is added by hand as a point plus a +/- SE error bar.
# The colours are fixed outside aes(), so no legend is generated.
geom_point(aes(y = AAAPred), color="blue")+
geom_errorbar(aes(ymin=AAAPred-AAAPredSE, ymax=AAAPred+AAAPredSE), color = "blue")+
geom_point(aes(y = BBBPred), color="darkgreen")+
geom_errorbar(aes(ymin=BBBPred-BBBPredSE, ymax=BBBPred+BBBPredSE), color = "darkgreen")+
geom_point(aes(y = CCCPred), color="black")+
geom_errorbar(aes(ymin=CCCPred-CCCPredSE, ymax=CCCPred+CCCPredSE), color = "black")+
# Tilt the individual labels on the x axis.
theme(axis.text.x=element_text(angle=30, hjust=1))
In the figure above, the asterisks are the observed values and the points with error bars are the predicted values, one set from each StudyArea.
I tried to melt() the data, but ran into more problems plotting. That being said, I suspect melt()ing or reshape()ing is the best option.
Any suggestions on how to best alter/restructure the help data so that I can plot the observed and predicted values for each individual with a different color for each StudyArea would be greatly appreciated.
I also hope to produce a legend - the likely default once the data is correctly formatted
Note: The resulting figure is indeed very busy; it will likely be simplified once I get a better handle on ggplot.
thanks in advance.
Try this:
library(reshape2)
# Melt predictions (columns 4, 6, 8) and their standard errors (columns 5, 7, 9)
# separately with the same id columns, so the two long tables line up row by
# row and the SE values can be attached as an extra column.
x.value <- melt(help, id.vars = 1:3, measure.vars = seq(4, 8, by = 2))
x.se <- melt(help, id.vars = 1:3, measure.vars = seq(5, 9, by = 2))
gg <- data.frame(x.value, se = x.se$value)

# Observed values as solid diamonds; predictions as open circles with
# +/- SE error bars, coloured by prediction set.
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues), size = 5, shape = 18) +
  geom_point(aes(y = value, color = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, color = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
Edit:: Response to #B.Davis' questions below:
You have to group the ObsValues by StudyArea, not variable. But when you do that you get six colors, three for StudyArea and three for the predictor groups (variable). If we give the predictor groups (e.g., AAAPred, etc.) the same names as the StudyArea groups (e.g. AAA, etc.), then ggplot just generates three colors.
# Keep only the first three characters of the series name ("AAAPred" -> "AAA")
# so predictions share their colour key with the StudyArea values.
gg$variable <- substr(as.character(gg$variable), 1, 3)

# Observed values are coloured by StudyArea, predictions by prediction set.
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues, color = StudyArea), size = 5, shape = 18) +
  geom_point(aes(y = value, color = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, color = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
Similar to #jlhoward's solution, but I chose to treat ObsValues as a variable to get it in the legend.
# NOTE(review): `dat` is not defined in this snippet; presumably it holds the
# question's data frame - confirm before running.
help <- dat
# ObsValues (column 3) is melted together with the predictions so it becomes
# one more level of `variable` and therefore appears in the legend.
x.value <- melt(help,id.vars=1:2, measure.vars=c(3,4,6,8))
# Column 3 is repeated here only so both long tables have the same number of
# rows; the `se` values for the ObsValues rows are not real standard errors.
x.se <- melt(help,id.vars=1:2, measure.vars=c(3,5,7,9))
gg <- data.frame(x.value,se=x.se$value)
ggplot(gg)+
geom_point(aes(x=Ind, y=value, color=variable),size=3, shape=1)+
# Error bars are drawn for the predictions only; the ObsValues rows are
# filtered out.
geom_errorbar(data= subset(gg,variable!='ObsValues'),
aes(x=Ind, ymin=value-se, ymax=value+se, color=variable))+
theme(axis.text.x=element_text(angle=-90))
This is a little clumsy, but gets you what you want:
# jlhoward's melting is more elegant.
require(reshape2)
# Long table of point values: one row per individual and series
# (ObsValues, AAAPred, BBBPred, CCCPred). `Ind` is named explicitly as the id
# column instead of letting melt() guess it.
point_cols <- c("Ind", "ObsValues", "AAAPred", "BBBPred", "CCCPred")
melted.points <- melt(help[, point_cols], id.vars = "Ind")
# Flag each row as observed or predicted (used for the size and shape scales).
melted.points$observed <- ifelse(
  melted.points$variable == "ObsValues", "observed", "predicted"
)

# Long table of standard errors. Stripping "SE" from the series name makes
# `variable` match the prediction names so it can serve as a merge key.
se_cols <- c("Ind", "AAAPredSE", "BBBPredSE", "CCCPredSE")
melted.points.se <- melt(help[, se_cols], id.vars = "Ind")
melted.points.se$variable <- gsub("SE", "", melted.points.se$variable)

# Left join: observed rows have no matching standard error and get NA.
help2 <- merge(
  melted.points, melted.points.se,
  by = c("Ind", "variable"), all.x = TRUE
)

# Rename the merged value columns with base R. The previous
# rename(help2, c(old = "new")) call needs plyr (or reshape), which this
# snippet never loads.
names(help2)[names(help2) == "value.x"] <- "value"
names(help2)[names(help2) == "value.y"] <- "se"
And now the actual plot:
# Points for every series, coloured by series and sized/shaped by whether the
# row is observed or predicted; error bars span value +/- se.
ggplot(
  help2,
  aes(
    x = Ind, y = value,
    color = variable,
    size = observed, shape = observed
  )
) +
  geom_point() +
  # Fixed line size for the bars overrides the size mapping inherited from
  # the plot.
  geom_errorbar(aes(ymin = value - se, ymax = value + se), size = 1) +
  scale_colour_manual(values = c("red", "blue", "darkgreen", "black")) +
  scale_size_manual(values = c(observed = 4, predicted = 3)) +
  scale_shape_manual(values = c(observed = 8, predicted = 16))