How could I insert a histogram into a `geom_smooth` plot?

How could I insert a histogram into a `geom_smooth` plot? - r

I am trying to mimic some figures from journal papers. Here is an example from Schlenker and Roberts (2009).
I'd like to add a similar histogram to my own plot. Please see below. Is it possible to achieve this task with ggplot? Thanks.
The `dput` output of my data is below: `rh` is the x-axis variable and `yhat1` is the y-axis variable.
> dput(df.m[,c('rh','yhat1')])
structure(list(rh = c(11L, 13L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L,
99L, 100L), yhat1 = c(0.0097784, 0.111762325, 0.0887123966666667,
0.24714677, 0.079887235, 0.162714825, 0.24789043, 0.107558165,
0.182885584545455, 0.136690964444444, 0.159203683333333, 0.5156053805,
0.587034213636364, 0.233377613, 0.31531245, 0.4778449572, 0.212574774137931,
0.2274105676, 0.253733041707317, 0.560999839354839, 0.224892959444444,
0.392268151304348, 0.351498776603774, 0.366547010727273, 0.35013903469697,
0.382026272372881, 0.510611202461538, 0.391176294871795, 0.423356474328358,
0.380316089137931, 0.459821489651163, 0.388949226593407, 0.506833284166667,
0.459263999259259, 0.558535709906542, 0.745323656071429, 0.60167464606383,
0.72210854266129, 0.695203745656566, 0.638265557105263, 0.52373110503876,
0.611695133046875, 0.963833986386555, 0.803060819275362, 0.837984669112426,
0.7931166204, 0.870764136976744, 1.21005393820225, 0.862845527777778,
1.028402381125, 1.2077895633526, 1.01176334204082, 1.08139833964706,
0.90346288, 1.05871937863014, 1.27788244930233, 1.16250975336634,
1.1450916525, 1.4412301412, 1.21264826238281, 1.35417930411504,
1.18588206727273, 1.40277204710084, 1.33194569259259, 1.18413544210084,
1.22718163528571, 1.33992107226667, 1.44770425268156, 1.43974964777778,
1.26656031551351, 1.58998655363636, 1.29994566024272, 1.46398530493902,
1.26061274530055, 1.30718501225275, 1.20523443567901, 1.23789593428571,
1.34433582230769, 1.36438752851852, 1.5915544857037, 1.10979387898438,
1.31898147708661, 1.426120105, 1.52075980155738, 1.40629729460177,
0.9048366681, 1.2973945580531, 1.37696154192982)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -88L))

Hopefully this can get you started:
library(ggplot2)

# Draw a histogram of rh and a loess smooth of yhat1 in the same panel.
# The histogram sits at the bottom of the y axis; the smooth is rescaled and
# shifted so it is drawn above the bars, and a secondary axis maps the shifted
# values back to the original yhat1 scale.
breaks <- 20  # number of histogram bins, reused everywhere a bin count is needed

# Estimated height of the tallest bar plus one unit of padding. cut() and
# geom_histogram() may place bin edges slightly differently, so treat this as
# an approximation of the tallest bar rather than an exact count.
maxcount <- max(table(cut(df.m$rh, breaks = breaks))) + 1

ggplot(data = df.m, aes(x = rh)) +
  # yhat1 is stretched by 10 and lifted by maxcount; sec_axis() below applies
  # the inverse transformation so the right-hand axis reads in yhat1 units.
  stat_smooth(
    formula = y ~ x,
    aes(y = yhat1 * 10 + maxcount),
    method = "loess"
  ) +
  scale_y_continuous(
    name = "Exposure (Days)",
    breaks = c(0, 5),
    sec.axis = sec_axis(
      ~ (. - maxcount) / 10,
      name = "Log of Daily Confirmed Case Counts"
    )
  ) +
  geom_histogram(bins = breaks, color = "black", fill = "green") +
  # annotate() draws the reference line once; geom_segment() with constant
  # aesthetics would draw one identical segment per row of df.m.
  annotate(
    "segment",
    x = 85, xend = 85, y = maxcount, yend = Inf,
    colour = "red", linetype = "dashed"
  ) +
  labs(x = "Relative Humidity Percentage") +
  theme_classic() +
  theme(
    axis.line.y.left = element_line(color = "green"),
    axis.title.y.left = element_text(hjust = 0.05, color = "green")
  )

Related

Plotting multiple different histograms based on vector of column names

I have the following dataframe that I want to plot a histogram for each column:
structure(list(ACTB = c(11.7087918, 13.1847403, 8.767737, 12.2949669,
12.399929, 12.130683, 9.816222, 10.700336, 11.862543, 12.479818,
12.48152, 11.798277, 12.0932696, 11.014992, 12.3496682, 11.9810211,
11.946094, 12.1517049, 11.6794028, 12.4895911, 12.787039, 12.2927522,
12.746232, 12.4428358, 11.6382198, 11.6833202, 12.3320067, 12.390378,
12.5550587, 11.597384, 11.7608624, 12.018702, 11.9211984, 11.7143178,
11.800693, 12.7543979, 12.7028472, 11.6509804, 11.5112258, 12.36468,
12.0704304, 12.5876125, 12.2929857, 11.764464, 12.3740263, 12.275172,
11.5247418, 11.9290723, 11.100383, 12.5631062, 10.647334, 12.265323,
11.457643, 12.194339, 11.468173, 12.355388, 12.3233796, 12.200504,
11.716417, 12.430028, 11.3201558, 11.43911, 12.9782049, 11.139062,
11.181185, 10.123614, 11.963833, 10.919224, 11.873896, 11.800616,
12.2159602, 11.6360763, 11.6204291, 11.5500821, 12.6783682, 11.918854,
11.8701782, 10.98058, 11.6254916, 12.1558646, 11.533709, 12.0096358,
12.2830638, 11.772724, 11.8853726, 12.041823, 12.623814, 12.3134903,
11.6714245, 12.1333082, 12.4747336, 11.5326378, 12.6222532, 10.922728,
10.9492515, 11.3410073, 12.3005053), ATP5F1 = c(8.3731175, 8.3995189,
8.871088, 8.4389342, 8.529104, 9.004405, 8.883721, 8.70097, 8.24411,
8.393635, 8.76813, 8.756177, 8.4418168, 7.986864, 8.4840108,
8.6523954, 8.5645576, 8.2452877, 8.2440872, 8.7155973, 9.028364,
8.3578703, 9.007441, 7.8892308, 9.0255621, 8.3165712, 8.3400111,
8.061171, 8.5216917, 8.337517, 8.2341439, 8.810458, 8.8794988,
8.4657149, 8.311901, 8.131606, 8.5865282, 9.0900416, 8.8407707,
7.437107, 8.3982759, 8.7610335, 8.3624475, 8.353429, 8.3630127,
8.555639, 8.6435841, 8.9587154, 8.517079, 8.9597121, 8.111514,
8.99767, 8.266991, 8.106218, 8.518875, 8.445485, 8.6409752, 8.662025,
8.697312, 8.071819, 8.3113401, 8.709276, 8.9154896, 8.138148,
6.866765, 9.391611, 8.448086, 8.29189, 8.541953, 8.801044, 8.3088083,
8.288688, 8.8357729, 8.4731257, 8.7321095, 8.383259, 8.4729561,
5.551528, 8.526436, 8.4548827, 8.242625, 8.9862422, 8.5688994,
8.848029, 8.2656363, 8.434976, 8.8023704, 8.6692361, 8.4333198,
8.2926568, 8.2141276, 8.3246346, 7.7262395, 8.0797336, 8.7005427,
8.7695946, 8.1262312), DDX5 = c(11.3122241, 11.7042284, 8.866042,
12.0376754, 12.417701, 11.479431, 10.078783, 9.043405, 11.216074,
11.846906, 11.161803, 8.713301, 11.0790887, 11.685125, 11.9599302,
12.4036502, 11.9778411, 11.9900709, 11.6069971, 11.2651929, 11.455536,
12.3741866, 11.558182, 11.498146, 12.5073231, 11.4546523, 11.8465482,
11.51445, 11.721283, 12.340818, 11.5388553, 11.920725, 11.7067172,
11.6207138, 11.638226, 11.1407525, 11.5832407, 11.981909, 11.7684202,
12.435987, 11.5253382, 10.9882446, 12.1789747, 11.956257, 12.5427815,
12.007658, 11.6360041, 12.2520109, 11.858959, 12.4740761, 6.927855,
11.117424, 7.749824, 11.518817, 11.322855, 11.74096, 11.768474,
11.497009, 11.912888, 11.570506, 11.8167398, 11.912566, 11.2631437,
11.328946, 11.072161, 12.807216, 12.127281, 12.125497, 11.524622,
11.20101, 11.5451414, 12.0747211, 11.5716524, 11.7223929, 11.8529683,
11.868865, 11.8998228, 9.859857, 12.1404707, 11.9166386, 12.613162,
12.9062351, 11.6691732, 11.984726, 11.727059, 11.421816, 11.9506736,
12.2447547, 11.8167228, 11.9021356, 12.5527606, 12.6511506, 11.8550833,
11.382018, 11.8314198, 11.8394352, 11.8128198), EEF1G = c(12.622405,
11.2945857, 8.610078, 13.1323891, 12.702769, 12.319703, 10.181874,
8.615338, 11.526551, 12.106198, 11.602801, 9.137166, 13.0991666,
13.049641, 12.2938678, 11.7442632, 12.7866184, 12.6753617, 12.9552413,
12.0861518, 13.136434, 12.64865, 13.298616, 11.8531038, 12.7791485,
13.4150478, 11.636058, 12.013313, 11.8785493, 12.771945, 12.5351321,
13.147321, 11.6760014, 12.2604174, 11.802344, 12.23351, 12.1175728,
12.7360727, 12.5730595, 11.13, 11.7737462, 11.9774565, 11.8927844,
12.17392, 12.441605, 12.221691, 12.4866463, 12.5645763, 12.070268,
12.1801377, 8.80704, 12.288168, 8.298831, 12.234659, 11.832415,
12.474423, 12.4440819, 11.888544, 11.625162, 12.161204, 12.2707656,
12.941017, 12.3491325, 12.978561, 11.833124, 11.782119, 12.273029,
12.462202, 12.538127, 12.236135, 12.2884941, 12.4195123, 12.5274317,
12.3917089, 11.912339, 12.439751, 12.0962051, 10.912737, 11.999598,
12.3776528, 11.348448, 12.4151316, 11.5389366, 11.328957, 12.4397802,
12.238454, 12.0192408, 12.2290439, 12.8381542, 11.1834666, 12.0636739,
12.4752125, 12.7681644, 12.1747129, 12.7343662, 12.3493937, 11.7971488
)), class = "data.frame", row.names = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L,
33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L,
46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 60L, 61L, 62L, 63L, 64L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L,
87L, 88L, 89L, 90L, 91L, 92L, 93L, 97L, 98L, 99L, 100L, 102L,
103L))
I want to create a grid of histograms for each column, the list of column is:
# Names of the columns that should each get their own histogram.
HK_GENES <- c("ACTB", "ATP5F1", "DDX5", "EEF1G")
Is there a way of doing it with ggplot2?
I tried with no success the following:
# Attempt from the question, reported as not working: the whole HK_GENES
# vector is passed both as the x aesthetic and as the facet specification,
# while the data frame is still in wide format (one column per gene).
ggplot(data=df_hk_genes, aes_string(x=HK_GENES)) +
geom_histogram(bins=15) +
facet_wrap(HK_GENES, nrow = 5, scale = "free_x")
In Python I could create a subfigure for each histogram and iterate over them.
I have around 20 columns in my original data frame, and I want to avoid repeating the same block of code for each column.

You can reshape the data and facet over the groups.
library(reshape2)
library(dplyr)
library(ggplot2)

# Reshape to long format (columns `variable` and `value`) so that a single
# histogram layer can be facetted by gene.
# measure.vars = HK_GENES makes the column selection explicit instead of
# relying on melt() to guess which columns are measurements.
melt(df_hk_genes, measure.vars = HK_GENES) %>%
  ggplot(aes(x = value)) +
  # `scales` is spelled out in full; `scale` only worked through partial
  # argument matching.
  facet_wrap(~ variable, nrow = 5, scales = "free_x") +
  geom_histogram(bins = 15)

Non-linear regression vs log model

library(ggplot2)
dat <- structure(list(y = c(52L, 63L, 59L, 58L, 57L, 54L, 27L, 20L, 15L, 27L, 27L, 26L, 70L, 70L, 70L, 70L, 70L, 70L, 45L, 42L, 41L, 55L, 45L, 39L, 51L,
64L, 57L, 39L, 59L, 37L, 44L, 44L, 38L, 57L, 50L, 56L, 66L, 66L, 64L, 64L, 60L, 55L, 52L, 57L, 47L, 57L, 64L, 63L, 49L, 49L,
56L, 55L, 57L, 42L, 60L, 53L, 53L, 57L, 56L, 54L, 42L, 45L, 34L, 52L, 57L, 50L, 60L, 59L, 52L, 42L, 45L, 47L, 45L, 51L, 39L,
38L, 42L, 33L, 62L, 57L, 65L, 44L, 44L, 39L, 46L, 49L, 52L, 44L, 43L, 38L),
x = c(122743L, 132300L, 146144L, 179886L, 195180L, 233605L, 1400L, 1400L, 3600L, 5000L, 14900L, 16000L, 71410L, 85450L, 106018L,
119686L, 189746L, 243171L, 536545L, 719356L, 830031L, 564546L, 677540L, 761225L, 551561L, 626799L, 68618L, 1211267L, 1276369L,
1440113L, 1153720L, 1244575L, 1328641L, 610452L, 692624L, 791953L, 4762522L, 5011232L, 5240402L, 521339L,
560098L, 608641L, 4727833L, 4990042L, 5263899L, 1987296L, 2158704L, 2350927L, 7931905L, 8628608L, 8983683L, 2947957L, 3176995L, 3263118L,
55402L, 54854L, 55050L, 52500L, 72000L, 68862L, 1158244L, 1099976L, 1019490L, 538146L, 471219L, 437954L, 863592L, 661055L,
548097L, 484450L, 442643L, 404487L, 1033728L, 925514L, 854793L, 371420L, 285257L, 260157L, 2039241L, 2150710L, 1898614L,
1175287L, 1495433L, 1569586L, 2646966L, 3330486L, 3282677L, 745784L, 858574L, 1119671L)),
class = "data.frame", row.names = c(NA, -90L))
# Scatter plot of the raw data on the original scale.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_point()
The relationship appears to be non-linear. Hence I fitted a model in which both y and x are log-transformed:
# Log-log linear model, followed by the log-transformed data with the
# fitted straight line.
mod.lm <- lm(log(y) ~ log(x), data = dat)
ggplot(data = dat, mapping = aes(x = log(x), y = log(y))) +
  geom_point() +
  geom_smooth(method = "lm")
However, the residuals show that the log transformation produces large discrepancies for the lower values. I then moved to the non-linear least squares method. I have not used it before, so I followed this post:
Why is nls() giving me "singular gradient matrix at initial parameter estimates" errors?
# Attempt from the question; the nls() call on the last line stops with the
# "step factor ... reduced below 'minFactor'" error quoted after this snippet.
# Starting guess for the additive term c: half of the smallest observed y.
c.0 <- min(dat$y) * 0.5
# Linear fit of log(y - c.0) on x, used only to derive starting values.
model.0 <- lm(log(y - c.0) ~ x, data = dat)
# a: exponentiated intercept, b: slope, c: the guess from above.
start <- list(a = exp(coef(model.0)[1]), b = coef(model.0)[2], c = c.0)
model <- nls(y ~ a * exp(b * x) + c, data = dat, start = start)
Error in nls(y ~ a * exp(b * x) + c, data = dat, start = start) :
step factor 0.000488281 reduced below 'minFactor' of 0.000976562
Can anyone explain what this error means and how to fit an nls model to the data above?

In your case nls runs into problems because your starting values are not good and because you introduced the coefficient c, which is not present in the linearized form.
You can fit your nls model as follows, with better starting values and without the coefficient c:
# Fit the exponential mean model y = exp(a + b * x).
# A Poisson GLM with a log link has the same mean structure, so its
# coefficients are used as starting values for nls().
# `data` is spelled out in full; `dat = dat` only worked through partial
# argument matching.
mod.glm <- glm(y ~ x, data = dat, family = poisson(link = "log"))
# [[ ]] drops the coefficient names so the start list holds plain scalars.
start <- list(a = coef(mod.glm)[[1]], b = coef(mod.glm)[[2]])
mod.nls <- nls(y ~ exp(a + b * x), data = dat, start = start)
I would recommend to use glm, as shown above, instead of nls to find the coefficients.
If the estimate of the linearized model (mod.lm) should not have a bias you need to adjust it.
# Compare the mean of the observations with the mean prediction of each model.
mod.lm <- lm(log(y) ~ log(x), data = dat)
mean(dat$y)                                # 50.44444
mean(predict(mod.glm, type = "response"))  # 50.44444
mean(predict(mod.nls))                     # 50.44499
mean(exp(predict(mod.lm)))                 # 49.11622 !

# Bias correction for a: shift the intercept on the log scale so that the
# back-transformed predictions have the same mean as the observations.
f <- log(mean(dat$y) / mean(exp(predict(mod.lm))))
mean(exp(coef(mod.lm)[1] + f + coef(mod.lm)[2] * log(dat$x)))  # 50.44444
If you want to obtain the coefficients given by James Phillips in the comments yourself, you can try:
# Model form y = a * x^(b * x) + offset, with explicit starting values.
mod.nlsJP <- nls(
  y ~ a * (x^(b * x)) + offset,
  data = dat,
  start = list(a = -30, b = -5e-6, offset = 50)
)

R - barplot grouping columns

I have the following data which contains data from 7 combinations (rows) and 12 methods (columns).
structure(list(Beams = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 2L
), .Label = c("1 – 2", "1 – 2 – 3 – 4", "1 – 3", "1 – 4", "2 – 3",
"2 – 4", "3 – 4"), class = "factor"), Slope...No.weight = c(75L,
65L, 45L, 30L, 95L, 70L, 75L), Slope...W1 = c(85L, 70L, 65L,
55L, 90L, 85L, 75L), Slope...W2 = c(80L, 65L, 65L, 50L, 90L,
90L, 75L), Slope...W3 = c(80L, 75L, 75L, 65L, 90L, 95L, 80L),
Average.Time...No.Weight = c(75L, 65L, 45L, 30L, 95L, 70L,
70L), Average.Time...W1 = c(70L, 60L, 75L, 60L, 75L, 75L,
80L), Average.Time...W2 = c(65L, 40L, 65L, 50L, 75L, 85L,
70L), Average.Time...W3 = c(65L, 40L, 80L, 75L, 65L, 85L,
80L), Momentum...No.weight = c(80L, 60L, 45L, 30L, 95L, 70L,
75L), Momentum...W1 = c(85L, 75L, 60L, 55L, 95L, 90L, 80L
), Momentum...W2 = c(80L, 65L, 70L, 50L, 90L, 90L, 85L),
Momentum...W3 = c(85L, 75L, 75L, 55L, 90L, 95L, 80L)), .Names = c("Beams",
"Slope...No.weight", "Slope...W1", "Slope...W2", "Slope...W3",
"Average.Time...No.Weight", "Average.Time...W1", "Average.Time...W2",
"Average.Time...W3", "Momentum...No.weight", "Momentum...W1",
"Momentum...W2", "Momentum...W3"), class = "data.frame", row.names = c(NA,
-7L))
I would like to get a barplot like the one below:
I've tried with
library(RColorBrewer)
# Attempt from the question; barplot() stops with the "incorrect number of
# names" error quoted after this snippet.
dat<-read.csv("phaser-p13-30dBm-100ms.csv")
# Seven labels, one per beam combination.
names <- c("1-2","1-3","1-4","2-3","2-4","3-4","1-2-3-4")
# NOTE(review): dat2 is not defined in this snippet (only dat is read above);
# presumably it holds the same data. Confirm.
# dat2[,2:13] selects 12 columns, while names.arg supplies 7 labels.
barx <-
barplot(as.integer(dat2[,2:13]),
beside=TRUE,
col=brewer.pal(12,"Set3"),
names.arg=names,
ylim=c(0,100),
xlab='Combination of beams',
ylab='Correct detection [%]')
box()
# Allow drawing outside the plot region before the legend is added.
par(xpd=TRUE)
legend("top", c("Slope - No weight","Slope - W1","Slope - W2","Slope - W3","Average Time - No weight","Average Time - W1","Average Time - W2","Average Time - W3","Momentum - No weight","Momentum - W1","Momentum - W2","Momentum - W3"), fill = brewer.pal(12,"Set3"),horiz = T)
but I got this error:
Error in barplot.default(as.integer(dat2[, 2:13]), beside = TRUE, col = brewer.pal(12, :
incorrect number of names
Could you find the error?

I've named your data frame df here and made use of three packages, so this is not a base R solution. Given the format of your dataset, this is the easiest way (IMO) to do it:
library(dplyr)
library(tidyr)
library(ggplot2)

# Grouped bar chart: one group per beam combination, one bar per method.
df %>%
  # Long format: every column except Beams is stacked into `variable`
  # (former column name) and `value` (cell content).
  # pivot_longer() replaces the superseded gather().
  pivot_longer(-Beams, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = Beams, y = value, fill = variable)) +
  # geom_col() plots the values as given (same as geom_bar(stat = "identity"));
  # position = "dodge" places the methods side by side.
  geom_col(position = "dodge")

This should get you started, if you wish to use base graphics and not ggplot2:
# Base-graphics version: barplot() needs a numeric matrix, so drop the first
# (label) column and reuse its values as row names.
df <- as.matrix(dat[, -1])
rownames(df) <- dat[, 1]

# beside = TRUE draws the bars side by side instead of stacked;
# las = 2 rotates the axis labels.
barplot(height = df, beside = TRUE, las = 2)

Use the ggplot2 package and make sure that your data is tidy and ordered.
Something like ggplot(dataframe, aes(colour = some_factor)) + geom_bar(aes(x=Some_variable, y=Some_other_variable))
A more explicit statement of how your data matches the image would be useful.

Legends for two different sized series in ggplot2#R

Is there a way to get legends for two series when plotted using ggplot in R?
May be I am missing some silly (should have known) argument in the function. I did not find the answer on the internet.
Here is the data:
df1 <- structure(list(time = structure(c(1352804400, 1352804430, 1352804460,
1352804490, 1352804520, 1352804550, 1352804580, 1352804610, 1352804640,
1352804670, 1352804700, 1352804730, 1352804760, 1352804790, 1352804820,
1352804850, 1352804880, 1352804910, 1352804940, 1352804970, 1352805000,
1352805030, 1352805060, 1352805090, 1352805120, 1352805150, 1352805180,
1352805210, 1352805240, 1352805270, 1352805300, 1352805330, 1352805360,
1352805390, 1352805420, 1352805450, 1352805480, 1352805510, 1352805540,
1352805570), class = c("POSIXct", "POSIXt"), tzone = ""), VE = c(36L,
31L, 32L, 55L, 39L, 45L, 46L, 60L, 56L, 53L, 58L, 60L, 30L, 38L,
55L, 40L, 47L, 52L, 33L, 34L, 58L, 38L, 39L, 33L, 39L, 50L, 38L,
32L, 32L, 41L, 44L, 35L, 48L, 51L, 59L, 35L, 51L, 56L, 39L, 35L
)), .Names = c("time", "VE"), row.names = c(NA, -40L), class = "data.frame")
df2 <- structure(list(time = structure(c(1352804400, 1352804430, 1352804460,
1352804490, 1352804520, 1352804550, 1352804580, 1352804610, 1352804640,
1352804670, 1352804700, 1352804730, 1352804760, 1352804790, 1352804820,
1352804850, 1352804880, 1352804910, 1352804940, 1352804970, 1352805000,
1352805030, 1352805060, 1352805090, 1352805120, 1352805150, 1352805180,
1352805210, 1352805240, 1352805270), class = c("POSIXct", "POSIXt"
), tzone = ""), VE = c(47L, 45L, 45L, 40L, 42L, 40L, 48L, 48L,
43L, 44L, 44L, 46L, 42L, 49L, 41L, 48L, 47L, 44L, 44L, 48L, 47L,
42L, 42L, 40L, 47L, 46L, 50L, 49L, 46L, 49L)), .Names = c("time",
"VE"), row.names = c(NA, -30L), class = "data.frame")
Here is the code:
# Two line layers with fixed colour and size; the second layer brings its own
# data (df2). Nothing is mapped inside aes() to colour or size.
ggplot(df1, aes(x = time, y = VE)) +
  geom_line(color = "red", size = 1) +
  geom_line(data = df2, aes(x = time, y = VE), colour = "blue", size = 2)

Specifically, implementing @baptiste's comment:
# Stack both series into one data frame; the factor s records which series
# each row came from and is mapped to colour and size.
dff <- rbind(
  data.frame(s = factor(1), df1),
  data.frame(s = factor(2), df2)
)

ggplot(dff, aes(x = time, y = VE, colour = s, size = s)) +
  geom_line() +
  scale_colour_manual(values = c("red", "blue")) +
  scale_size_manual(values = 1:2)

Using @Ben's answer and comments, removing the extra legend, and then renaming the legend title, I wrote:
# Stack both series; the factor s identifies the source series and is mapped
# to colour only, so a single legend is drawn.
dff <- rbind(data.frame(s = factor(1), df1), data.frame(s = factor(2), df2))

ggplot(dff, aes(x = time, y = VE, colour = s)) +
  geom_line() +
  scale_colour_manual(
    values = c("red", "blue"),
    labels = c('My label-1', 'My label-2'),
    name = 'Legend Title'
  ) +
  # The former scale_size_manual(values = c("red", "blue")) was dropped: size
  # is not mapped in aes(), and colour names are not valid sizes.
  theme(
    axis.title.x = element_text(face = "bold", size = 16),
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(angle = 0, vjust = 0.5, size = 14),
    axis.text.y = element_text(angle = 0, vjust = 0.5, size = 14)
  )
and got

maps of subregion in R

I am trying to plot geographical location on a map. I am using ggplot2 in combination with maps libraries.
Everything works well except for one region that is very crowded. I would like to zoom in on this region, but I have not been able to.
So I would like to zoom on the Galapagos island.
Here is the script I have used:
library(ggplot2)
library(maps)

# Sampling sites: the columns used below are long, lat and name.
measurements <- read.csv("all_podo.count.csv", header = TRUE)
# Polygon outlines of the world map as a data frame.
allworld <- map_data("world")

d <- ggplot(measurements, aes(long, lat)) +
  # World outline as background. The former `xlim = c(-50, 100)` argument was
  # removed: it is not a geom_polygon() parameter and did not restrict the map.
  geom_polygon(
    data = allworld,
    aes(x = long, y = lat, group = group),
    colour = "grey70", fill = "grey70"
  ) +
  geom_point(size = 0.7, shape = 8) +
  # theme()/element_blank() replace opts()/theme_blank(), which are no longer
  # part of ggplot2.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  geom_text(aes(label = name), size = 1, vjust = 0, hjust = 0)
d
The measurements look like this:
structure(list(site_num = c(59L, 54L, 44L, 42L, 38L, 37L, 43L,
39L, 36L, 40L, 34L, 35L, 33L, 41L, 32L, 31L, 30L, 29L, 28L, 27L,
26L, 25L, 24L, 23L, 22L, 21L, 20L, 19L, 18L, 17L, 16L, 15L, 14L,
13L, 12L, 11L, 10L, 7L, 8L, 9L, 1L, 3L, 6L, 5L, 79L, 77L, 78L,
76L, 75L, 74L, 80L, 81L, 72L, 73L, 71L, 70L, 69L, 68L, 66L, 67L,
65L, 63L, 64L, 62L, 60L, 61L), name = structure(c(44L, 43L, 42L,
40L, 36L, 35L, 41L, 37L, 34L, 38L, 32L, 33L, 31L, 39L, 30L, 29L,
28L, 27L, 26L, 25L, 24L, 23L, 22L, 21L, 20L, 19L, 18L, 17L, 16L,
15L, 14L, 13L, 12L, 11L, 10L, 9L, 8L, 5L, 6L, 7L, 1L, 2L, 4L,
3L, 64L, 62L, 63L, 61L, 60L, 59L, 65L, 66L, 57L, 58L, 56L, 55L,
54L, 53L, 51L, 52L, 50L, 48L, 49L, 47L, 45L, 46L), .Label = c("GS000a",
"GS000b", "GS000c", "GS000d", "GS001a", "GS001b", "GS001c", "GS002",
"GS003", "GS004", "GS005", "GS006", "GS007", "GS008", "GS009",
"GS010", "GS011", "GS012", "GS013", "GS014", "GS015", "GS016",
"GS017", "GS018", "GS019", "GS020", "GS021", "GS022", "GS023",
"GS025", "GS026", "GS027", "GS028", "GS029", "GS030", "GS031",
"GS032", "GS033", "GS034", "GS035", "GS036", "GS037", "GS047",
"GS051", "GS108a", "GS108b", "GS109", "GS110a", "GS110b", "GS111",
"GS112a", "GS112b", "GS113", "GS114", "GS115", "GS116", "GS117a",
"GS117b", "GS119", "GS120", "GS121", "GS122a", "GS122b", "GS123",
"GS148", "GS149"), class = "factor"), lat = c(-15.143611, -10.131389,
-1.9738889, 1.3891667, -0.3011111, 0.27222222, -0.020833334,
-0.5938889, -0.2, -1.2283334, -1.2161111, -1.2169445, 1.2641667,
-0.38305557, 5.552778, 5.64, 6.492778, 8.129167, 9.164444, 10.716389,
18.036667, 20.5225, 24.174723, 24.488333, 32.506943, 36.003887,
38.946945, 39.417778, 38.94, 41.09111, 41.485832, 43.63222, 45.111668,
44.690277, 44.137222, 42.85278, 42.503056, 32.166668, 32.166668,
32.166668, 31.175, 31.175, 31.175, 32.174835, -32.399166, -30.898333,
-30.898333, -29.348888, -26.035, -23.21611, -6.3166666, -6.3166666,
-4.613611, -4.613611, -4.635, -4.6625, -4.990278, -7.0075, -8.505,
-8.505, -9.596945, -10.446111, -10.446111, -10.943611, -12.0925,
-12.0925), long = c(-147.435, -135.44945, -95.014725, -91.81695,
-91.651665, -91.63333, -91.19778, -91.06944, -90.83528, -90.42917,
-90.422775, -90.319725, -90.295, -90.279724, -87.087776, -86.56528,
-82.90389, -79.69111, -79.83611, -80.25445, -83.78472, -85.41361,
-84.344444, -83.07, -79.263885, -75.39472, -76.41722, -75.504166,
-74.685, -71.60222, -71.35111, -66.84722, -64.94666, -63.637222,
-63.644444, -66.217224, -67.24, -64.5, -64.5, -64.5, -64.32433,
-64.32433, -64.32433, -64.01017, 36.591946, 40.420277, 40.420277,
43.215557, 50.123055, 52.30611, 39.009167, 39.009167, 55.50861,
55.50861, 56.836113, 60.523056, 64.97667, 76.33139, 80.37556,
80.37556, 84.1975, 88.30278, 88.30278, 92.05889, 96.88167, 96.88167
)), .Names = c("site_num", "name", "lat", "long"), class = "data.frame", row.names = c(NA,
-66L))
Can you help me ?
Thank you,
Simon

This shows you that the world map on package maps is not particularly high resolution by the time you get down to the level of the Galapagos:
# Plot the world-map polygons restricted to the window around the Galapagos
# to show how coarse the outlines are at this scale.
# requireNamespace() only checks that the optional maps package is installed
# (map_data() needs it) without attaching it the way require() does.
if (requireNamespace("maps", quietly = TRUE)) {
  world <- map_data("world")
  ggplot(world, aes(long, lat)) +
    geom_polygon(aes(group = group), fill = "green", colour = "red") +
    ylim(c(-2, 1)) +
    xlim(c(-93, -88))
}
There doesn't seem to be a group with that name. I think you might want to find a better shapefile for this project:
# List the distinct region names that contain "Is".
grep(pattern = "Is", x = unique(world$region), value = TRUE)
 [1] "Israel"
[2] "Marshall Islands"
[3] "Solomon Islands"
[4] "Cook Islands"
[5] "South Sandwich Islands"
[6] "Sonsorol Island"
[7] "Maug Island"
[8] "Pitcairn Islands"
[9] "Isle of Man"
[10] "Andaman Islands"
[11] "Northern Mariana Islands"
[12] "Madeira Islands"
[13] "Sin Cowe Island"
[14] "Paracel Islands"
[15] "Falkland Islands"
[16] "Cayman Islands"
[17] "Virgin Islands"
[18] "Canary Islands"
[19] "Spratly Island"
[20] "Isle of Wight"
An RSeek search finds several links. One of them to :
http://downloads.cloudmade.com/americas/south_america/ecuador/galapagos/galapagos.shapefiles.zip
"All of the files at downloads.cloudmade.com are based on data from OpenStreetMap and are licensed under the terms of the Creative Commons Attribution Share-Alike 2.0 license. If you use these files please make sure you attribute the OpenStreetMap community by including a link to www.openstreetmap.org."

Try running the following after your code above:
library(grid)

# Viewport for the inset: centred at (0.1, 0.1) and 0.2 wide/high in npc
# units, i.e. the bottom-left corner of the page.
inset_vp <- viewport(
  x = unit(0.1, "npc"), y = unit(0.1, "npc"),
  width = unit(0.2, "npc"), height = unit(0.2, "npc")
)

# Same layers as the main map, zoomed to the crowded region.
d2 <- ggplot(measurements, aes(long, lat)) +
  # The former `xlim = c(-50, 100)` argument was removed: it is not a
  # geom_polygon() parameter.
  geom_polygon(
    data = allworld,
    aes(x = long, y = lat, group = group),
    colour = "grey70", fill = "grey70"
  ) +
  geom_point(size = 0.7, shape = 8) +
  # coord_cartesian() zooms without discarding data; xlim()/ylim() would drop
  # every polygon vertex outside the window before drawing.
  coord_cartesian(xlim = c(-93, -88), ylim = c(-2, 1))

# Passing vp draws on the current page inside the inset viewport and leaves
# it again afterwards, so later drawing is not confined to the inset.
print(d2, vp = inset_vp)
This should give a general idea of adding a sub map, but you will want to adjust the above to get a better zoom and better looking results.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

How could I insert a histogram into a `geom_smooth` plot? - r

Related

Plotting multiple different histograms based on vector of column names

Non-linear regression vs log model

R - barplot grouping columns

Legends for two different sized series in ggplot2#R

maps of subregion in R

Categories

Resources