Set ggmap boundary based on Latitude and Longitude - r

So I have R program, and am struggling with getting all points in map
library(ggmap)
library(ggplot2)
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# get the map
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
And here is output map
I wish to zoom in enough without cutting out data points, but no matter location I pick on map, the data keeps getting cut, i.e. Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")

You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
data("catholic_dioceses")
bbox <- make_bbox(catholic_dioceses$long, catholic_dioceses$lat, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=catholic_dioceses, aes(x = long, y = lat))
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude,
color = Percent_SEP12_Assets),
size = 9, alpha = 0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))

Related

Creating scatter plot class or group wise

Im using ggstatsplot's ggscatterstats function to calculate correlation between various clinical parameters and then plotting them. For example
here my variables are age and WBC. This is taking all the data points irrespective of the class they belong. I would like to do the same with each FAB classification that is present in my data.
dat <- merge_clinical_class_TMB %>% select(FAB,AGE,Wbc,Platelet,HB,PB_Blasts,BM_Blasts,TMB_NONSYNONYMOUS)
df2 <- dat
library(ggstatsplot)
ggscatterstats(
df2,
x = AGE,
y = Wbc,
type = "np" # try the "robust" correlation too! It might be even better here
#, marginal.type = "boxplot"
)
My dataframe looks like this
head(df2)
FAB AGE Wbc Platelet HB PB_Blasts BM_Blasts TMB_NONSYNONYMOUS
1 M4 50 17 231 10 88 52 0.3000000
2 M3 61 1 90 10 44 0 0.4333333
3 M3 30 6 114 11 82 6 0.2333333
4 M0 77 92 105 9 67 56 0.4000000
5 M1 46 29 90 9 90 81 0.5666667
6 M1 68 3 63 8 91 55 0.9000000
My data
dput(df2)
structure(list(FAB = structure(c(5L, 4L, 4L, 1L, 2L, 2L, 3L,
3L, 3L, 5L, 3L, 5L, 1L, 5L, 5L, 3L, 3L, 3L, 1L, 2L, 1L, 4L, 6L,
6L, 5L, 3L, 5L, 7L, 5L, 1L, 6L, 5L, 5L, 6L, 5L, 6L, 3L, 3L, 4L,
4L, 5L, 7L, 3L, 3L, 5L, 2L, 5L, 1L, 3L, 6L, 2L, 5L, 2L, 5L, 7L,
3L, 3L, 8L, 6L, 4L, 2L, 2L, 2L, 2L, 3L, 8L, 3L, 2L, 2L, 4L, 6L,
3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 6L, 2L, 1L, 3L, 2L, 5L, 5L,
1L, 2L, 5L, 6L, 6L, 2L, 6L, 4L, 2L, 5L, 2L, 2L, 2L, 1L, 4L, 4L,
1L, 3L, 9L, 6L, 5L, 5L, 1L, 3L, 3L, 5L, 1L, 2L, 2L, 3L, 5L, 1L,
5L, 5L, 6L, 2L, 2L, 2L, 1L, 3L, 3L, 6L, 5L, 2L, 5L, 1L, 2L, 8L,
2L, 3L, 9L, 5L, 2L, 1L, 5L, 3L, 5L, 5L, 1L, 3L, 2L, 5L, 3L, 6L,
5L, 1L, 2L, 2L, 5L, 3L, 5L, 5L, 6L, 5L, 5L, 3L, 5L, 6L, 3L, 2L,
3L, 3L, 2L, 4L, 6L, 4L, 1L, 2L, 6L, 3L, 6L, 2L, 3L, 2L, 4L, 2L,
2L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 6L, 2L, 4L, 2L, 5L, 2L,
4L), .Label = c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7",
"nc"), class = "factor"), AGE = c(50L, 61L, 30L, 77L, 46L, 68L,
23L, 64L, 76L, 81L, 25L, 78L, 39L, 49L, 57L, 63L, 62L, 52L, 76L,
64L, 65L, 61L, 44L, 31L, 64L, 33L, 55L, 50L, 64L, 59L, 59L, 77L,
33L, 48L, 35L, 66L, 67L, 51L, 74L, 51L, 64L, 77L, 63L, 37L, 57L,
53L, 62L, 39L, 72L, 66L, 51L, 51L, 18L, 63L, 54L, 75L, 40L, 60L,
76L, 33L, 63L, 53L, 75L, 67L, 66L, 77L, 64L, 76L, 51L, 42L, 51L,
59L, 43L, 45L, 60L, 47L, 68L, 24L, 48L, 73L, 60L, 44L, 71L, 25L,
60L, 57L, 55L, 69L, 42L, 42L, 45L, 50L, 41L, 21L, 50L, 69L, 76L,
70L, 27L, 76L, 65L, 48L, 59L, 69L, 81L, 22L, 61L, 51L, 63L, 61L,
22L, 73L, 49L, 41L, 47L, 54L, 44L, 55L, 83L, 78L, 59L, 57L, 57L,
88L, 43L, 71L, 62L, 75L, 62L, 58L, 65L, 66L, 60L, 35L, 76L, 72L,
35L, 73L, 67L, 70L, 48L, 65L, 41L, 52L, 67L, 58L, 34L, 60L, 55L,
56L, 61L, 31L, 71L, 56L, 57L, 60L, 57L, 58L, 79L, 55L, 34L, 76L,
82L, 67L, 67L, 54L, 53L, 71L, 61L, 30L, 50L, 35L, 29L, 45L, 38L,
81L, 31L, 75L, 67L, 29L, 51L, 40L, 32L, 57L, 25L, 63L, 75L, 25L,
68L, 62L, 25L, 31L, 68L, 45L, 61L, 35L, 22L, 23L, 21L, 53L),
Wbc = c(17L, 1L, 6L, 92L, 29L, 3L, 32L, 117L, 62L, 91L, 34L,
10L, 2L, 57L, 88L, 77L, 75L, 4L, 15L, 1L, 3L, 86L, 9L, 137L,
132L, 3L, 22L, 6L, 3L, 1L, 12L, 40L, 26L, 116L, 53L, 112L,
2L, 42L, 32L, 4L, 2L, 3L, 17L, 19L, 14L, 3L, 119L, 5L, 3L,
79L, 104L, 3L, 35L, 77L, 2L, 8L, 8L, 1L, 4L, 1L, 46L, 2L,
6L, 31L, 3L, 2L, 3L, 34L, 2L, 2L, 15L, 12L, 4L, 29L, 12L,
12L, 60L, 224L, 33L, 2L, 7L, 14L, 5L, 11L, 47L, 5L, 31L,
6L, 11L, 38L, 5L, 7L, 134L, 93L, 3L, 10L, 3L, 48L, 90L, 297L,
1L, 1L, 1L, 2L, 2L, 115L, 35L, 50L, 18L, 62L, 52L, 15L, 12L,
48L, 81L, 13L, 35L, 28L, 78L, 17L, 30L, 99L, 20L, 3L, 172L,
6L, 28L, 98L, 59L, 101L, 68L, 2L, 2L, 43L, 4L, 38L, 34L,
59L, 37L, 1L, 111L, 49L, 43L, 298L, 26L, 47L, 14L, 16L, 114L,
203L, 8L, 133L, 1L, 31L, 3L, 68L, 3L, 20L, 19L, 73L, 20L,
5L, 1L, 15L, 45L, 68L, 88L, 36L, 10L, 23L, 1L, 72L, 1L, 2L,
40L, 12L, 13L, 7L, 46L, 2L, 64L, NA, 5L, 103L, 8L, 1L, 3L,
16L, 29L, 1L, 99L, 2L, 6L, 2L, 3L, 2L, 115L, 27L, 8L, 1L),
Platelet = c(231L, 90L, 114L, 105L, 90L, 63L, 38L, 100L,
32L, 32L, 23L, 98L, 215L, 14L, 56L, 19L, 110L, 22L, 85L,
42L, 16L, 22L, 50L, 42L, 15L, 61L, 65L, 50L, 134L, 102L,
57L, 29L, 111L, 50L, 44L, 34L, 28L, 232L, 42L, 58L, 27L,
86L, 23L, 38L, 76L, 108L, 52L, 175L, 52L, 132L, 23L, 143L,
30L, 41L, 9L, 21L, 95L, 59L, 79L, 38L, 11L, 68L, 22L, 141L,
168L, 70L, 41L, 21L, 25L, 35L, 14L, 20L, 67L, 116L, 45L,
57L, 8L, 34L, 32L, 60L, 93L, 145L, 48L, 33L, 50L, 129L, 9L,
61L, 176L, 12L, 53L, 136L, 40L, 73L, 27L, 12L, 166L, 30L,
87L, 40L, 94L, 52L, 23L, 127L, 39L, 57L, 35L, 21L, 148L,
25L, 149L, 64L, 351L, 71L, 53L, 22L, 35L, 31L, 46L, 85L,
18L, 80L, 62L, 156L, 32L, 50L, 69L, 31L, 20L, 57L, 142L,
37L, 79L, 66L, 21L, 31L, 88L, 11L, 15L, 82L, 53L, 76L, 51L,
68L, 64L, 55L, 40L, 90L, 37L, 45L, 36L, 52L, 86L, 88L, 35L,
174L, 28L, 121L, 131L, 17L, 152L, 52L, 30L, 79L, 79L, 87L,
30L, 44L, 140L, 59L, 58L, 19L, 29L, 156L, 19L, 61L, 36L,
11L, 71L, 13L, 45L, 34L, 39L, 82L, 18L, 43L, 118L, 32L, 73L,
15L, 60L, 208L, 96L, 257L, 61L, 12L, 32L, 23L, 52L, 46L),
HB = c(10L, 10L, 11L, 9L, 9L, 8L, 7L, 10L, 10L, 11L, 11L,
10L, 10L, 8L, 10L, 13L, 11L, 9L, 9L, 8L, 9L, 12L, 8L, 6L,
10L, 7L, 8L, 9L, 11L, 12L, 11L, 10L, 10L, 9L, 8L, 10L, 9L,
13L, 9L, 8L, 12L, 9L, 12L, 9L, 9L, 9L, 11L, 10L, 11L, 12L,
12L, 11L, 9L, 10L, 9L, 9L, 10L, 9L, 10L, 9L, 8L, 9L, 9L,
10L, 12L, 10L, 10L, 8L, 10L, 9L, 11L, 11L, 11L, 8L, 9L, 9L,
9L, 6L, 10L, 10L, 9L, 9L, 8L, 9L, 9L, 7L, 9L, 11L, 12L, 10L,
9L, 10L, 12L, NA, 10L, 7L, 11L, 10L, 9L, 11L, 10L, 9L, 8L,
8L, 10L, 9L, 12L, 11L, 8L, 13L, 11L, 9L, 9L, 12L, 10L, 9L,
10L, 8L, 9L, 9L, 9L, 10L, 9L, 10L, 10L, 9L, 10L, 8L, 7L,
9L, 9L, 8L, 9L, 9L, 8L, 10L, 8L, 9L, 9L, 8L, 9L, 9L, 9L,
9L, 9L, 10L, 9L, 8L, 9L, 10L, 7L, 11L, 11L, 10L, 6L, 8L,
9L, 9L, 10L, 8L, 11L, 10L, 11L, 8L, 9L, 8L, 9L, 8L, 10L,
10L, 10L, 9L, 9L, 12L, 9L, 9L, 11L, 9L, 13L, 9L, 10L, 8L,
9L, 10L, 10L, 11L, 9L, 9L, 10L, 9L, 9L, 11L, 7L, 13L, 14L,
12L, 8L, 12L, 8L, 9L), PB_Blasts = c(88L, 44L, 82L, 67L,
90L, 91L, 59L, 60L, 48L, 98L, 53L, 40L, 75L, 81L, 90L, 57L,
46L, 67L, 74L, 61L, 99L, 73L, 74L, 83L, 72L, 33L, 35L, 70L,
85L, 61L, 95L, 80L, 71L, 83L, 90L, 90L, 50L, 64L, 51L, 93L,
95L, 75L, 80L, 52L, 61L, 72L, 65L, 83L, 45L, 32L, 85L, 73L,
86L, 82L, 30L, 48L, 47L, 58L, 78L, 100L, 81L, 82L, 40L, 89L,
70L, 47L, 80L, 73L, 62L, 88L, 57L, 70L, 40L, 56L, 86L, 37L,
90L, 77L, 75L, 37L, 94L, 86L, 97L, 72L, 87L, 40L, 52L, 60L,
68L, 40L, 95L, 81L, 92L, 90L, 90L, 42L, 37L, 84L, 77L, 99L,
83L, 65L, 79L, 82L, 46L, 94L, 71L, 39L, 62L, 95L, 55L, 11L,
51L, 42L, 77L, 72L, 39L, 69L, 75L, 70L, 75L, 52L, 91L, 33L,
87L, 55L, 72L, 76L, 85L, 79L, 79L, 81L, 50L, 81L, 33L, 88L,
34L, 90L, 69L, 32L, 92L, 90L, 47L, 75L, 30L, 59L, 57L, 62L,
54L, 60L, 89L, 82L, 90L, 90L, 64L, 89L, 43L, 58L, 58L, 97L,
71L, 91L, 53L, 75L, 85L, 67L, 86L, 70L, 43L, 86L, 74L, 87L,
0L, 0L, 86L, 53L, 63L, 41L, 76L, 45L, 85L, 0L, 94L, 6L, 91L,
0L, 2L, 93L, 85L, 82L, 56L, 40L, 48L, 0L, 14L, 90L, 71L,
51L, 91L, 42L), BM_Blasts = c(52L, 0L, 6L, 56L, 81L, 55L,
0L, 0L, 88L, 37L, 87L, 6L, 4L, 48L, 84L, 70L, 53L, 18L, 82L,
5L, 34L, 68L, 5L, 6L, 90L, 0L, 67L, 0L, 22L, 12L, 0L, 2L,
14L, 3L, 18L, 7L, 17L, 79L, 0L, 40L, 0L, 8L, 71L, 33L, 17L,
41L, 65L, 53L, 0L, 11L, 85L, 2L, 90L, 39L, 0L, 54L, 23L,
0L, 0L, 0L, 97L, 42L, 48L, 61L, 6L, 0L, 46L, 55L, 10L, 2L,
0L, 48L, 39L, 37L, 43L, 0L, 91L, 76L, 41L, 16L, 30L, 17L,
54L, 50L, 65L, 0L, 59L, 22L, 51L, 16L, 6L, 10L, 90L, 72L,
0L, 32L, 0L, 49L, 88L, 98L, 0L, 0L, 15L, 0L, 0L, 94L, 55L,
39L, 9L, 86L, 70L, 11L, 5L, 74L, 79L, 90L, 83L, 57L, 74L,
28L, 17L, 4L, 91L, 0L, 91L, 50L, 49L, 80L, 22L, 64L, 84L,
12L, 14L, 86L, 6L, 18L, 40L, 0L, 61L, 6L, 87L, 0L, 62L, 51L,
6L, 72L, 59L, 29L, 24L, 96L, 0L, 53L, 13L, 45L, 61L, 56L,
35L, 10L, 0L, 8L, 58L, 16L, 25L, 10L, 3L, 71L, 52L, 67L,
32L, 88L, 10L, 8L, 0L, 0L, 97L, 7L, 45L, 0L, 49L, 9L, 85L,
0L, 70L, 91L, 7L, 0L, 2L, 0L, 32L, 11L, 71L, 0L, 48L, 0L,
14L, 7L, 90L, 63L, 83L, 29L), TMB_NONSYNONYMOUS = c(0.3,
0.433333333333, 0.233333333333, 0.4, 0.566666666667, 0.9,
0.3, 0.133333333333, 0.4, 0.3, 0.233333333333, 0.5, 0.266666666667,
0, 0.2, 0.4, 0.266666666667, 0.333333333333, 0.4, 0.4, 0.566666666667,
0.0333333333333, 0.166666666667, 0.1, 0.166666666667, 0.266666666667,
0.3, 0.3, 0.466666666667, 0.0666666666667, 0.266666666667,
0.266666666667, 0.0333333333333, 0.1, 0.133333333333, 0.0333333333333,
0.5, 0.6, 0.0333333333333, 0.1, 0.0333333333333, 0.333333333333,
0.433333333333, 0.2, 0.466666666667, 0.2, 0.0333333333333,
0.733333333333, 0.2, 0.233333333333, 0.233333333333, 0.3,
0.133333333333, 0, 0.3, 0.333333333333, 0.333333333333, 0.266666666667,
0.533333333333, 0.2, 0.533333333333, 0.466666666667, 0.533333333333,
0.0333333333333, 0.3, 0.5, 0.333333333333, 0.266666666667,
0.5, 0.333333333333, 0.0666666666667, 0.466666666667, 0.333333333333,
0.266666666667, 0.7, 0.433333333333, 0.166666666667, 0.0666666666667,
0.233333333333, 0.5, 0.0333333333333, 0.2, 0.433333333333,
0.433333333333, 0.4, 0.233333333333, 0.0666666666667, 0.233333333333,
0.466666666667, 0.0666666666667, 0, 0.1, 0.4, 0.1, 0.2, 0.4,
0.433333333333, 0.566666666667, 0.2, 0.0333333333333, 0.533333333333,
0.566666666667, 0.3, 0.466666666667, 0.566666666667, 0.0333333333333,
0.4, 0.0666666666667, 0.633333333333, 0.4, 0.466666666667,
0.466666666667, 0.3, 0.5, 0.0333333333333, 0.333333333333,
0.333333333333, 0.266666666667, 0.366666666667, 0.666666666667,
0.333333333333, 0.533333333333, 0.466666666667, 0.6, 0.333333333333,
0.4, 0.266666666667, 0.366666666667, 0.2, 0.0333333333333,
0.266666666667, 0.3, 0.166666666667, 0.4, 0.566666666667,
0.4, 0.1, 0.1, 0.0666666666667, 0.366666666667, 0, 0.4, 0.0333333333333,
0.1, 0.0666666666667, 0.5, 0.3, 0.466666666667, 0.0333333333333,
0.4, 0.1, 0.0666666666667, 0.766666666667, 0.5, 0.466666666667,
0.333333333333, 0.4, 0.333333333333, 0.4, 0.266666666667,
0.2, 0.3, 0.7, 0.166666666667, 0.2, 0, 0.5, 0.166666666667,
0.533333333333, 0.233333333333, 0.166666666667, 0.133333333333,
0.0666666666667, 0.4, 0.333333333333, 0.133333333333, 0.4,
0.233333333333, 0.466666666667, 0.366666666667, 0.266666666667,
0.266666666667, 0.266666666667, 0.4, 0.2, 0.166666666667,
0.4, 0.333333333333, 0.166666666667, 0.266666666667, 0.1,
0.333333333333, 0.733333333333, 0.466666666667, 0.466666666667,
0.2, 0.1, 1.13333333333, 0.2, 0.3)), class = "data.frame", row.names = c(NA,
-200L))
Objective I would like to do the same with various FABI have FAB label from M0 to M7 I would like to ignore nc
So for each FAB label I would like to see the correlation for example if I have to take the M0 class then I would like to see their Age vs Wbc correlation and similarly for other FAB class as well. Is it possible to do these in ggstataplot as I don't see for correlation any such functionality there .
Simple way is I can subset them and do the same like M0 ,M1, M2 etc etc but that is a long process can I split the FAB column and pass it to the library?
I would like to know other ways to do the above and plot the same
Any help or suggestion would be appreciated
Update: We could also use the built in function see comments:
Many thanks to #Indrajeet Patil: https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html#grouped-analysis-with-grouped_ggscatterstats
To subset FAB we use filter:
## for reproducibility
set.seed(123)
## plot
grouped_ggscatterstats(
## arguments relevant for ggscatterstats
data = df2 %>% filter(as.integer(FAB)<5),
x = AGE,
y = Wbc,
grouping.var = FAB,
type = "r",
# ggtheme = ggthemes::theme_tufte(),
## arguments relevant for combine_plots
annotation.args = list(
title = "Relationship between Wbc and Age",
caption = "Source: stackoverflow"
),
plotgrid.args = list(nrow = 2, ncol = 2)
)
First answer:
We could do something like this:
write a function and pass the data frame + the column FAB value:
library(ggstatsplot)
my_function <- function(df, x){
ggscatterstats(
df %>% filter(FAB == x),
x = AGE,
y = Wbc,
type = "np" # try the "robust" correlation too! It might be even better here
#, marginal.type = "boxplot"
)
}
M0 <- my_function(df2, "M0")
M1 <- my_function(df2, "M1")
M2 <- my_function(df2, "M2")
M3 <- my_function(df2, "M3")
.
.
.
library(patchwork)
(M0 / M1 | M2 / M3)

Loop with multiple subset of data frame

I have a data.frame fish.test0 for which I want to grep specific variables (in varlist) matching the group column to create a sub-data.frame that will undergo a statistical test. The results of the test is saved in tests.res.t. I want to loop the varlist so that I get one results for each input in varlist
Script:
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
fish.test <- fish.test0[grep("i",fish.test0$group),]
column <- c("ACDC")
tests <- list()
dat_test <- sapply( column, function(colx)
lapply( unique(fish.test$Merge), function(x)
fisher.test( data.frame(
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] -
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
),
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] -
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
tests.res.t <- as.data.frame(t(tests.res))
}
test-data:
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L,
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L,
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L,
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L,
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L,
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L,
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L,
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L,
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L,
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L,
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L,
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus",
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct",
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue",
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast",
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum",
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus",
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder",
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck",
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney",
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver",
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung",
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue",
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium",
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system",
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary",
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas",
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate",
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin",
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine",
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach",
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown",
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract",
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus",
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue",
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast",
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum",
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus",
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney",
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver",
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung",
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas",
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin",
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L,
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L,
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L,
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L,
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L,
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L,
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L,
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L,
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L,
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L,
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L,
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct",
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue",
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum",
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus",
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck",
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney",
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung",
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue",
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system",
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary",
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate",
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin",
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine",
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown",
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract",
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue",
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast",
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum",
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney",
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver",
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas",
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin",
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L,
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L,
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L,
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L,
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")
This is probably not an answer but it should help to improve you code. If I'm terribly wrong, I'll remove my answer right away. I have loeft out the test business which I don't understand, but your problem seems to be extraction.
The first thing is that you need to remove the quotation marks in your grep command, try:
varlist <- c("Abiotrophia","Alphatorquevirus")
for( i in varlist )
{
# extract rows which contain the variable
fish.test <- fish.test0[ grep( i, fish.test0$group ), ]
print( head( fish.test ) )
}
From what I understand, you need to define column and tests outside your loop. Does that give you more of what you want:
varlist <- c("Abiotrophia","Alphatorquevirus")
column <- "ACDC"
tests <- list()
for( i in 1 : length( varlist ) ) # index can be used later to fill the list
{
# extract rows which contain the variable
fish.test <- fish.test0[ grep( varlist[ i ], fish.test0$group ), ]
# add a column with your name of choice
fish.test <- cbind( fish.test, c( 1: length( fish.test$group ) ) )
colnames( fish.test )[ length( fish.test ) ] <- column
# write each result into your defined list
tests[[ i ]] <- fish.test
}

Tabu search in R

Good evening,
As part of a data analysis course we have been thrown into the Metaheuristics realm.....and I am really struggling to understand how to implement a Tabu search in R since my background in programming is rather limited.
I haven't found any R or Python example on Google or youtube either so I'm really praying I'll find something here.
The problem I have is similar to the "location problem" in optimisation. I need to find the best combination of Hubs that minimizes the total distance between Hubs and nodes.
I need to find 5 hubs, and the total capacity for each one is 120
nodes <- structure(list(node_number = 1:50,
x = c(2L, 80L, 36L, 57L, 33L, 76L, 77L, 94L,
89L, 59L, 39L, 87L, 44L, 2L, 19L, 5L,
58L, 14L, 43L, 87L, 11L, 31L, 51L, 55L,
84L, 12L, 53L, 53L, 33L, 69L, 43L, 10L,
8L, 3L, 96L, 6L, 59L, 66L, 22L, 75L, 4L,
41L, 92L, 12L, 60L, 35L, 38L, 9L, 54L, 1L),
y = c(62L, 25L, 88L, 23L, 17L, 43L, 85L, 6L, 11L,
72L, 82L, 24L, 76L, 83L, 43L, 27L, 72L, 50L,
18L, 7L, 56L, 16L, 94L, 13L, 57L, 2L, 33L, 10L,
32L, 67L, 5L, 75L, 26L, 1L, 22L, 48L, 22L, 69L,
50L, 21L, 81L, 97L, 34L, 64L, 84L, 100L, 2L, 9L, 59L, 58L),
node_demand = c(3L, 14L, 1L, 14L, 19L, 2L, 14L, 6L,
7L, 6L, 10L, 18L, 3L, 6L, 20L, 4L,
14L, 11L, 19L, 15L, 15L, 4L, 13L,
13L, 5L, 16L, 3L, 7L, 14L, 17L,
3L, 3L, 12L, 14L, 20L, 13L, 10L,
9L, 6L, 18L, 7L, 20L, 9L, 1L, 8L,
5L, 1L, 7L, 9L, 2L)),
.Names = c("node_number", "x", "y", "node_demand"),
class = "data.frame", row.names = c(NA, -50L))
hubs_required = 5
total_capacity = 120
My strategy was to create a distance matrix, then I will create another 50 x 50 matrix to represent wether a node becomes a hub or not, and finally I will multiply both and add all the distances to get the total distance.
I created the dataframe:
nodes_df <- as.data.frame(nodes)
colnames(nodes_df) <- c("x", "y", "node_demand")
rownames(nodes_df) <- paste('Node',1:50)
I created the distance matrix
distance_df <-as.data.frame(as.matrix(round(dist(nodes_df,method = "euclidean",diag = TRUE,upper = TRUE))))
colnames(distance_df) <- paste("Node",1:50)
I created the node demand matrix:
demand <- as.vector(rep(c(nodes_df[,'node_demand']),50))
demand_matrix <- matrix(demand,nrow=50,ncol=50,byrow = TRUE)
diag(demand_matrix) <- 0
demand_matrix <- as.data.frame(demand_matrix)
I created an empty matrix to show whether a node becomes a hub "1" or not "0"
hubs_matrix <- matrix(0,nrow = 50,ncol = 50,byrow = TRUE)
colnames(hubs_matrix) <- paste("Hub",1:50)
rownames(hubs_matrix) <- paste("Node",1:50)
Then to create the initial solution I randomly assign Hubs and calculate the distance and demand.
set.seed(37)
hubs_matrix <- do.call("cbind", lapply(1:50, function(x) sample(c(1, rep(0, 49)), 50)))
sum_distances <- (hubs_matrix * distance_df)
sum(rowSums(sum_distances))
The idea is to try different combinations of '1'' and '0' as to minimise the total distance but I am having the following issues:
I got no idea how to do the local search and do the permutations from the initial solution.
I got no idea how to prevent R to use the best solution for a certain period of time, i.e the Tabu list
I got no idea how to deal with the supply restriction for each node ( total demand from each node < 120), I could do it with a loop but since in this case I'm multiplying matrices I'm pretty lost.
Anybody could give me a hand???
Many thanks!

How do I plot a sequence of number using ggplot2

Suppose I have a sequence of numbers:
> dput(firstGrade_count)
c(4L, 346L, 319L, 105L, 74L, 5L, 124L, 2L, 10L, 35L, 6L, 206L,
7L, 8L, 6L, 9L, 26L, 1L, 35L, 18L, 4L, 4L, 2L, 63L, 6L, 23L,
6L, 82L, 10L, 17L, 45L, 74L, 10L, 8L, 14L, 23L, 26L, 53L, 55L,
16L, 2L, 141L, 113L, 98L, 179L, 13L, 34L, 16L, 8L, 144L, 2L,
141L, 26L, 9L, 125L, 201L, 32L, 452L, 179L, 30L, 4L, 141L, 5L,
40L, 7L, 255L, 120L, 223L, 28L, 252L, 21L, 8L, 362L, 4L, 5L,
2L, 285L, 18L, 76L, 5L, 73L, 11L, 367L, 7L, 50L, 6L, 37L, 15L,
48L, 5L, 12L, 7L, 96L)
I want to plot it using ggplot2 so the result would be something similar to:
barplot(firstGrade_count)
How would I define the aesthetics in ggplot2?
Here is the plot produced by base plot that I mentioned above:
firstGrade_count <- c(4,346,319,105,74,5,124,2,10,35,6,206,7,8,6,9,26,1,35,18,4,4,2,63,6,23,6,
82,10,17,45,74,10,8,14,23,26,53,55,16,2,141,113,98,179,13,34,16,8,144,2,141,26,9,
125,201,32,452,179,30,4,141,5,40,7,255,120,223,28,252,21,8,362,4,5,2,285,18,76,5,73,
11,367,7,50,6,37,15,48,5,12,7,96)
You can optionally make this into a data frame to make it easier to work with and to hold any additional features, though it's not required.
library(ggplot2)
# Optional transformation to data.frame:
firstGrade_count <- as.data.frame(firstGrade_count)
# Index for x-coordinates
firstGrade_count$index <- seq(1:nrow(firstGrade_count))
# Plotting
c <- ggplot(firstGrade_count, aes(index,firstGrade_count))
c + geom_bar(stat = "identity")
Or,
firstGrade_count <- as.data.frame(firstGrade_count)
c <- ggplot(firstGrade_count, aes(factor(firstGrade_count)))
c + geom_bar()
The way I set it up shows you the count of each unique value. There are many variations and additional formats you could add:
If you don't want the count you can add the stat = option and change it from the default to something else.
Without creating any dataframe:
ggplot() +
geom_bar(aes(1:length(firstGrade_count),firstGrade_count), stat='identity') +
xlab('')

ggplot2: Inconsistent color from alpha

I am making several plots that have different x-axis limits, and I want to highlight a region of interest by adding a grey box. Even though I use the same geom_rect() command with the same alpha value in ggplot2, I get results with very different grey colors. I have looked here and here but so far have not figured out how to make these boxes the same level of transparency. Below is a reproducible example (with fake data) and the figures that it produces. Notice the different color of the grey rectangles. I want the grey to be the same across plots.
Data<-structure(list(X = c(34L, 27L, 28L, 47L, 26L, 3L, 13L, 31L, 39L,
16L, 45L, 5L, 49L, 17L, 29L, 43L, 1L, 35L, 41L, 10L, 48L, 24L,
12L, 11L, 30L, 40L, 8L, 4L, 20L, 25L, 50L, 22L, 9L, 21L, 18L,
7L, 15L, 44L, 6L, 36L, 46L, 33L, 2L, 37L, 23L, 14L, 42L, 38L,
19L, 32L, 34L, 27L, 28L, 47L, 26L, 3L, 13L, 31L, 39L, 16L, 45L,
5L, 49L, 17L, 29L, 43L, 1L, 35L, 41L, 10L, 48L, 24L, 12L, 11L,
30L, 40L, 8L, 4L, 20L, 25L, 50L, 22L, 9L, 21L, 18L, 7L, 15L,
44L, 6L, 36L, 46L, 33L, 2L, 37L, 23L, 14L, 42L, 38L, 19L, 32L
), Y = c(130L, 146L, 58L, 110L, 117L, 135L, 133L, 108L, 97L,
61L, 71L, 64L, 103L, 142L, 125L, 104L, 100L, 147L, 111L, 78L,
56L, 145L, 62L, 69L, 70L, 116L, 137L, 79L, 150L, 94L, 91L, 81L,
65L, 118L, 129L, 83L, 98L, 84L, 85L, 148L, 93L, 73L, 59L, 87L,
134L, 88L, 136L, 90L, 140L, 55L, 89L, 115L, 123L, 51L, 132L,
126L, 66L, 80L, 60L, 120L, 109L, 76L, 74L, 57L, 149L, 121L, 138L,
128L, 114L, 127L, 68L, 107L, 67L, 112L, 144L, 119L, 53L, 52L,
54L, 96L, 131L, 106L, 113L, 72L, 95L, 63L, 92L, 86L, 75L, 105L,
82L, 101L, 139L, 143L, 122L, 77L, 99L, 141L, 124L, 102L), B = structure(c(2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L), class = "factor", .Label = c("no", "yes"))), .Names = c("X",
"Y", "B"), row.names = c(NA, -100L), class = "data.frame")
Data2<-structure(list(variable = c(2.49676547444708, 0.67359598601097,
0.674751772966082, 0.0317590441796792, 0.485143583939748, 1.08231639527806,
0.0732344181040914, 1.62357048819912, 0.146833215667032, 0.823157103468943,
0.240761579418538, 1.37540376416553), DOY_mid_month = c(15, 46,
75, 106, 136, 167, 197, 228, 259, 289, 320, 350)), .Names = c("variable",
"DOY_mid_month"), row.names = c(NA, -12L), class = "data.frame")
test<-ggplot(data=Data) +
geom_rect(aes(xmin=5, xmax=30, ymin=1, ymax=40), alpha = 0.02) +
geom_point(aes(x = X, y = X, colour= B), data =Data, size=2) +
theme_bw()
test2 <-ggplot(data=Data2) +
geom_rect(aes(xmin=5, xmax=30, ymin=-Inf, ymax=Inf), alpha = 0.02) +
geom_point(aes(x = DOY_mid_month, y = variable), color="black", size=4) +
scale_x_continuous("Day of Year", limits = c(0, 366)) + # Use this to add back X-axis label for the bottom plot in panel
scale_y_continuous(expression(paste("Variable", sep=""))) +
theme_bw()
Plot result from first example:
Plot result from second example:
You are currently drawing one rectangle for each row of the dataset. The more rectangles you overlap, the darker they get, which is why the longer dataset has a darker rectangle. Use annotate instead of geom_rect to draw a single rectangle.
annotate(geom = "rect", xmin=5, xmax=30, ymin=-Inf, ymax=Inf, alpha = 0.2)
If you want to stick with geom_rect you can give a one row data.frame to that layer so that each rectangle is only drawn one time. Here I use a fake dataset, although you could put your rectangle limits in the data.frame, as well.
geom_rect(data = data.frame(fake = 1),
aes(xmin = 5, xmax= 30, ymin = -Inf, ymax = Inf), alpha = 0.2)

Resources