Related
I have a data.frame fish.test0 from which I want to grep specific variables (listed in varlist) in the group column to create a sub-data.frame that then undergoes a statistical test. The results of the test are saved in tests.res.t. I want to loop over varlist so that I get one result for each entry in varlist.
Script:
# Names to look for in fish.test0$group; the loop below runs once per name.
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
# Select the rows of fish.test0 whose group matches the pattern.
# NOTE(review): the pattern is the string literal "i", not the loop variable i,
# so every pass selects the same rows (those whose group contains the letter i).
fish.test <- fish.test0[grep("i",fish.test0$group),]
# Name of the count column used in the tests below.
# NOTE(review): the sample data posted with this script has a column named
# ABCD rather than ACDC -- confirm the intended name.
column <- c("ACDC")
# tests is initialised here but is not referenced again inside this loop.
tests <- list()
# For every column name and every unique Merge value, run fisher.test() on a
# two-column data.frame built from the rows sharing that Merge value:
#   a = (Present - count, count) taken from the 2nd matching row,
#   b = (NotPresent - count, count) taken from the 1st matching row.
dat_test <- sapply( column, function(colx)
lapply( unique(fish.test$Merge), function(x)
fisher.test( data.frame(
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] -
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
),
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] -
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )
# Label the results: one row per Merge value, one column per count column.
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
# From each test in column 1 (all rows) collect the first two entries of
# estimate, both confidence-interval bounds and the p-value.
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
# Transpose so that each test becomes one row of a data.frame.
# tests.res.t is reassigned on every pass, so after the loop it holds only the
# result of the last pass.
tests.res.t <- as.data.frame(t(tests.res))
}
test-data:
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L,
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L,
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L,
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L,
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L,
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L,
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L,
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L,
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L,
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L,
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L,
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus",
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct",
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue",
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast",
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum",
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus",
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder",
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck",
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney",
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver",
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung",
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue",
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium",
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system",
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary",
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas",
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate",
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin",
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine",
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach",
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown",
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract",
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus",
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue",
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast",
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum",
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus",
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney",
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver",
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung",
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas",
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin",
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L,
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L,
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L,
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L,
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L,
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L,
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L,
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L,
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L,
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L,
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L,
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct",
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue",
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum",
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus",
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck",
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney",
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung",
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue",
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system",
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary",
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate",
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin",
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine",
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown",
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract",
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue",
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast",
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum",
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney",
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver",
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas",
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin",
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L,
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L,
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L,
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L,
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")
This is probably not an answer but it should help to improve your code. If I'm terribly wrong, I'll remove my answer right away. I have left out the test business which I don't understand, but your problem seems to be extraction.
The first thing is that you need to remove the quotation marks in your grep command, try:
# Names to search for in the group column.
varlist <- c("Abiotrophia", "Alphatorquevirus")
for (i in varlist) {
  # Keep the rows whose group label matches the current name. i is passed
  # unquoted, so its value (not the letter "i") is used as the pattern.
  fish.test <- fish.test0[grepl(i, fish.test0$group), ]
  # Show the first rows so the selection can be checked by eye.
  print(head(fish.test))
}
From what I understand, you need to define column and tests outside your loop. Does that give you more of what you want:
# Names to search for in the group column.
varlist <- c("Abiotrophia", "Alphatorquevirus")
# Name given to the column that is added to every subset.
column <- "ACDC"
# One slot per entry of varlist, allocated up front instead of grown in the loop.
tests <- vector("list", length(varlist))
# seq_along() gives an empty sequence for an empty varlist, whereas
# 1:length(varlist) would count 1, 0 and run the body twice.
for (i in seq_along(varlist)) {
  # extract rows which contain the variable
  fish.test <- fish.test0[grep(varlist[i], fish.test0$group), ]
  # add a running row index as a new last column; seq_len() yields a
  # zero-length vector when no row matched, so cbind() still succeeds
  fish.test <- cbind(fish.test, seq_len(nrow(fish.test)))
  # rename that new last column to your name of choice
  colnames(fish.test)[ncol(fish.test)] <- column
  # write each result into your defined list (index i lines up with varlist)
  tests[[i]] <- fish.test
}
I have found a useful mean imputation technique here.
More specifically:
# Positions of the missing values, located once.
na_pos <- which(is.na(variable))
# Replace each missing value with the mean of its two neighbours: the element
# just before it and the element just after it.
variable[na_pos] <- rowMeans(cbind(variable[na_pos - 1], variable[na_pos + 1]))
Which takes values before and after the missing one and imputes their mean.
However, since I have a large data frame with lots of variables I was wondering is there a way to iterate this function over every variable (column) in the df?
dput:
dput(head(politbar_timeseries,10))
structure(list(Month = structure(c(8401, 8432, 8460, 8491, 8521,
8552, 8582, 8613, 8644, 8674), class = "Date"), Intention_CDU = c(246L,
223L, 222L, 232L, 261L, 240L, 241L, NA, 234L, 211L), Intention_SPD = c(304L,
323L, 276L, 274L, 238L, 290L, 291L, NA, 284L, 296L), Intention_FDP = c(47L,
44L, 46L, 36L, 35L, 50L, 31L, NA, 33L, 40L), Intention_Green = c(112L,
90L, 108L, 97L, 92L, 93L, 80L, NA, 131L, 97L), Intention_PDS = c(1L,
2L, 1L, 4L, 2L, 4L, 6L, NA, 3L, 1L), Intention_Right = c(40L,
45L, 51L, 44L, 48L, 26L, 30L, NA, 33L, 39L), CDU_CSU_Scale = c(5.53364976051333,
5.41668954145634, 5.41361737597252, 5.53237142973321, 5.90556125077522,
5.65325991093138, 5.66581907651607, NA, 5.7568395653053, 5.56722081960557
), SPD_Scale = c(6.68501038883942, 7.0740019675866, 6.31415136355633,
6.52447895467401, 6.29176231355408, 6.52870415235848, 6.73302006301497,
NA, 7.12547563426403, 7.17833309669175), FDP_Scale = c(5.34570000100596,
5.73343004031828, 5.52174547729524, 5.39618098094715, 5.81980921102384,
5.64326882828348, 5.70136552543044, NA, 5.3836387964029, 5.73726720856055
), Grüne_Scale = c(5.73191750379599, 6.03715643205545, 6.19893648691653,
5.96106479727169, 5.78436018957346, 5.54482751153172, 5.6213169156508,
NA, 6.42776109093573, 6.33016932291559), Republikaner_Scale = c(2.33415238404679,
2.40200426439232, 2.50591428720572, 2.45599753445912, 2.61170073660812,
2.26120872300811, 2.24409536048212, NA, 2.29699201198203, 2.25876734042663
), PDS_Scale = c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NA, NaN,
NaN)), .Names = c("Month", "Intention_CDU", "Intention_SPD",
"Intention_FDP", "Intention_Green", "Intention_PDS", "Intention_Right",
"CDU_CSU_Scale", "SPD_Scale", "FDP_Scale", "Grüne_Scale", "Republikaner_Scale",
"PDS_Scale"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 249L,
8L, 9L), class = "data.frame")
I have interesting data that is not uniform. A group of items are listed under the category name, but it is all in the same column. I need to add a column with the row corresponding to the item's category that it belongs to (then remove the category heading). The only way to distinguish a new category is determining whether the value under the year is empty.... My dputs should explain my issue more clearly.
Before:
structure(list(X = structure(c(13L, 1L, 19L, 16L, 5L, 17L, 11L,
8L, 2L, 10L, 4L, 6L, 18L, 15L, 21L, 12L, 14L, 9L, 3L, 20L, 7L
), .Label = c("-Burgers", "-Cameras", "-Shirts", "+Laptops",
"+Salads", "+TVs", "Caps", "Cell", "Clothes:", "Desktops", "Electronics",
"Flowers", "Food", "Garden Nomes", "Grills", "Hotdogs", "Nachoes",
"Outdoors:", "Pizza", "Shorts", "Swimming Gear"), class = "factor"),
X2000 = c(NA, 104L, 159L, 184L, 189L, 182L, NA, 49L, 28L,
46L, 34L, 43L, NA, 129L, 190L, 189L, 119L, NA, 45L, 80L,
80L), X2001 = c(NA, 147L, 192L, 164L, 174L, 196L, NA, 40L,
34L, 43L, 35L, 22L, NA, 114L, 130L, 120L, 145L, NA, 56L,
35L, 54L), X2002 = c(NA, 163L, 172L, 138L, 146L, 190L, NA,
38L, 40L, 21L, 22L, 33L, NA, 186L, 172L, 139L, 119L, NA,
88L, 78L, 91L), X2003 = c(NA, 125L, 152L, 182L, 148L, 125L,
NA, 36L, 44L, 34L, 27L, 50L, NA, 119L, 115L, 188L, 166L,
NA, 91L, 77L, 77L), X2004 = c(NA, 116L, 111L, 120L, 153L,
199L, NA, 49L, 48L, 43L, 37L, 32L, NA, 159L, 116L, 143L,
153L, NA, 18L, 53L, 51L)), .Names = c("X", "X2000", "X2001",
"X2002", "X2003", "X2004"), class = "data.frame", row.names = c(NA,
-21L))
After:
structure(list(X = structure(c(1L, 15L, 13L, 5L, 14L, 8L, 2L,
9L, 4L, 6L, 12L, 17L, 10L, 11L, 3L, 16L, 7L), .Label = c("-Burgers",
"-Cameras", "-Shirts", "+Laptops", "+Salads", "+TVs", "Caps",
"Cell", "Desktops", "Flowers", "Garden Nomes", "Grills", "Hotdogs",
"Nachoes", "Pizza", "Shorts", "Swimming Gear"), class = "factor"),
X.1 = structure(c(3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 1L, 1L, 1L), .Label = c("Clothes:", "Electronics",
"Food", "Outdoors:"), class = "factor"), X2000 = c(104L,
159L, 184L, 189L, 182L, 49L, 28L, 46L, 34L, 43L, 129L, 190L,
189L, 119L, 45L, 80L, 80L), X2001 = c(147L, 192L, 164L, 174L,
196L, 40L, 34L, 43L, 35L, 22L, 114L, 130L, 120L, 145L, 56L,
35L, 54L), X2002 = c(163L, 172L, 138L, 146L, 190L, 38L, 40L,
21L, 22L, 33L, 186L, 172L, 139L, 119L, 88L, 78L, 91L), X2003 = c(125L,
152L, 182L, 148L, 125L, 36L, 44L, 34L, 27L, 50L, 119L, 115L,
188L, 166L, 91L, 77L, 77L), X2004 = c(116L, 111L, 120L, 153L,
199L, 49L, 48L, 43L, 37L, 32L, 159L, 116L, 143L, 153L, 18L,
53L, 51L)), .Names = c("X", "X.1", "X2000", "X2001", "X2002",
"X2003", "X2004"), class = "data.frame", row.names = c(NA, -17L
))
The items arbitrarily have + or - signs; I need those to remain the same. Also, some category headers end with : while others do not.
We create an index based on the 'NA' values in columns other than the 1st ('indx'). We split the dataset using the 'indx', remove the first row i.e. NA values from columns 2nd to the last, cbind with the 1st row, 1st column value, rearrange the columns and rbind.
# A row is a category header when every column after the first is NA; the
# running count of headers gives each row its group number.
indx <- cumsum(rowSums(!is.na(df1[-1])) == 0)
# Per group: attach the header's label (row 1, column 1) as X.1, drop the
# header row, and move X.1 to sit right after the first column.
res <- do.call(
  rbind,
  lapply(split(df1, indx), function(grp) {
    labelled <- cbind(grp, X.1 = grp[1, 1])
    labelled[-1, c(1, 7, 2:6)]
  })
)
# Discard the row names produced by split()/rbind().
row.names(res) <- NULL
# Compare with the expected result, ignoring attributes.
all.equal(res, out, check.attributes = FALSE)
#[1] TRUE
where 'out' is the dput output of the expected result
Update
If the columns have '' instead of NA,
# Header rows are those where no column after the first holds a non-empty string.
indx <- cumsum(rowSums(df1[-1] != "") == 0)
and do the rest as above. Having said that, when we have '' in a numeric column, the class will be either factor or character depending on whether you specify stringsAsFactors=TRUE or =FALSE in read.table/read.csv. So, keeping the '' as such will leave the output columns as factor/character class as well. I would convert the columns to their correct class first, which will also coerce the '' to NA, i.e.
# Convert every column after the first to numeric, going through character so
# that factor columns yield their labels rather than their integer codes.
df1[-1] <- lapply(df1[-1], function(col) {
  as_text <- as.character(col)
  as.numeric(as_text)
})
The as.character is only needed if the columns are factor class.
Once, we have done the conversion, the first approach should work fine as well.
I have an R program and am struggling to get all of my points onto the map
library(ggmap)
library(ggplot2)
# Work from the folder holding the CSV, then load the asset counts.
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
# Columns whose names match "SEP.12", expressed as a percentage of the row
# total across columns 4 to 7.
Sub1 <- sep[grep("SEP.12", names(sep))]
sep[["newCol"]] <- 100 * rowSums(Sub1) / rowSums(sep[4:7])
# Grouping variable: is the value in column 8 at least 50?
Percent_SEP12_Assets <- ifelse(sep[, 8] >= 50, "Over 50", "Under 50")
# Fetch the base map for a fixed place name and zoom level.
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# Draw the map, then the points coloured by the grouping variable.
ggmap(map) +
  geom_point(
    data = sep,
    aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
    size = 9,
    alpha = 0.6
  ) +
  scale_color_manual(
    breaks = c("Over 50", "Under 50"),
    values = c("green", "red")
  )
And here is output map
I wish to zoom in as far as possible without cutting out data points, but no matter which location I pick on the map, some data points keep getting cut off, i.e. I get the warning: Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
# Load the example dataset.
data("catholic_dioceses")
# Bounding box spanning all longitudes and latitudes in the data; f sets the
# amount of padding added around it.
bbox <- make_bbox(
  catholic_dioceses[["long"]],
  catholic_dioceses[["lat"]],
  f = 0.01
)
# Request the map for that box instead of for a place name and zoom level.
map <- get_map(bbox)
# Draw the map with one point per row of the data.
ggmap(map) +
  geom_point(
    data = catholic_dioceses,
    aes(x = long, y = lat)
  )
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
# Bounding box covering every coordinate in sep, with a small padding.
bbox <- make_bbox(sep[["Longitude"]], sep[["Latitude"]], f = 0.01)
# Fetch the map for that box so that no point falls outside it.
map <- get_map(bbox)
# Same plot as before, drawn on the bounding-box map.
ggmap(map) +
  geom_point(
    data = sep,
    aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
    size = 9,
    alpha = 0.6
  ) +
  scale_color_manual(
    breaks = c("Over 50", "Under 50"),
    values = c("green", "red")
  )
I would like to make a bubble plot using ggplot2 in R. My code and data are found below.
Please leave the colors as they are. I am having difficulties in scaling positive and negative values equally. For example, -3 is scaled smaller than +3. I would like negatives and positives to be scaled proportionately irrespective of sign.
Identify negative from positive values using some kind of outline linetype for bubbles and include it in the legend.
Also remove the "Mean" part of the legend.
Thanks very much for your great help.
#=====================================================================
library(ggplot2)
# Open an 8 x 6 X11 window when dev.cur() reports device 1, i.e. when no
# graphics device is open yet.
if (dev.cur() == 1) x11(width=8,height=6)
# Single-panel layout for base graphics.
par(mfcol=c(1,1))
# Bubble plot of the sites: position from Eastings/Northings, bubble size from
# Mean, text label from Site.
# NOTE(review): legend = FALSE is passed inside aes(); it is not a plotting
# aesthetic -- confirm it has the intended effect.
p<-ggplot(site.resiudal, aes(x=Eastings, y=Northings, size=Mean,label=site.resiudal$Site,legend = FALSE))+
#theme(legend.position="none")+
# First layer uses point shape 21; second layer colours the points by Region.
geom_point(shape=21)+
geom_point(aes(colour = factor(Region)))+
# Size scale with output range 1 to 15.
# NOTE(review): scale_area() comes from older ggplot2 releases -- confirm it
# exists in the installed version.
scale_area(range=c(1,15))+
# No legend for alpha.
scale_alpha(guide = 'none')+
# Axis titles and plotting limits.
scale_x_continuous(name="Longitude", limits=c(-120,-95))+
scale_y_continuous(name="Latitude", limits=c(48,61))+
# Site labels drawn at text size 4.
geom_text(size=4)+
# Fixed colour and legend label for each of the five Region codes.
scale_colour_manual(name="Region",labels = c("A", "B","C","D", "E"),values = c("1" = "firebrick3","2" = "palegreen4","3" = "sandybrown","4" = "red","5" = "gray0"))+
# Legend title and legend entries in black, size 16, plain face.
theme(legend.title = element_text(colour="black", size=16, face="plain"))+
theme(legend.text = element_text(colour="black", size = 16, face = "plain"))
# Print the plot.
p
#Data[["sign"]] = ifelse(Data[["Mean"]] >= 0, "positive", "negative")
#=================================================
structure(list(Site = structure(c(101L, 102L, 105L, 107L, 108L,
110L, 111L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 16L, 20L, 47L, 52L, 53L, 55L, 91L, 92L, 93L, 94L, 95L,
96L, 99L, 15L, 17L, 18L, 19L, 21L, 114L, 23L, 26L, 36L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 98L, 100L, 103L, 104L, 106L, 109L, 112L,
113L, 115L, 116L, 117L, 119L, 42L, 44L, 46L, 48L, 49L, 50L, 51L,
54L, 56L, 57L, 58L, 90L, 97L, 118L, 120L, 22L, 24L, 25L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 37L, 38L, 39L, 40L, 41L,
43L, 45L), .Label = c("G100", "G101", "G102", "G103", "G104",
"G105", "G106", "G107", "G108", "G109", "G110", "G111", "G112",
"G113", "G114", "G115", "G116", "G117", "G118", "G119", "G120",
"GG10", "GG11", "GG12", "GG13", "GG14", "GG15", "GG16", "GG17",
"GG18", "GG19", "GG20", "GG21", "GG22", "GG23", "GG24", "GG25",
"GG26", "GG27", "GG28", "GG29", "GG30", "GG31", "GG32", "GG33",
"GG34", "GG35", "GG36", "GG37", "GG38", "GG39", "GG40", "GG41",
"GG42", "GG43", "GG44", "GG45", "GG46", "GG47", "GG48", "GG49",
"GG50", "GG51", "GG52", "GG53", "GG54", "GG55", "GG56", "GG57",
"GG58", "GG59", "GG60", "GG61", "GG62", "GG63", "GG64", "GG65",
"GG66", "GG67", "GG68", "GG69", "GG70", "GG71", "GG72", "GG73",
"GG74", "GG75", "GG76", "GG77", "GG78", "GG79", "GG80", "GG81",
"GG82", "GG83", "GG84", "GG85", "GG86", "GG87", "GG88", "GG89",
"GG90", "GG91", "GG92", "GG93", "GG94", "GG95", "GG96", "GG97",
"GG98", "GG99", "GGG1", "GGG2", "GGG3", "GGG4", "GGG5", "GGG6",
"GGG7", "GGG8", "GGG9"), class = "factor"), Name = structure(c(53L,
87L, 29L, 92L, 36L, 76L, 102L, 103L, 119L, 2L, 9L, 11L, 45L,
47L, 49L, 54L, 90L, 30L, 105L, 66L, 78L, 107L, 81L, 42L, 41L,
43L, 59L, 110L, 24L, 27L, 56L, 61L, 64L, 118L, 40L, 21L, 44L,
70L, 108L, 25L, 58L, 98L, 83L, 5L, 19L, 26L, 31L, 38L, 55L, 60L,
71L, 74L, 75L, 85L, 95L, 120L, 109L, 1L, 67L, 20L, 50L, 63L,
106L, 111L, 116L, 62L, 6L, 99L, 114L, 73L, 84L, 89L, 93L, 97L,
115L, 80L, 10L, 12L, 88L, 79L, 15L, 17L, 33L, 35L, 94L, 100L,
3L, 16L, 37L, 101L, 117L, 8L, 39L, 48L, 86L, 113L, 23L, 13L,
69L, 96L, 104L, 32L, 65L, 82L, 14L, 22L, 18L, 46L, 68L, 72L,
77L, 91L, 112L, 4L, 7L, 28L, 51L, 57L, 52L, 34L), .Label = c("ANEROID",
"ARBORG", "ATHABASCA", "BANFF", "BANGOR", "BATTLEFORD", "BEAVER MINES",
"BEAVERLODGE", "BERENS RIVER", "BIRTLE", "BISSETT", "BRANDON",
"BUFFALO NARROWS", "CALGARY", "CALMAR", "CAMPSIE", "CAMROSE",
"CARWAY", "CEYLON", "CHAPLIN", "CHURCHILL", "CLARESHOLM", "COLD LAKE",
"COLLINS BAY", "CORONATION", "COTE", "CREE LAKE", "CROWSNEST",
"CYPRESS RIVER", "DAUPHIN", "DAVIDSON", "DRUMHELLER", "EDMONTON",
"EDSON", "ELK POINT", "EMERSON AUT", "ENILDA-BERG", "ESTEVAN",
"FAIRVIEW", "FLIN FLON", "FORT CHIPEWYAN", "FORT MCMURRAY", "FORT VERMILION",
"GILLAM", "GIMLI", "GLEICHEN", "GRAND RAPIDS", "GRANDE PRAIRIE",
"GREAT FALLS", "HIGH POINT", "HIGHWOOD", "HINTON VALLEY", "HUDSON BAY",
"INDIAN BAY", "INDIAN HEAD", "ISLAND FALLS", "JASPER WARDEN",
"JENNER", "KEG RIVER RS", "KELLIHER", "KEY LAKE", "KINDERSLEY",
"KLINTONEL", "LA RONGE", "LACOMBE 2", "LANGRUTH WEST", "LEADER",
"LETHBRIDGE", "LOON LAKE", "LYNN LAKE", "MANOR", "MEDICINE HAT",
"MELFORT", "MOOSE JAW", "MOOSOMIN", "MORDEN", "MOUNTAIN VIEW",
"NEEPAWA MURRAY", "NINETTE", "NIPAWIN", "NORWAY HOUSE", "OLDS",
"ONEFOUR", "OUTLOOK", "PASWEGIN", "PEACE RIVER", "PELLY", "PIERSON",
"PILGER", "PINAWA WNRE", "PINCHER CREEK ", "PORTAGE PRAIRIE",
"PRINCE ALBERT", "RANFURLY", "REGINA", "ROCKY MT HOUSE ", "SASKATOON",
"SCOTFIELD", "SCOTT", "SION", "SLAVE LAKE", "SPRAGUE", "STEINBACH",
"STETTLER NORTH", "SWAN RIVER", "SWIFT CURRENT", "THE PAS", "THOMPSON",
"TONKIN", "URANIUM CITY ", "VAL-MARIE", "VAUXHALL", "WABASCA RS",
"WASECA", "WASKESIU LAKE", "WEST POPLAR", "WHITECOURT", "WHITESAND DAM",
"WINNIPEG", "YELLOW GRASS"), class = "factor"), Mean = c(-0.020525899,
0.333863493, 0.210353772, NA, NA, 0.093520458, 0.341295298, NA,
-0.175074657, 0.09834825, 0.075610648, NA, -0.117503802, 0.18309367,
0.25246942, 0.221329766, 0.072167004, -0.094766032, NA, NA, 0.19783711,
-0.166351357, -0.0996169, -0.038555432, -0.028092042, 0.297855371,
0.108263891, 0.002057761, 0.327731415, NA, 0.180100638, 0.193837736,
-0.003306948, 0.178881894, 0.3655509, -0.235975798, -0.176154056,
-0.080433735, -0.110955273, -0.228010105, 0.048103255, -0.116681527,
-0.073042421, NA, NA, 0.035356012, 0.297171565, -0.197834719,
0.036412958, 0.055218077, NA, -0.236229087, 0.265211081, 0.271625885,
-0.293179359, 0.113744571, -0.207770026, 0.100471248, -0.071569464,
NA, NA, NA, -0.052716493, 0.057385851, 0.090340517, -0.30456625,
-0.234420722, 0.082287977, 0.009973663, NA, -0.06405062, 0.074703356,
-0.208329196, -0.272401078, 0.217991554, -0.043619919, -0.208901155,
-0.020022401, 0.111495318, NA, 0.38239749, 0.199136959, -0.177740258,
NA, 0.147515615, 0.309306538, 0.298741467, 0.068170296, NA, -0.02102765,
0.001754313, -0.010196512, 0.108254156, -0.228183063, -0.196261239,
NA, -0.167054722, 0.039949534, 0.154337034, -0.020855461, 0.136010278,
NA, 0.096997744, NA, -0.241963754, 0.660176529, 0.423554314,
0.190305726, -0.210778787, -0.261148915, NA, 0.054264129, -0.098706619,
-0.138776994, NA, NA, NA, -0.113823745, 0.373292721, -0.047060083
), Eastings = c(-102.5800018, -101.8700027, -99.08000183, -98.26999664,
-97.23000336, -98.08000183, -95.59999847, -96.76999664, -97.23000336,
-97.08000183, -97.02999878, -95.69999695, -97.01999664, -99.27999878,
-96, -95.19999695, -96.06999969, -100.0500031, -101.2300034,
-98.80000305, -99.56999969, -101.0999985, -97.84999847, -111.2200012,
-111.1200027, -116.0299988, -117.6200027, -108.4800034, -103.6999969,
-107.1299973, -102.3499985, -105.6200027, -105.2699966, -103.1500015,
-101.8799973, -94.06999969, -94.72000122, -101.0800018, -97.87000275,
-111.4499969, -111.1999969, -111.3499985, -110.4700012, -102.2799988,
-104.6500015, -101.7799988, -105.9800034, -102.9700012, -103.6500015,
-103.75, -102.0999985, -105.5500031, -101.6699982, -103.9199982,
-104.6699982, -104.1800003, -102.2300034, -107.3000031, -109.5,
-106.6500015, -107.9300003, -108.9199982, -107.7300034, -107.8499985,
-106.3799973, -109.1800003, -108.25, -108.8300018, -109.4000015,
-104.5999985, -107.0500031, -105.1500015, -105.6699982, -106.7200012,
-106.0699997, -104, -101.0500031, -99.94999695, -101.2699966,
-99.65000153, -113.8499985, -112.8199997, -113.5800018, -111.0699997,
-111.7300034, -114.1200027, -113.2799988, -114.6800003, -116.3000031,
-114.7799988, -115.7799988, -119.4000015, -118.5299988, -118.8799973,
-117.4499969, -113.8300018, -110.2799988, -108.4300003, -109.0999985,
-114.9199982, -112.7200012, -112.8700027, -113.75, -114.0999985,
-114.0199966, -113.7300034, -113.3799973, -113.0500031, -112.8000031,
-110.7200012, -113.6299973, -113.9800034, -112.1299973, -115.5500031,
-114.1800003, -114.4800034, -114.3700027, -118.0299988, -117.5299988,
-116.4499969), Northings = c(52.88000107, 52.08000183, 49.54999924,
49.95000076, 49, 49.18000031, 49.02000046, 49.52999878, 49.91999817,
50.93000031, 52.34999847, 51.02999878, 50.63000107, 53.15000153,
50.47000122, 49.61999893, 50.18000031, 51.09999847, 52.11999893,
50.41999817, 50.15000153, 53.97000122, 53.97000122, 56.65000153,
58.77000046, 58.38000107, 57.75, 59.56999969, 58.18000031, 57.34999847,
55.52999878, 57.25, 55.15000153, 56.22999954, 54.77000046, 58.72999954,
56.34999847, 56.86999893, 55.79999924, 52.06999969, 50.72000122,
51.58000183, 49.11999893, 50.90000153, 49.38000107, 51.52000046,
51.27000046, 49.22000122, 50.54999924, 51.25, 49.61999893, 50.33000183,
50.13000107, 51.97999954, 50.43000031, 49.81999969, 51.20000076,
49.72000122, 50.90000153, 50.47000122, 50.97999954, 49.68000031,
50.27000046, 49.36999893, 49, 51.52000046, 52.77000046, 52.36999893,
53.13000107, 52.81999969, 51.47999954, 52.41999817, 53.22000122,
52.16999817, 53.91999817, 53.33000183, 50.43000031, 49.91999817,
49.18000031, 49.41999817, 53.27999878, 53.02999878, 53.31999969,
53.88000107, 53.41999817, 53.88000107, 54.72000122, 54.13000107,
55.41999817, 55.27999878, 54.15000153, 55.20000076, 56.08000183,
55.18000031, 56.22999954, 55.97000122, 54.41999817, 55.83000183,
54.04999924, 52.41999817, 52.33000183, 51.47000122, 52.45000076,
51.77999878, 51.11999893, 49.93000031, 49, 50.88000107, 49.63000107,
50.02000046, 49.13000107, 49.52000046, 50.04999924, 51.20000076,
49.47000122, 49.63000107, 50.54999924, 52.93000031, 53.40000153,
53.58000183), Region = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L)), .Names = c("Site",
"Name", "Mean", "Eastings", "Northings", "Region"), class = "data.frame", row.names = c(NA,
-120L))
#============================================
Use abs(). For example, instead of size=Mean use size=abs(Mean).
Then, you can track the sign using shape (or some other aesthetic, but color and size are already taken)
replace:
geom_point(shape=21)+
geom_point(aes(colour = factor(Region))) +
with the single line
geom_point(aes(shape=factor(sign(Mean)), colour = factor(Region))) +
If you'd like, you can also add lines such as
# Legend for the sign of Mean: relabel the -1 and 1 shape keys.
scale_shape_discrete(name="Mean Is", breaks=c(-1, 1), labels=c("Negative", "Positive"))
# Hide the size ("Mean") legend. "none" is the supported spelling; passing
# FALSE to guides() is deprecated in current ggplot2.
guides(size = "none")