How to change values more efficiently - r
I want to replace the , with a . as the decimal point in a dataframe. I can do this with df$X2005 <- as.numeric(gsub(',', '.', df$X2005)) for each variable. Is there a more efficient way to do this for the whole dataframe at once?
Some example data:
df <- structure(list(country = structure(1:6, .Label = c("Australia", "Austria", "Belgium", "Canada", "Chile", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Korea", "Luxembourg", "Mexico", "Netherlands", "New Zealand", "Norway", "Poland", "Portugal", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom", "United States"), class = "factor"), X2005 = structure(c(26L, 2L, 34L, 33L, 13L, 14L), .Label = c("10,3533", "10,4187", "10,8089", "10,8629", "10,882", "11,0173", "15,8399", "5,0226", "5,4488", "5,6273", "5,8713", "6,2137", "6,6397", "6,9339", "7,0448", "7,5719", "7,8534", "7,9457", "8,1819", "8,2668", "8,2883", "8,3556", "8,394", "8,4295", "8,4456", "8,4794", "8,7437", "9,0304", "9,0615", "9,4427", "9,6618", "9,77", "9,8295", "9,9833"), class = "factor"), X2006 = structure(c(25L, 2L, 31L, 34L, 13L, 14L), .Label = c("10,0326", "10,2177", "10,3877", "10,6374", "10,7468", "10,9516", "15,9368", "5,0169", "5,6845", "5,8109", "6,1019", "6,2008", "6,285", "6,6937", "7,3477", "7,5148", "7,5836", "7,7495", "8,1986", "8,2586", "8,2807", "8,3448", "8,39", "8,4289", "8,5204", "8,564", "8,8247", "8,8401", "8,948", "9,1292", "9,4811", "9,7487", "9,9243", "9,9621"), class = "factor"), X2007 = structure(c(27L, 3L, 31L, 1L, 14L, 13L), .Label = c("10,0263", "10,2099", "10,2617", "10,4771", "10,7642", "10,8754", "16,1608", "5,1597", "5,7779", "6,0372", "6,3331", "6,3858", "6,5223", "6,5494", "7,1288", "7,6299", "7,6744", "7,7553", "7,8565", "7,9023", "8,043", "8,2295", "8,4769", "8,4908", "8,5014", "8,504", "8,5531", "8,746", "8,9172", "9,0913", "9,5254", "9,8104", "9,9873", "9,9942"), class = "factor"), X2008 = structure(c(26L, 6L, 34L, 4L, 17L, 15L), .Label = c("10,1268", "10,183", "10,2189", "10,2537", "10,289", "10,4896", "10,7042", "10,9909", "11,0232", "16,6201", "5,8474", "6,0577", "6,0745", "6,586", "6,8189", "6,8863", "7,1361", "7,1819", 
"7,4631", "7,7052", "8,0208", "8,3068", "8,3457", "8,5513", "8,605", "8,751", "8,8915", "8,9402", "8,9521", "9,0591", "9,1344", "9,2284", "9,3051", "9,9128"), class = "factor"), X2009 = structure(c(24L, 8L, 5L, 9L, 21L, 22L), .Label = c("", "10,0115", "10,0496", "10,1957", "10,5938", "10,8137", "11,0005", "11,1729", "11,3992", "11,4722", "11,7314", "11,7516", "11,8823", "17,6706", "6,4098", "7,039", "7,1018", "7,2127", "7,6797", "7,7356", "7,8649", "7,9514", "7,9657", "9,0423", "9,152", "9,17", "9,1947", "9,4037", "9,5258", "9,6247", "9,636", "9,6743", "9,9056", "9,939"), class = "factor"), X2010 = structure(c(23L, 6L, 3L, 8L, 18L, 19L), .Label = c("", "10,1995", "10,503", "10,797", "10,8817", "11,0318", "11,0751", "11,3738", "11,5495", "11,677", "12,0661", "17,6911", "6,1782", "6,3394", "7,0229", "7,1675", "7,2911", "7,37", "7,4319", "7,6856", "8,0302", "8,8718", "8,9481", "8,9888", "8,995", "9,2925", "9,312", "9,4079", "9,4224", "9,4688", "9,5277", "9,5504", "9,589", "9,6074"), class = "factor"), X2011 = structure(c(NA, 5L, 4L, 8L, 18L, 17L), .Label = c("", "10,2345", "10,2844", "10,5139", "10,7769", "10,8671", "11,0148", "11,1784", "11,3323", "11,6343", "11,9369", "17,683", "5,922", "6,6464", "6,8709", "7,3692", "7,5011", "7,5206", "7,7333", "7,8877", "7,9416", "8,8501", "8,9042", "9,0019", "9,027", "9,1296", "9,2256", "9,2837", "9,2969", "9,4184", "9,4661"), class = "factor"), X2012 = structure(c(NA, NA, NA, 2L, 5L, NA), .Label = c("", "11,2132", "11,2955", "7,5249", "7,6077", "7,8226", "8,7596", "8,923", "9,148", "9,167", "9,3722"), class = "factor")), .Names = c("country", "X2005", "X2006", "X2007", "X2008", "X2009", "X2010", "X2011", "X2012"), row.names = c(NA, 6L), class = "data.frame")
You can instead read your data in the correct format.
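If you use read.table, for instance, set the dec = "," argument so that commas are parsed as decimal points. You'll probably need to set the na.strings argument as well (for example na.strings = "<NA>") so that missing values are read as NA.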
You could use a for loop:
# Convert every column of `df` except the first one from decimal-comma text
# (e.g. "8,4794") to numeric.
# - names(df)[-1] is used instead of names(df[, -1]): the latter drops to a
#   plain vector (and yields NULL names) when only one column is left.
# - df[[col]] always extracts the column as a vector, for data frames and
#   tibbles alike; gsub() coerces factors via as.character(), so factor
#   labels (not the integer codes) are converted.
# - fixed = TRUE treats "," as a literal string rather than a regex.
# Empty strings become NA after as.numeric().
for (col in names(df)[-1]) {
  df[[col]] <- as.numeric(gsub(",", ".", df[[col]], fixed = TRUE))
}
Related
How can I make several lines in ggplot with several group layers?
I have divided my plots into 2 based on Sportbook and Casino. How is it possible to also split line into several lines (different colors) to show different markets? I tried to use fill=market at the end of ggplot function, however it did not help. library(ggplot2) data<-structure(list(wday = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), class = "factor"), market = c("France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland", "France", "France", "Germany", "Germany", "Poland", "Poland"), product_preference = c("Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook" ), ggr = c(3349.80897892753, 161.917715712988, 17700.4568364611, -123.342131455399, 17208.7731385281, 3128.51277864992, 2877.17330617787, 28.5162781278127, 13453.7092912371, -82.8980672268908, 13611.1197727273, 9910.32070866143, 3939.20578803854, 126.311590466926, 19097.2664228723, -94.5491666666667, 16706.9427008929, 2636.63687707641, 3393.43150322119, 176.953280238925, 23414.9515950069, -72.4428986866791, 16140.8680085653, 
5618.00758333333, 3007.18322084806, 69.4383454281568, 18018.1755748663, -77.87698, 19889.0339183673, 5561.69038585209, 4205.12735472371, -16.0552268431002, 17166.1121932115, -117.149356025759, 18527.8546597938, 6806.36808346213, 3446.70375835385, 56.6674850849013, 18026.2400535475, -67.3431629701062, 13641.4965135699, 11470.3083969466)), row.names = c(NA, -42L), groups = structure(list(wday = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), class = "factor"), market = c("France", "Germany", "Poland", "France", "Germany", "Poland", "France", "Germany", "Poland", "France", "Germany", "Poland", "France", "Germany", "Poland", "France", "Germany", "Poland", "France", "Germany", "Poland"), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 19:20, 21:22, 23:24, 25:26, 27:28, 29:30, 31:32, 33:34, 35:36, 37:38, 39:40, 41:42), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), na.action = structure(43:46, .Names = c("43", "44", "45", "46"), class = "omit"), class = c("grouped_df", "tbl_df", "tbl", "data.frame")) ggplot() + geom_line(data = data,aes(x = wday, y = ggr,group = product_preference))+ facet_grid(.~product_preference,scales="free")
You can define the color of your lines by adding color = <grouping variable>. Also, you already do a facet grid on product_preference, so there seems to be no need to define group = product_preference. Try this: ggplot() + geom_line(data = data,aes(x = wday, y = ggr,color = market, group = market)) + facet_grid(.~product_preference,scales="free")
I am not sure this is what you wanted, but I would replace geom_line with 'geom_col'. 'geom_col' also takes the 'fill' attribute, while 'geom_line' would require 'color'. So my suggestion would be the following: ggplot() + geom_col(data = data,aes(x = wday, y = ggr, group = product_preference, fill = market))+ facet_grid(.~product_preference,scales="free") This results in the following plot:
How to run a command for each group in a column of a dataframe?
I have a dataframe like the following: df <- structure(list(StudyID = c("Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Australia", "Newzealand", "Newzealand", "Newzealand", "Newzealand"), variable_control = c("Mathew", "Mathew", "Mathew", "Mathew", "Simp", "Simp", "Simp", "Simp", "hutten", "hutten", "hutten", "hutten", "AB", "AB", "AB", "AB", "Mathew", "Mathew", "Mathew", "Mathew"), mean_control = c(103.125, 103.125, 103.125, 103.125, 3.20026104175422, 3.20026104175422, 3.20026104175422, 3.20026104175422, 0.655931978647405, 0.655931978647405, 0.655931978647405, 0.655931978647405, 5.6147916190302, 5.6147916190302, 5.6147916190302, 5.6147916190302, 86.3478260869565, 86.3478260869565, 86.3478260869565, 86.3478260869565), sd_control = c(14.0136837888306, 14.0136837888306, 14.0136837888306, 14.0136837888306, 0.278705816120361, 0.278705816120361, 0.278705816120361, 0.278705816120361, 0.0899465623045049, 0.0899465623045049, 0.0899465623045049, 0.0899465623045049, 6.28461426690684, 6.28461426690684, 6.28461426690684, 6.28461426690684, 22.9900629657485, 22.9900629657485, 22.9900629657485, 22.9900629657485), n_control = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 120L, 120L, 120L, 120L, 15L, 15L, 15L, 15L, 46L, 46L, 46L, 46L), se_control = c(3.50342094720765, 3.50342094720765, 3.50342094720765, 3.50342094720765, 0.0696764540300902, 0.0696764540300902, 0.0696764540300902, 0.0696764540300902, 0.00821096019070354, 0.00821096019070354, 0.00821096019070354, 0.00821096019070354, 1.62268042620452, 1.62268042620452, 1.62268042620452, 1.62268042620452, 3.38969985579422, 3.38969985579422, 3.38969985579422, 3.38969985579422 ), lowCI_control = c(95.6576350141697, 95.6576350141697, 95.6576350141697, 95.6576350141697, 3.05174919547557, 3.05174919547557, 3.05174919547557, 3.05174919547557, 0.6396734573882, 0.6396734573882, 0.6396734573882, 
0.6396734573882, 2.13448824216196, 2.13448824216196, 2.13448824216196, 2.13448824216196, 79.5206201201125, 79.5206201201125, 79.5206201201125, 79.5206201201125), hiCI_control = c(110.59236498583, 110.59236498583, 110.59236498583, 110.59236498583, 3.34877288803287, 3.34877288803287, 3.34877288803287, 3.34877288803287, 0.67219049990661, 0.67219049990661, 0.67219049990661, 0.67219049990661, 9.09509499589843, 9.09509499589843, 9.09509499589843, 9.09509499589843, 93.1750320538006, 93.1750320538006, 93.1750320538006, 93.1750320538006), Variable_test = c("Mathew", "Simp", "hutten", "AB", "Mathew", "Simp", "hutten", "AB", "Mathew", "Simp", "hutten", "AB", "Mathew", "Simp", "hutten", "AB", "Mathew", "Simp", "hutten", "AB"), mean_test = c(94.85, 3.11469350939242, 0.638430601983837, 6.91683472226622, 94.85, 3.11469350939242, 0.638430601983837, 6.91683472226622, 94.85, 3.11469350939242, 0.638430601983837, 6.91683472226622, 94.85, 3.11469350939242, 0.638430601983837, 6.91683472226622, 86.0222222222222, 2.6390595091832, 0.759731493048648, 0.73921233900569), sd_test = c(17.2909316055982, 0.202930255027022, 0.0919447770686176, 8.14939209465999, 17.2909316055982, 0.202930255027022, 0.0919447770686176, 8.14939209465999, 17.2909316055982, 0.202930255027022, 0.0919447770686176, 8.14939209465999, 17.2909316055982, 0.202930255027022, 0.0919447770686176, 8.14939209465999, 21.4163158796882, 0.451677388306635, 0.138197141547272, 0.908654293596164), n_test = c(20L, 20L, 190L, 21L, 20L, 20L, 190L, 21L, 20L, 20L, 190L, 21L, 20L, 20L, 190L, 21L, 45L, 45L, 990L, 45L), se_test = c(3.86636984644171, 0.0453765844931791, 0.00667037520849417, 1.77834314960258, 3.86636984644171, 0.0453765844931791, 0.00667037520849417, 1.77834314960258, 3.86636984644171, 0.0453765844931791, 0.00667037520849417, 1.77834314960258, 3.86636984644171, 0.0453765844931791, 0.00667037520849417, 1.77834314960258, 3.1925558756394, 0.0673320896102137, 0.00439219348020005, 0.135454184568538), lowCI_test = c(86.7575949081585, 
3.01971922654131, 0.625272652671813, 3.20727591549959, 86.7575949081585, 3.01971922654131, 0.625272652671813, 3.20727591549959, 86.7575949081585, 3.01971922654131, 0.625272652671813, 3.20727591549959, 86.7575949081585, 3.01971922654131, 0.625272652671813, 3.20727591549959, 79.5880486308587, 2.50336059906323, 0.751112403965226, 0.466222367603537), hiCI_test = c(102.942405091841, 3.20966779224354, 0.651588551295861, 10.6263935290328, 102.942405091841, 3.20966779224354, 0.651588551295861, 10.6263935290328, 102.942405091841, 3.20966779224354, 0.651588551295861, 10.6263935290328, 102.942405091841, 3.20966779224354, 0.651588551295861, 10.6263935290328, 92.4563958135858, 2.77475841930317, 0.768350582132071, 1.01220231040784)), row.names = c(NA, 20L), class = "data.frame") I want to run the following command (from package metafor) on this dataframe df with a condition: my_data <- escalc(n1i = n_control, n2i = n_test, m1i = mean_control, m2i = mean_test, sd1i = sd_control, sd2i = sd_test, data = df, measure = "SMD", append = TRUE) I want to run the command 4 times for four different subsets from the dataframe (df). First, it will run on a subset dtaframe that will contain variable column value Mathew only and get an output. Next it will run on a dataframe subset that will contain variable column value AB only and get an output and so on.. Thanks in advance, DC7
You can use split() and then lapply(). This will return a list with an entry for each distinct value of variable_control: split_data <- split(df, df$variable_control) lapply(split_data, function(subset) { metafor::escalc(n1i = n_control, n2i = n_test, m1i = mean_control, m2i = mean_test, sd1i = sd_control, sd2i = sd_test, data = subset, measure = "SMD", append = TRUE) })
Country Flags are incorrectly shown in the graph
I'm trying to display the flags for each country. But it seems most of the flags are incorrect according to my code. I want them to be displayed only at the end of the geom line, add the flag to the legend, and make the flag more visible than how it displays now. categ_top10EnergyModf %>% mutate(country = tolower(country)) %>% ggplot(aes(x= year, y=ggwt_hours, country = country, color=country, group=country))+ geom_line(size=1.5)+ geom_point(size=3)+ geom_flag(aes(country = factor(country), size = 4))+ scale_y_continuous(labels = scales::comma)+ facet_wrap(~type2,scale='free')+ labs(x= "Year", y= "Energy Production (GWh)", title = "Analysis of the Growth of Renewable/Non-Renewable Energy Production", color="Country",fill = "country" )+ scale_color_discrete(name = "Country", labels= c("Germany", "Spain", "France", "Italy", "Norway", "Poland", "Sweden", "Turkey", "Ukraine", "United Kingdom") )+ theme_grey() + theme(plot.title = element_text(hjust = 0.5)) > dput(head(categ_top10EnergyModf)) structure(list(country = c("de", "de", "fr", "fr", "fr", "de" ), country_name = c("Germany", "Germany", "France", "France", "France", "Germany"), type2 = c("Non-Renewable", "Non-Renewable", "Non-Renewable", "Non-Renewable", "Non-Renewable", "Non-Renewable" ), year = structure(c(1L, 2L, 2L, 3L, 1L, 3L), .Label = c("2016", "2017", "2018"), class = "factor"), ggwt_hours = c(471984, 449906, 448690.614, 447109.694, 445175.494, 393234.585)), row.names = c(NA, -6L), groups = structure(list(country = c("de", "de", "de", "fr", "fr", "fr"), country_name = c("Germany", "Germany", "Germany", "France", "France", "France"), year = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("2016", "2017", "2018"), class = "factor"), .rows = structure(list(1L, 2L, 6L, 5L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, 6L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
You could display a flag as the last point: geom_flag(aes(x = ifelse(year == 2018, year, NA)), size = 4) I also moved size outside of aes(), so it is a fixed value rather than an aesthetic mapping. You don't have to use factor(country), because you already defined the country aesthetic in ggplot().
After executing the bake(), it doesn't show all the predictors and the outcome in the result
I am writing a model for my dataset. Once the bake() is executed, the result has one missing predictor and the outcome. This happens after writing the recipe steps. Is there any way to resolve this issue? top10_renewableEnergyProd_split <- initial_split(top10_renewableEnergyProd) top10_renewableEnergyProd_train <- training(top10_renewableEnergyProd_split) top10_renewableEnergyProd_test <- testing(top10_renewableEnergyProd_split) top10_renewableEnergyProd_recipe <- recipe(energyProd_2018 ~ country_name + energyProd_2016 + energyProd_2017 , data = top10_renewableEnergyProd_train) #recipe steps top10_renewableEnergyProd_recipe <- top10_renewableEnergyProd_recipe %>% step_center(all_numeric(), -all_outcomes()) %>% step_scale(all_numeric(), -all_outcomes()) %>% step_corr(all_numeric()) top10_renewableEnergyProd_prep <- prep(top10_renewableEnergyProd_recipe, training = top10_renewableEnergyProd_train) top10_renewableEnergyProd_bake <- bake(top10_renewableEnergyProd_prep, top10_renewableEnergyProd_train) top10_renewableEnergyProd_bake > dput(top10_renewableEnergyProd) structure(list(type2 = c("Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable"), country = c("DE", "ES", "FR", "IT", "NO", "PL", "SE", "TR", "UA", "UK"), country_name = c("Germany", "Spain", "France", "Italy", "Norway", "Poland", "Sweden", "Turkey", "Ukraine", "United Kingdom"), energyProd_2016 = c(147622, 103353, 99885.054, 90756.826, 146557, 15468, 77505, 87090, 12097, 58909.047 ), energyProd_2017 = c(175063, 84664, 93907.184, 86786.294, 146285, 18187.708, 82540, 83536.342, 12082.6, 73113.964), energyProd_2018 = c(185226.211, 99725.566, 113658.177, 96820, 146878.825, 15541.473, 77615.947, 93425.906, 13843.9, 79955.967)), row.names = c(NA, -10L), groups = structure(list( country = c("DE", "ES", "FR", "IT", "NO", "PL", "SE", "TR", "UA", "UK"), country_name = c("Germany", "Spain", "France", "Italy", "Norway", "Poland", "Sweden", 
"Turkey", "Ukraine", "United Kingdom"), type2 = c("Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable", "Renewable"), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame")) > dput(top10_renewableEnergyProd_bake) structure(list(country_name = structure(c(2L, 5L, 1L, 3L, 4L, 6L, 7L, 8L), .Label = c("France", "Germany", "Italy", "Norway", "Spain", "Sweden", "Turkey", "United Kingdom"), class = "factor"), energyProd_2016 = c(1.47285970883518, 0.0604065991667829, -0.0502421869332435, -0.341488748282317, 1.4388796649716, -0.764303423167326, -0.458483028395103, -1.35762858619557 )), row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame" ))
Mapping points in a map
I try to show country data in a map using points in the map. Here the dataframe: > dput(countries) structure(list(country = structure(c(5L, 6L, 3L, 4L, 10L, 8L, 11L, 7L, 1L, 13L, 9L, 12L, 2L), .Label = c("Australia", "China", "France", "Georgia", "India", "Ireland", "Malaysia", "Poland", "Qatar", "Singapore", "South Africa", "Spain", "USA"), class = "factor"), Latitude = c(20.593684, 53.142367, 46.227638, 32.165622, 1.352083, 51.919438, -30.559482, 4.210484, -25.274398, 37.09024, 25.354826, 40.463667, 35.86166), Longitude = c(78.96288, -7.692054, 2.213749, -82.900075, 103.819836, 19.145136, 22.937506, 101.975766, 133.775136, -95.712891, 51.183884, -3.74922, 104.195397), Value = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L)), .Names = c("country", "Latitude", "Longitude", "Value"), class = "data.frame", row.names = c(NA, -13L)) The code from here: library(maps) library(ggplot2) base_world <- map_data("world") map_data_coloured <- base_world + geom_point(data=countries, aes(x=Longitude, y=Latitude, colour=Value), size=5, alpha=I(0.7)) But I receive this error: Error in as.vector(x, mode) : cannot coerce type 'environment' to vector of type 'any'
You need to add a geom_polygon() layer to draw your base_world object; map_data() returns a plain data frame, not a plot, so layers cannot be added to it with +: ggplot() + geom_polygon(data=base_world, aes(x=long, y=lat, group=group)) + geom_point(data=countries, aes(x=Longitude, y=Latitude, colour=Value), size=5, alpha=I(0.7))