Country Flags are incorrectly shown in the graph - r

I'm trying to display the flags for each country. But it seems most of the flags are incorrect according to my code. I want them to be displayed only at the end of the geom line, add the flag to the legend, and make the flag more visible than how it displays now.
# Energy production over time: one coloured line per country, one panel per
# energy type, with line, point and flag layers drawn on top of each other.
categ_top10EnergyModf %>%
  mutate(country = tolower(country)) %>%
  ggplot(
    aes(
      x = year,
      y = ggwt_hours,
      country = country,
      color = country,
      group = country
    )
  ) +
  geom_line(size = 1.5) +
  geom_point(size = 3) +
  # flag layer: country is re-mapped as a factor and size is mapped inside aes()
  geom_flag(aes(country = factor(country), size = 4)) +
  scale_y_continuous(labels = scales::comma) +
  facet_wrap(~type2, scales = "free") +
  labs(
    x = "Year",
    y = "Energy Production (GWh)",
    title = "Analysis of the Growth of Renewable/Non-Renewable Energy Production",
    color = "Country",
    fill = "country"
  ) +
  scale_color_discrete(
    name = "Country",
    labels = c(
      "Germany", "Spain", "France", "Italy", "Norway",
      "Poland", "Sweden", "Turkey", "Ukraine", "United Kingdom"
    )
  ) +
  theme_grey() +
  theme(plot.title = element_text(hjust = 0.5))
> dput(head(categ_top10EnergyModf))
structure(list(country = c("de", "de", "fr", "fr", "fr", "de"
), country_name = c("Germany", "Germany", "France", "France",
"France", "Germany"), type2 = c("Non-Renewable", "Non-Renewable",
"Non-Renewable", "Non-Renewable", "Non-Renewable", "Non-Renewable"
), year = structure(c(1L, 2L, 2L, 3L, 1L, 3L), .Label = c("2016",
"2017", "2018"), class = "factor"), ggwt_hours = c(471984, 449906,
448690.614, 447109.694, 445175.494, 393234.585)), row.names = c(NA,
-6L), groups = structure(list(country = c("de", "de", "de", "fr",
"fr", "fr"), country_name = c("Germany", "Germany", "Germany",
"France", "France", "France"), year = structure(c(1L, 2L, 3L,
1L, 2L, 3L), .Label = c("2016", "2017", "2018"), class = "factor"),
.rows = structure(list(1L, 2L, 6L, 5L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

You could display a flag as the last point:
# Draw the flag on the 2018 observations only by handing this layer a filtered
# copy of the plot data. `year` is a factor, so ifelse(year == 2018, year, NA)
# would return integer level codes (and NA rows) instead of the year values.
# which() drops rows whose year is NA rather than turning them into NA rows.
geom_flag(data = function(d) d[which(d$year == "2018"), ], size = 4)
I also set size outside aes(), so it is a fixed value rather than a mapped aesthetic.
You don't have to use factor(country): you already defined the country aesthetic in ggplot().

Related

How can I make several lines in ggplot with several group layers?

I have divided my plots into 2 based on Sportbook and Casino. How is it possible to also split line into several lines (different colors) to show different markets? I tried to use fill=market at the end of ggplot function, however it did not help.
library(ggplot2)
data<-structure(list(wday = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday", "Sunday"), class = "factor"), market = c("France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland", "France",
"France", "Germany", "Germany", "Poland", "Poland"), product_preference = c("Casino",
"Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook",
"Casino", "Sportsbook", "Casino", "Sportsbook", "Casino", "Sportsbook"
), ggr = c(3349.80897892753, 161.917715712988, 17700.4568364611,
-123.342131455399, 17208.7731385281, 3128.51277864992, 2877.17330617787,
28.5162781278127, 13453.7092912371, -82.8980672268908, 13611.1197727273,
9910.32070866143, 3939.20578803854, 126.311590466926, 19097.2664228723,
-94.5491666666667, 16706.9427008929, 2636.63687707641, 3393.43150322119,
176.953280238925, 23414.9515950069, -72.4428986866791, 16140.8680085653,
5618.00758333333, 3007.18322084806, 69.4383454281568, 18018.1755748663,
-77.87698, 19889.0339183673, 5561.69038585209, 4205.12735472371,
-16.0552268431002, 17166.1121932115, -117.149356025759, 18527.8546597938,
6806.36808346213, 3446.70375835385, 56.6674850849013, 18026.2400535475,
-67.3431629701062, 13641.4965135699, 11470.3083969466)), row.names = c(NA,
-42L), groups = structure(list(wday = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L,
7L, 7L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday", "Sunday"), class = "factor"), market = c("France",
"Germany", "Poland", "France", "Germany", "Poland", "France",
"Germany", "Poland", "France", "Germany", "Poland", "France",
"Germany", "Poland", "France", "Germany", "Poland", "France",
"Germany", "Poland"), .rows = structure(list(1:2, 3:4, 5:6, 7:8,
9:10, 11:12, 13:14, 15:16, 17:18, 19:20, 21:22, 23:24, 25:26,
27:28, 29:30, 31:32, 33:34, 35:36, 37:38, 39:40, 41:42), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), na.action = structure(43:46, .Names = c("43",
"44", "45", "46"), class = "omit"), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
# Weekly ggr as a line, grouped by product preference and split into one
# facet column per product preference.
ggplot() +
  geom_line(
    data = data,
    mapping = aes(x = wday, y = ggr, group = product_preference)
  ) +
  facet_grid(cols = vars(product_preference), scales = "free")
You can define the color of your lines by adding color = <grouping variable>.
Also, you already do a facet grid on product_preference, so there seems to be no need to define group = product_preference.
Try this:
# One coloured line per market, split into one facet column per product
# preference.
ggplot() +
  geom_line(
    data = data,
    mapping = aes(x = wday, y = ggr, color = market, group = market)
  ) +
  facet_grid(cols = vars(product_preference), scales = "free")
I am not sure this is what you wanted, but I would replace geom_line with 'geom_col'. 'geom_col' also takes the 'fill' attribute, while 'geom_line' would require 'color'.
So my suggestion would be the following:
# Column chart variant: bars filled by market, one facet column per product
# preference.
ggplot() +
  geom_col(
    data = data,
    mapping = aes(
      x = wday,
      y = ggr,
      group = product_preference,
      fill = market
    )
  ) +
  facet_grid(cols = vars(product_preference), scales = "free")
This results in the following plot:

After executing the bake(), it doesn't show all the predictors and the outcome in the result

I am writing a model for my dataset. Once bake() is executed, the result is missing one predictor and the outcome.
This happens after writing the recipe steps. Is there any way to resolve this issue?
# Split the data into training and test sets.
top10_renewableEnergyProd_split <- initial_split(top10_renewableEnergyProd)
top10_renewableEnergyProd_train <- training(top10_renewableEnergyProd_split)
top10_renewableEnergyProd_test <- testing(top10_renewableEnergyProd_split)

# Recipe: energyProd_2018 is the outcome; country name and the two earlier
# years are the predictors.
top10_renewableEnergyProd_recipe <-
  recipe(
    energyProd_2018 ~ country_name + energyProd_2016 + energyProd_2017,
    data = top10_renewableEnergyProd_train
  ) %>%
  # centre and scale every numeric column except the outcome
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes()) %>%
  # NOTE(review): unlike the two steps above, this selector does not exclude
  # the outcome, so energyProd_2018 is presumably also a candidate for removal
  # by the correlation filter -- confirm against the recipes documentation.
  step_corr(all_numeric())

# Estimate the steps on the training set, then apply them to that same set.
top10_renewableEnergyProd_prep <- prep(
  top10_renewableEnergyProd_recipe,
  training = top10_renewableEnergyProd_train
)
top10_renewableEnergyProd_bake <- bake(
  top10_renewableEnergyProd_prep,
  top10_renewableEnergyProd_train
)
top10_renewableEnergyProd_bake
> dput(top10_renewableEnergyProd)
structure(list(type2 = c("Renewable", "Renewable", "Renewable",
"Renewable", "Renewable", "Renewable", "Renewable", "Renewable",
"Renewable", "Renewable"), country = c("DE", "ES", "FR", "IT",
"NO", "PL", "SE", "TR", "UA", "UK"), country_name = c("Germany",
"Spain", "France", "Italy", "Norway", "Poland", "Sweden", "Turkey",
"Ukraine", "United Kingdom"), energyProd_2016 = c(147622, 103353,
99885.054, 90756.826, 146557, 15468, 77505, 87090, 12097, 58909.047
), energyProd_2017 = c(175063, 84664, 93907.184, 86786.294, 146285,
18187.708, 82540, 83536.342, 12082.6, 73113.964), energyProd_2018 = c(185226.211,
99725.566, 113658.177, 96820, 146878.825, 15541.473, 77615.947,
93425.906, 13843.9, 79955.967)), row.names = c(NA, -10L), groups = structure(list(
country = c("DE", "ES", "FR", "IT", "NO", "PL", "SE", "TR",
"UA", "UK"), country_name = c("Germany", "Spain", "France",
"Italy", "Norway", "Poland", "Sweden", "Turkey", "Ukraine",
"United Kingdom"), type2 = c("Renewable", "Renewable", "Renewable",
"Renewable", "Renewable", "Renewable", "Renewable", "Renewable",
"Renewable", "Renewable"), .rows = structure(list(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
> dput(top10_renewableEnergyProd_bake)
structure(list(country_name = structure(c(2L, 5L, 1L, 3L, 4L,
6L, 7L, 8L), .Label = c("France", "Germany", "Italy", "Norway",
"Spain", "Sweden", "Turkey", "United Kingdom"), class = "factor"),
energyProd_2016 = c(1.47285970883518, 0.0604065991667829,
-0.0502421869332435, -0.341488748282317, 1.4388796649716,
-0.764303423167326, -0.458483028395103, -1.35762858619557
)), row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"
))

Why can't rbind append these two datasets?

I have these two datasets that I am trying to append:
data1 = structure(list(year = c(2017, 2018), flow = c("Export", "Export"
), EUR = c(4, 3.44), Home = c(3.09, 3.03), Not_reported = c(0.12,
0), USD = c(92.29, 93.04), country = c("Brazil", "Brazil"), Other = c(0.499999999999994,
0.489999999999994)), row.names = c(NA, -2L), vars = c("year",
"flow"), drop = TRUE, indices = list(0L, 1L), group_sizes = c(1L,
1L), biggest_group_size = 1L, labels = structure(list(year = c(2017,
2018), flow = c("Export", "Export")), row.names = c(NA, -2L), vars = c("year",
"flow"), drop = TRUE, indices = list(0L, 1L), group_sizes = c(1L,
1L), biggest_group_size = 1L, labels = structure(list(year = c(2017,
2018), flow = c("EXP", "EXP")), class = "data.frame", row.names = c(NA,
-2L), vars = c("year", "flow"), drop = TRUE), class = "data.frame"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
data2 = structure(list(flow = c("Export", "Export", "Export", "Export",
"Export", "Import"), country = structure(c(6L, 6L, 6L, 6L, 6L,
6L), .Label = c("Algeria", "Argentina", "Australia", "Austria",
"Belgium", "Brazil", "Bulgaria", "Canada", "China", "Colombia",
"Cyprus", "Czech Republic", "Denmark", "Estonia", "Euro", "Finland",
"France", "Germany", "Greece", "Hungary", "Iceland", "India",
"Indonesia", "Ireland", "Israel", "Italy", "Japan", "Latvia",
"Lithuania", "Luxembourg", "Malaysia", "Malta", "Morocco", "Netherlands",
"Pakistan", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
"South Africa", "South Korea", "Spain", "Sweden", "Switzerland",
"Thailand", "Ukraine", "United Kingdom", "United States"), class = "factor"),
year = c(2007, 2008, 2009, 2010, 2011, 2007), EUR = c(4.76,
4.95, 4.51, 4.28, 3.8, 11.1), Home = c(0.13, 0.16, 1.11,
0.82, NA, 0.48), Not_reported = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), USD = c(94.7, 94.4, 93.8,
94.3, 94.5, 85.5), Other = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), row.names = c(NA, 6L), class = "data.frame")
When I tried:
# Try to stack data1 (class grouped_df/tbl_df) on top of data2 (plain data.frame).
rbind(data1, data2)
I got a list instead of a data frame. I have checked the class of each column and they seem consistent with each other. Can someone explain this to me? Thanks!

How to change values more efficiently

I want to replace the , with a . as the decimal point in a dataframe. I can do this with df$X2005 <- as.numeric(gsub(',', '.', df$X2005)) for each variable. Is there a more efficient way to do this for the whole dataframe at once?
Some example data:
df <- structure(list(country = structure(1:6, .Label = c("Australia", "Austria", "Belgium", "Canada", "Chile", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Korea", "Luxembourg", "Mexico", "Netherlands", "New Zealand", "Norway", "Poland", "Portugal", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom", "United States"), class = "factor"), X2005 = structure(c(26L, 2L, 34L, 33L, 13L, 14L), .Label = c("10,3533", "10,4187", "10,8089", "10,8629", "10,882", "11,0173", "15,8399", "5,0226", "5,4488", "5,6273", "5,8713", "6,2137", "6,6397", "6,9339", "7,0448", "7,5719", "7,8534", "7,9457", "8,1819", "8,2668", "8,2883", "8,3556", "8,394", "8,4295", "8,4456", "8,4794", "8,7437", "9,0304", "9,0615", "9,4427", "9,6618", "9,77", "9,8295", "9,9833"), class = "factor"), X2006 = structure(c(25L, 2L, 31L, 34L, 13L, 14L), .Label = c("10,0326", "10,2177", "10,3877", "10,6374", "10,7468", "10,9516", "15,9368", "5,0169", "5,6845", "5,8109", "6,1019", "6,2008", "6,285", "6,6937", "7,3477", "7,5148", "7,5836", "7,7495", "8,1986", "8,2586", "8,2807", "8,3448", "8,39", "8,4289", "8,5204", "8,564", "8,8247", "8,8401", "8,948", "9,1292", "9,4811", "9,7487", "9,9243", "9,9621"), class = "factor"), X2007 = structure(c(27L, 3L, 31L, 1L, 14L, 13L), .Label = c("10,0263", "10,2099", "10,2617", "10,4771", "10,7642", "10,8754", "16,1608", "5,1597", "5,7779", "6,0372", "6,3331", "6,3858", "6,5223", "6,5494", "7,1288", "7,6299", "7,6744", "7,7553", "7,8565", "7,9023", "8,043", "8,2295", "8,4769", "8,4908", "8,5014", "8,504", "8,5531", "8,746", "8,9172", "9,0913", "9,5254", "9,8104", "9,9873", "9,9942"), class = "factor"), X2008 = structure(c(26L, 6L, 34L, 4L, 17L, 15L), .Label = c("10,1268", "10,183", "10,2189", "10,2537", "10,289", "10,4896", "10,7042", "10,9909", "11,0232", "16,6201", "5,8474", "6,0577", "6,0745", "6,586", "6,8189", "6,8863", "7,1361", "7,1819", 
"7,4631", "7,7052", "8,0208", "8,3068", "8,3457", "8,5513", "8,605", "8,751", "8,8915", "8,9402", "8,9521", "9,0591", "9,1344", "9,2284", "9,3051", "9,9128"), class = "factor"), X2009 = structure(c(24L, 8L, 5L, 9L, 21L, 22L), .Label = c("", "10,0115", "10,0496", "10,1957", "10,5938", "10,8137", "11,0005", "11,1729", "11,3992", "11,4722", "11,7314", "11,7516", "11,8823", "17,6706", "6,4098", "7,039", "7,1018", "7,2127", "7,6797", "7,7356", "7,8649", "7,9514", "7,9657", "9,0423", "9,152", "9,17", "9,1947", "9,4037", "9,5258", "9,6247", "9,636", "9,6743", "9,9056", "9,939"), class = "factor"), X2010 = structure(c(23L, 6L, 3L, 8L, 18L, 19L), .Label = c("", "10,1995", "10,503", "10,797", "10,8817", "11,0318", "11,0751", "11,3738", "11,5495", "11,677", "12,0661", "17,6911", "6,1782", "6,3394", "7,0229", "7,1675", "7,2911", "7,37", "7,4319", "7,6856", "8,0302", "8,8718", "8,9481", "8,9888", "8,995", "9,2925", "9,312", "9,4079", "9,4224", "9,4688", "9,5277", "9,5504", "9,589", "9,6074"), class = "factor"), X2011 = structure(c(NA, 5L, 4L, 8L, 18L, 17L), .Label = c("", "10,2345", "10,2844", "10,5139", "10,7769", "10,8671", "11,0148", "11,1784", "11,3323", "11,6343", "11,9369", "17,683", "5,922", "6,6464", "6,8709", "7,3692", "7,5011", "7,5206", "7,7333", "7,8877", "7,9416", "8,8501", "8,9042", "9,0019", "9,027", "9,1296", "9,2256", "9,2837", "9,2969", "9,4184", "9,4661"), class = "factor"), X2012 = structure(c(NA, NA, NA, 2L, 5L, NA), .Label = c("", "11,2132", "11,2955", "7,5249", "7,6077", "7,8226", "8,7596", "8,923", "9,148", "9,167", "9,3722"), class = "factor")), .Names = c("country", "X2005", "X2006", "X2007", "X2008", "X2009", "X2010", "X2011", "X2012"), row.names = c(NA, 6L), class = "data.frame")
You can instead read your data in the correct format.
If you use read.table, for instance, set the dec = "," argument. You'll probably need to set the na.strings = "<NA>" argument as well.
You could use a for loop:
# Convert every column except the first (country) from comma-decimal text to
# numeric. names(df)[-1] is used instead of names(df[, -1]) because the latter
# collapses to an unnamed vector when only one column remains, which would make
# the loop silently do nothing.
for (col in names(df)[-1]) {
  # gsub() coerces factor columns to character; fixed = TRUE matches a literal
  # comma rather than treating the pattern as a regular expression.
  df[[col]] <- as.numeric(gsub(",", ".", df[[col]], fixed = TRUE))
}

showing vertex labels inside vertexes only

I have this data frame:
dput(df2)
structure(list(Receiver = structure(c(4L, 3L, 2L, 1L), .Label = c("Australia",
"United Arab Emirates", "United Kingdom", "United States of America"
), class = "factor"), Sender = structure(c(1L, 1L, 1L, 1L), .Label = "United States of America", class = "factor")), .Names = c("Receiver",
"Sender"), row.names = c(NA, -4L), class = "data.frame")
I would like to draw an igraph plot like this:
library(igraph)
# Build a graph from the edge list in df2.
g <- graph.data.frame(df2)
# Plot with a Kamada-Kawai layout, labelling each vertex with its name.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plot(
  g,
  layout = layout.kamada.kawai,
  vertex.label = V(g)$name,
  vertex.label.color = "red",
  vertex.shape = "circle",
  vertex.color = "lightblue",
  edge.arrow.size = 0.8,
  edge.curved = TRUE,
  edge.color = "pink",
  edge.label.color = "white",
  edge.label.cex = 0.8,
  asp = 0,
  margin = 0
)
I would like to show the vertex labels inside the vertices, without increasing the size of the vertices. Any ideas how I can do this?
You can do something like this before the call plot:
# Set the label.cex vertex attribute so the label text is scaled by 0.5.
V(g)$label.cex <- 0.5
But why not use shortened versions of the names?
# Replace the vertex names with short abbreviations.
V(g)$name <- c("USA", "UK", "UAE", "Aus")
dat <- structure(list(Receiver = structure(c(4L, 3L, 2L, 1L), .Label = c("Australia",
"United Arab \nEmirates", "United \nKingdom", "United \nStates of \nAmerica"
), class = "factor"), Sender = structure(c(1L, 1L, 1L, 1L), .Label = "United \nStates of \nAmerica", class = "factor")), .Names = c("Receiver",
"Sender"), row.names = c(NA, -4L), class = "data.frame")
library(igraph)
# Build a graph from the edge list in dat, whose names contain line breaks.
g <- graph.data.frame(dat)
# Scale the vertex label text by 0.6.
V(g)$label.cex <- 0.6
# Plot with a Kamada-Kawai layout, labelling each vertex with its name.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plot(
  g,
  layout = layout.kamada.kawai,
  vertex.label = V(g)$name,
  vertex.label.color = "red",
  vertex.shape = "circle",
  vertex.color = "lightblue",
  edge.arrow.size = 0.8,
  edge.curved = TRUE,
  edge.color = "pink",
  edge.label.color = "white",
  edge.label.cex = 0.8,
  asp = 0,
  margin = 0
)
The following might help.
# Size the network nodes based on their degree centrality.
# Every igraph call is namespace-qualified so the snippet also works when the
# package is installed but not attached.
deg <- igraph::degree(
  graph = g,
  v = igraph::V(g),
  mode = "all",
  loops = TRUE,
  normalized = FALSE
)
igraph::V(g)$size <- deg * 3
# Set the label size relative to the largest node (so the maximum is 1).
igraph::V(g)$label.cex <- igraph::V(g)$size / max(igraph::V(g)$size)

Resources