position=dodge in geom_col in barplot - r

here is a dataset of soccer players that I need to visualise the total number of yellow cards received next to the number of games played per country in one bar plot. SO I need to calculate the total number of yellow cards and the total number of games per league country and bring the data into long format.
dput(head(new_soccer_referee))
structure(list(playerShort = c("lucas-wilchez", "john-utaka",
"abdon-prats", "pablo-mari", "ruben-pena", "aaron-hughes"), player = c("Lucas Wilchez",
"John Utaka", " Abdón Prats", " Pablo Marí", " Rubén Peña", "Aaron Hughes"
), club = c("Real Zaragoza", "Montpellier HSC", "RCD Mallorca",
"RCD Mallorca", "Real Valladolid", "Fulham FC"), leagueCountry = c("Spain",
"France", "Spain", "Spain", "Spain", "England"), birthday = structure(c(4990,
4390, 8386, 8643, 7868, 3598), class = "Date"), height = c(177L,
179L, 181L, 191L, 172L, 182L), weight = c(72L, 82L, 79L, 87L,
70L, 71L), position = c("Attacking Midfielder", "Right Winger",
NA, "Center Back", "Right Midfielder", "Center Back"), games = c(1L,
1L, 1L, 1L, 1L, 1L), victories = c(0L, 0L, 0L, 1L, 1L, 0L), ties = c(0L,
0L, 1L, 0L, 0L, 0L), defeats = c(1L, 1L, 0L, 0L, 0L, 1L), goals = c(0L,
0L, 0L, 0L, 0L, 0L), yellowCards = c(0L, 1L, 1L, 0L, 0L, 0L),
yellowReds = c(0L, 0L, 0L, 0L, 0L, 0L), redCards = c(0L,
0L, 0L, 0L, 0L, 0L), photoID = c("95212.jpg", "1663.jpg",
NA, NA, NA, "3868.jpg"), rater1 = c(0.25, 0.75, NA, NA, NA,
0.25), rater2 = c(0.5, 0.75, NA, NA, NA, 0), refNum = c(1L,
2L, 3L, 3L, 3L, 4L), refCountry = c(1L, 2L, 3L, 3L, 3L, 4L
), Alpha_3 = c("GRC", "ZMB", "ESP", "ESP", "ESP", "LUX"),
meanIAT = c(0.326391469021736, 0.203374724564378, 0.369893594187172,
0.369893594187172, 0.369893594187172, 0.325185154120009),
nIAT = c(712L, 40L, 1785L, 1785L, 1785L, 127L), seIAT = c(0.000564112354334542,
0.0108748941063986, 0.000229489640866464, 0.000229489640866464,
0.000229489640866464, 0.00329680952361961), meanExp = c(0.396,
-0.204081632653061, 0.588297311544544, 0.588297311544544,
0.588297311544544, 0.538461538461538), nExp = c(750L, 49L,
1897L, 1897L, 1897L, 130L), seExp = c(0.0026964901062936,
0.0615044043187379, 0.00100164730649311, 0.00100164730649311,
0.00100164730649311, 0.013752210497518), BMI = c(22.98190175237,
25.5922099809619, 24.1140380330271, 23.8480304816206, 23.6614386154678,
21.4346093466973), position_new = c("Offense", "Offense",
"Goalkeeper", "Defense", "Midfield", "Defense"), rater_mean = c(0.375,
0.75, NA, NA, NA, 0.125), ageinyear = c(28, 30, 19, 18, 20,
32), ageinyears = c(28, 30, 19, 18, 20, 32)), row.names = c(NA,
6L), class = "data.frame")
Use the data to draw a bar plot with the following characteristics:
– The x-axis displays the league country while the y-axis displays the number of games and the number of cards
– For each country there are two bars next to each other: one for the games played and one for the cards received
barplot <- ggplot(new_soccer_referee,aes(x=leagueCountry,y=number))
barplot +
geom_bar(fill=c("games","yellowCards")) +
geom_col(Position="dodge") +
labels(x="leagueCountry", y="number")
ggplot
`
I know it is pretty messy but I am really confused how to build up the layers with ggplot and how to work out the long format, can anyone help?

One option would be to first aggregate your data to compute the number of yellowCards and games by leagueCountry. Afterwards you could convert to long which makes it easy to plot via ggplot2.
Using some fake random example data to mimic your real data:
set.seed(123)
new_soccer_referee <- data.frame(
player = sample(letters, 20),
leagueCountry = sample(c("Spain", "France", "England", "Italy"), 20, replace = TRUE),
yellowCards = sample(1:5, 20, replace = TRUE),
games = sample(1:20, 20, replace = TRUE)
)
library(dplyr)
library(tidyr)
library(ggplot2)
new_soccer_referee_long <- new_soccer_referee %>%
group_by(leagueCountry) %>%
summarise(across(c(yellowCards, games), sum)) %>%
pivot_longer(-leagueCountry, names_to = "variable", values_to = "number")
ggplot(new_soccer_referee_long, aes(leagueCountry, number, fill = variable)) +
geom_col(position = "dodge")

Something like this:
library(tidyverse)
new_soccer_referee %>%
select(leagueCountry, games, yellowCards) %>%
group_by(leagueCountry) %>%
summarise(games = sum(games),
yellowCars = sum(yellowCards)
) %>%
pivot_longer(-leagueCountry) %>%
ggplot(aes(x=leagueCountry, fill=name, y=value)) +
geom_col(position = position_dodge())

Related

Looking to create a network based on bee floral visitation in Rstudio

I am looking to create a network showing different bees visiting different flowers. I have been using the igraph r package and believe it should be a bipartite network.
looking for something that looks like this with thicker bars indicating more visitations and thinner bars showing less visitations:
Part of my dataset is shown here with floral species on the left and bee species along the top with visitation numbers shown in the table.
I am not sure if this is the most efficient way to set up my data table or how to go about getting my bipartite network with bee species on one side and floral species on the other. Is using the igraph package the best way to do this?
This was the code used to get the network:
bg=graph_from_incidence_matrix(floral_network_)
bg
V(bg)$type
V(bg)$type+1
V(bg)$shape=c("square","circle")[V(bg)$type+1] V(bg)$color=c("tomato","lightblue")[V(bg)$type+1] plot(bg)
plot(bg, layout=layout_as_bipartite(bg), edge.color="black", edge.width=4,vertex.label.dist=1, vertex.label.degree=c(rep(-pi/2, 4), rep(pi/2, 5)))
Your sample data is nor reproducible (since it is an image), but here is a reproducible version:
floral_network_ <- structure(list(bimac = c(2L, 16L, 9L, 4L, 1L), borealis = c(0L,
7L, 0L, 0L, 1L), ferv = c(0L, 2L, 0L, 0L, 0L), griseocolli = c(0L,
0L, 0L, 0L, 0L), impatiens = c(9L, 2L, 4L, 1L, 0L), perplexus = c(0L,
0L, 0L, 0L, 0L), rufocinctu = c(0L, 5L, 0L, 0L, 0L), ternarius = c(0L,
0L, 2L, 0L, 0L), vagans = c(0L, 1L, 3L, 0L, 0L)), class = "data.frame",
row.names = c("Elaeagnus_umbellata",
"Symphytum_officinale", "Hypericum_perforatum", "Glechoma_hederacea",
"Trifolium_pratense"))
To get the freedom to plot this as you would like, I think it would be best to reshape your data and plot with ggplot
library(tidyverse)
as.data.frame(as.table(as.matrix(floral_network_))) %>%
filter(Freq > 0) %>%
rename(Flower = Var1, Bee = Var2) %>%
mutate(bee_pos_x = as.numeric(factor(Bee)),
flower_pos_x = as.numeric(factor(Flower)) * 6/4 - 0.5,
bee_pos_y = 2, flower_pos_y = 1) %>%
ggplot(aes(bee_pos_x, bee_pos_y)) +
geom_segment(aes(x = bee_pos_x, y = bee_pos_y, xend = flower_pos_x,
yend = flower_pos_y, size = Freq), colour = "red",
alpha = 0.2, lineend = "round") +
geom_point(shape = 21, fill = "yellow2", size = 8) +
geom_point(aes(flower_pos_x, flower_pos_y), size = 9, shape = 21,
fill = "deepskyblue2") +
geom_text(aes(label = Bee), nudge_y = 0.1) +
geom_text(aes(flower_pos_x, flower_pos_y, label = Flower), nudge_y = -0.1) +
scale_y_continuous(limits = c(0.5, 2.5)) +
scale_x_continuous(limits = c(0, 8)) +
theme_void() +
theme(legend.position = "none")

Build Decision Tree Classification

I have two datasets , partb_data1 and partb_data2 . Given sample of customers of a bank that reflects the characteristics of the clients and whether the bank continues to work with them or not (Churn). Exited: Churn (1 if he has left the bank and 0 if he continues to work with it). Im using partb_data1 as train set and partb_data2 as test set.
Here is my data :
> dput(head(partb_data1))
structure(list(RowNumber = 1:6, CustomerId = c(15634602L, 15647311L,
15619304L, 15701354L, 15737888L, 15574012L), Surname = c("Hargrave",
"Hill", "Onio", "Boni", "Mitchell", "Chu"), CreditScore = c(619L,
608L, 502L, 699L, 850L, 645L), Geography = c("France", "Spain",
"France", "France", "Spain", "Spain"), Gender = c("Female", "Female",
"Female", "Female", "Female", "Male"), Age = c(42L, 41L, 42L,
39L, 43L, 44L), Tenure = c(2L, 1L, 8L, 1L, 2L, 8L), Balance = c(0,
83807.86, 159660.8, 0, 125510.82, 113755.78), NumOfProducts = c(1L,
1L, 3L, 2L, 1L, 2L), HasCrCard = c(1L, 0L, 1L, 0L, 1L, 1L), IsActiveMember = c(1L,
1L, 0L, 0L, 1L, 0L), EstimatedSalary = c(101348.88, 112542.58,
113931.57, 93826.63, 79084.1, 149756.71), Exited = c(1L, 0L,
1L, 0L, 0L, 1L)), row.names = c(NA, 6L), class = "data.frame")
> dput(head(partb_data2))
structure(list(RowNumber = 8001:8006, CustomerId = c(15629002L,
15798053L, 15753895L, 15595426L, 15645815L, 15632848L), Surname = c("Hamilton",
"Nnachetam", "Blue", "Madukwe", "Mills", "Ferrari"), CreditScore = c(747L,
707L, 590L, 603L, 615L, 634L), Geography = c("Germany", "Spain",
"Spain", "Spain", "France", "France"), Gender = c("Male", "Male",
"Male", "Male", "Male", "Female"), Age = c(36L, 32L, 37L, 57L,
45L, 36L), Tenure = c(8L, 9L, 1L, 6L, 5L, 1L), Balance = c(102603.3,
0, 0, 105000.85, 0, 69518.95), NumOfProducts = c(2L, 2L, 2L,
2L, 2L, 1L), HasCrCard = c(1L, 1L, 0L, 1L, 1L, 1L), IsActiveMember = c(1L,
0L, 0L, 1L, 1L, 0L), EstimatedSalary = c(180693.61, 126475.79,
133535.99, 87412.24, 164886.64, 116238.39), Exited = c(0L, 0L,
0L, 1L, 0L, 0L)), row.names = c(NA, 6L), class = "data.frame")
I have created Classification trees in order to predict churn . Here follows the code:
library(tidyverse)
library(caret)
library(rpart)
library(rpart.plot)
# Split the data into training and test set
train.data <- head(partb_data1, 500)
test.data <- tail(partb_data2, 150)
# Build the model
modelb <- rpart(Exited ~., data = train.data, method = "class")
# Visualize the decision tree with rpart.plot
rpart.plot(modelb)
# Make predictions on the test data
predicted.classes <- modelb %>%
predict(test.data, type = "class")
head(predicted.classes)
# Compute model accuracy rate on test data
mean(predicted.classes == test.data$Exited)
### Pruning the tree :
# Fit the model on the training set
modelb2 <- train(
Exited ~., data = train.data, method = "rpart",
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Plot model accuracy vs different values of
# cp (complexity parameter)
plot(modelb2)
# Print the best tuning parameter cp that
# maximizes the model accuracy
modelb2$bestTune
# Plot the final tree model
plot(modelb2$finalModel)
# Make predictions on the test data
predicted.classes <- modelb2 %>% predict(test.data)
# Compute model accuracy rate on test data
mean(predicted.classes == test.data$Exited)
Note: I have made test set from the partb_data2.
Is the procedure i follow right? I must make any changes in order to accomplish my target which is classification trees ? Your help would be trully welcome !
EDITED !!!
Your head(partb_data1$Exited, 500) isn't a data.frame. Because of the $ you take a subset of your partb_data1 data. It's only an integer-vector, so that can't work.
class(head(partb_data1$Exited, 500))
[1] "integer"
There are always a lot of procedure options.
But you're right with sepreate your Data into a trainings, and a testdataset. Its also possible to use a crossvalidation instead. You're using a crossvalidation on your trainingsset, thats normally not necessary, but also possible.
I think using your complete Data for the cv should also work, but what you're doing isn't wrong.

Why are my ggplot2 aesthetics the wrong length?

I am trying to average reps of data, subset one treatment, then make a bar graph of the response and another factor. My plot ends up not working. Any help would be much appreciated.
My data:
data <- structure(list(Sample = c(1011L, 1012L, 1014L, 1024L, 1025L,
1026L), Collection = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), Irrigation = structure(c(3L, 3L, 3L,
5L, 5L, 5L), .Label = c("Rate1", "Rate2", "Rate3", "Rate4", "Rate5"
), class = "factor"), Variety = structure(c(2L, 1L, 3L, 3L, 2L,
1L), .Label = c("Hodag", "Lamoka", "Snowden"), class = "factor"),
Suc = c(0.7333, 0.4717, 0.5883, 0.6783, 0.8283, 0.6833),
Gluc = c(0.03, 0.04, 0.043, 0.075, 0.057, 0.087), L = c(59.48,
57.59, 59.25, 66.45, 68.29, 65.65), a = c(4.36, 6.85, 3.43,
1.7, 0.78, 2.84), b = c(26.82, 27.6, 26.2, 26.14, 25.37,
27.19), NoDefect = c(100L, 100L, 100L, 92L, 100L, 100L),
Defect = c(0L, 0L, 0L, 8L, 0L, 0L)), row.names = c(NA, 6L
), class = "data.frame")
Averaging between reps:
dataAvgSuc <- data %>%
dplyr::group_by(Collection, Irrigation, Variety) %>%
dplyr::summarise(meanSuc=mean(Suc))
Made 'Collection' a factor:
dataAvgSuc$Collection <- as.factor(dataAvgSuc$Collection)
Subset by variety:
subLamoka <- subset(dataAvgSuc, Variety=="Lamoka")
subHodag <- subset(dataAvgSuc, Variety=="Hodag")
subSnowden <- subset(dataAvgSuc, Variety=="Snowden")
Attempted ggplot:
sucPlot <-ggplot(data=subLamoka, aes(x=dataAvgSuc$Collection,
y=meanSuc)) + geom_bar(stat="identity")
Error code:
Error: Aesthetics must be either length 1 or the same as the data (10):
x, y
However, both the x and y have 30 entries when I look at them.
Trev,
Had some trouble re-generating the issue as the sample data provided are for just 6 observations, not 30. So not sure if the below solution would work for you or not.
I used the code you supplied to create the dataframe:
data <- structure(list(Sample = c(1011L, 1012L, 1014L, 1024L, 1025L, 1026L),
Collection = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"),
Irrigation = structure(c(3L, 3L, 3L,5L, 5L, 5L), .Label = c("Rate1", "Rate2",
"Rate3", "Rate4", "Rate5"
), class = "factor"), Variety = structure(c(2L, 1L, 3L, 3L, 2L,
1L), .Label = c("Hodag", "Lamoka", "Snowden"), class = "factor"),
Suc = c(0.7333, 0.4717, 0.5883, 0.6783, 0.8283, 0.6833),
Gluc = c(0.03, 0.04, 0.043, 0.075, 0.057, 0.087),
L = c(59.48, 57.59, 59.25, 66.45, 68.29, 65.65),
a = c(4.36, 6.85, 3.43, 1.7, 0.78, 2.84),
b = c(26.82, 27.6, 26.2, 26.14, 25.37,27.19),
NoDefect = c(100L, 100L, 100L, 92L, 100L, 100L),
Defect = c(0L, 0L, 0L, 8L, 0L, 0L)),
row.names = c(NA, 6L), class = "data.frame")
data$Collection
However, your Collection factor is defined with two levels but only one is shown in the example. Perhaps this could be why the averages were coming out greater than 1?I modified the code below to have 2 levels of collection represented in the data.
data2 <- structure(list(Sample = c(1011L, 1012L, 1014L, 1024L, 1025L, 1026L),
Collection = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"),
Irrigation = structure(c(3L, 3L, 3L,5L, 5L, 5L), .Label = c("Rate1", "Rate2",
"Rate3", "Rate4", "Rate5"
), class = "factor"), Variety = structure(c(2L, 1L, 3L, 3L, 2L,
1L), .Label = c("Hodag", "Lamoka", "Snowden"), class = "factor"),
Suc = c(0.7333, 0.4717, 0.5883, 0.6783, 0.8283, 0.6833),
Gluc = c(0.03, 0.04, 0.043, 0.075, 0.057, 0.087),
L = c(59.48, 57.59, 59.25, 66.45, 68.29, 65.65),
a = c(4.36, 6.85, 3.43, 1.7, 0.78, 2.84),
b = c(26.82, 27.6, 26.2, 26.14, 25.37,27.19),
NoDefect = c(100L, 100L, 100L, 92L, 100L, 100L),
Defect = c(0L, 0L, 0L, 8L, 0L, 0L)),
row.names = c(NA, 6L), class = "data.frame")
data2$Collection
Since you're using dplyr just keep piping that object into ggplot-- I don't think you would need to create subsets of new dataframes, but can instead graph them all separately with a facet_wrap command. I also am using geom_col instead of geom_bar, which the latter is generally trying to graph count data. Since you want to plot an average, geom_col may be better. Also since the example below is piping to the next line, the "data=" definition typically used in ggplot commands is not needed.
First with data:
data %>%
dplyr::group_by(Collection,Irrigation, Variety) %>%
dplyr::summarise(meanSuc=mean(Suc)) %>%
ggplot(aes(x = Collection, y = meanSuc)) +
geom_col() +
facet_wrap(.~Variety)
Incorporate Irrigation:
data %>%
dplyr::group_by(Collection,Irrigation, Variety) %>%
dplyr::summarise(meanSuc=mean(Suc)) %>%
ggplot(aes(x = Collection, y = meanSuc, fill = Irrigation)) +
geom_col() +
facet_wrap(.~Variety)
And using data2 instead, as defined above, will produce the Collection levels 1 and 2 side by side on the graph. With this method I was able to generate a result and all averages were less than 1, between .4~.8

How do you create a slider on Shiny/Plotly App that returns anything between the ranges selected on the SliderInput?

I am currently looking at the relationship between Bonus Points Scored and Points Per Game in Fantasy Premier League. I have made a Shiny/Plotly app. I would like to make another filter with a "Price" slider that returns the data corresponding to the range of prices that is selected. E.g. return all players that have price between Current.Cost 4.0 & 6.0. Currently my code fails as it comes up with error 'Faceting variables must have at least one value. Any help would be greatly appreciated.
#load packaages
library(shiny)
library(plotly)
library(ggplot2)
library(ggrepel)
library(dplyr)
BPSvsPPG <- read.csv("file:///C:/Users/haoso/Documents/FPL_Database.csv", stringsAsFactors = FALSE)
# Filter the Data
BPSvsPPG2 <- subset(BPSvsPPG, AveragePoints > 4)
n_total <- max(BPSvsPPG2$RoundNo)
names <- unique(BPSvsPPG2$Full.Name)
positions <- unique(BPSvsPPG2$PositionsList)
min_price <- min(BPSvsPPG2$Current.Cost)
max_price <- max(BPSvsPPG2$Current.Cost)
mean_price <- mean(BPSvsPPG2$Current.Cost)
latestRound <- max(BPSvsPPG2$RoundNo)
# Create UI
ui <- fluidPage(
sidebarLayout(
# Inputs
sidebarPanel(
# y-axis
selectInput(inputId = "y",
label = "AverageBPS",
choices = "AverageBPS",
selected = "AverageBPS"),
# x-axis
selectInput(inputId = "x",
label = "AveragePoints",
choices = "AveragePoints",
selected = "AveragePoints"),
# positions
selectInput(inputId = "Pos",
label = "Positions",
choices = positions),
# round no
numericInput(inputId = "RoundNo",
label = "RoundNo",
min = 1, max = n_total,
value = latestRound),
# price slider
sliderInput(inputId = "Price",
label = "Price",
min = min_price, max = max_price,
value = c(min_price, mean_price))
),
# Outputs
mainPanel(
plotlyOutput("BPS"),
verbatimTextOutput("event")
)
)
)
# Server code
server <- function(input, output) {
# Create Subset of Data for GW
GW_subset <- reactive({
req(input$RoundNo, input$Price)
filter(BPSvsPPG2, RoundNo %in% input$RoundNo & PositionsList %in% input$Pos & Current.Cost %in% input$Price)
})
# renderPlotly
output$BPS <- renderPlotly({
p <- ggplot(GW_subset(), aes_string(x = input$x, y = input$y)) +
geom_point(aes(text = paste("Name:", Full.Name, "<br>",
"Price:", Current.Cost, "<br>",
"Team:", Team, "<br>",
"AverageBPS:", AverageBPS, "<br>",
"PPG:", AveragePoints), colour = Team,
size = AverageBPS/AveragePoints)) +
facet_wrap(~PositionsList) +
ggtitle(input$RoundNo)
ggplotly(p, tooltip = "text")
})
# renderPrint
output$event <- renderPrint({
d <- event_data("plotly_hover")
if (is.null(d)) "Hover on a point!" else d
})
}
shinyApp(ui, server)
Output of dput(head(BPSvsPPG))
> dput(head(BPSvsPPG))
structure(list(Full.Name = c("Tammy Abraham", "Adam Smith", "Adrian",
"Sergio Aguero", "Nathan Ake", "Marc Albrighton"), Current.Cost = c(5.5,
4.5, 4.5, 11, 5, 5.4), GW = c("GW1", "GW1", "GW1", "GW1", "GW1",
"GW1"), BPSLastRound = c("0", "25", "0", "4", "33", "0"), FirstName = c("Tammy",
"", "", "Sergio", "Nathan", "Marc"), Surname = c("Abraham", "Adam Smith",
"Adrian", "Aguero", "Ake", "Albrighton"), PositionsList = c("FWD",
"DEF", "GLK", "FWD", "DEF", "MID"), Team = c("CHE", "BOU", "WHU",
"MCI", "BOU", "LEI"), Cost = c(5500000L, 4500000L, 4500000L,
11000000L, 5000000L, 5400000L), PointsLastRound = c(0L, 6L, 0L,
2L, 8L, 0L), TotalPoints = c(0L, 6L, 0L, 2L, 8L, 0L), AveragePoints = c(0,
6, 0, 2, 8, 0), AveragePointsPerDollar = c(0, 1.33e-06, 0, 1.82e-07,
1.6e-06, 0), TotalPointsPerDollar = c(0, 1.33e-06, 0, 1.82e-07,
1.6e-06, 0), GameweekWeighting = c(0L, 0L, 0L, 0L, 0L, 0L), TransfersOut = c(1823L,
2437L, 1999L, 53898L, 9917L, 13253L), YellowCards = c(0L, 0L,
0L, 0L, 0L, 0L), GoalsConceded = c(0L, 0L, 0L, 0L, 0L, 0L), GoalsConcededPoints = c(0L,
0L, 0L, 0L, 0L, 0L), Saves = c(0L, 0L, 0L, 0L, 0L, 0L), SavesPoints = c(0L,
0L, 0L, 0L, 0L, 0L), GoalsScored = c(0L, 0L, 0L, 0L, 0L, 0L),
GoalsScoredPoints = c(0L, 0L, 0L, 0L, 0L, 0L), ValueSeason = c(0,
1.3, 0, 0.2, 1.6, 0), TransfersOutRound = c(1823L, 2437L,
1999L, 53898L, 9917L, 13253L), PriceRise = c(0L, 0L, 0L,
0L, 0L, -1L), PriceFallRound = c(0L, 0L, 0L, 0L, 0L, 1L),
LastSeasonPoints = c(0L, 6L, 0L, 2L, 8L, 0L), PriceFall = c(0L,
0L, 0L, 0L, 0L, 1L), ValueForm = c(0, 1.3, 0, 0.2, 1.6, 0
), PenaltiesMissed = c(0L, 0L, 0L, 0L, 0L, 0L), Form = c(0,
6, 0, 2, 8, 0), Bonus = c(0L, 0L, 0L, 0L, 2L, 0L), FanRating = c(0L,
0L, 0L, 0L, 0L, 0L), CleanSheets = c(0L, 1L, 0L, 1L, 1L,
0L), CleanSheetPoints = c(0L, 0L, 0L, 0L, 0L, 0L), Assists = c(0L,
0L, 0L, 0L, 0L, 0L), SelectedByPercent = c(0.2, 0.7, 0.5,
33.2, 4.3, 0.9), TransfersIn = c(416L, 7257L, 212L, 135506L,
26175L, 384L), OwnGoals = c(0L, 0L, 0L, 0L, 0L, 0L), EAIndex = c(0L,
0L, 0L, 0L, 0L, 0L), PenaltiesSaved = c(0L, 0L, 0L, 0L, 0L,
0L), DreamteamCount = c(0L, 0L, 0L, 0L, 0L, 0L), MinutesPlayed = c(0L,
90L, 0L, 78L, 90L, 0L), TransfersInRound = c(416L, 7257L,
212L, 135506L, 26175L, 384L), PriceRiseRound = c(0L, 0L,
0L, 0L, 0L, -1L), RedCards = c(0L, 0L, 0L, 0L, 0L, 0L), BPS = c(0L,
25L, 0L, 4L, 33L, 0L), RoundNo = c(1L, 1L, 1L, 1L, 1L, 1L
), AverageBPS = c(0, 25, 0, 4, 33, 0)), row.names = c(NA,
6L), class = "data.frame")
The problem with you filter function is the usage of %in%.
%in% requires you to pass the range as a vector. With the slider values you can do something like: x >= left & x <= right. Please see the code below.
An alternative would be using between() provided by library(dplyr).
#load packaages
library(shiny)
library(plotly)
library(ggplot2)
library(ggrepel)
library(dplyr)
BPSvsPPG <- read.csv("file:///C:/Users/haoso/Documents/FPL_Database.csv", stringsAsFactors = FALSE)
# Filter the Data
BPSvsPPG2 <- subset(BPSvsPPG, AveragePoints > 4)
n_total <- max(BPSvsPPG2$RoundNo)
names <- unique(BPSvsPPG2$Full.Name)
positions <- unique(BPSvsPPG2$PositionsList)
min_price <- min(BPSvsPPG2$Current.Cost)
max_price <- max(BPSvsPPG2$Current.Cost)
mean_price <- mean(BPSvsPPG2$Current.Cost)
latestRound <- max(BPSvsPPG2$RoundNo)
# Create UI
ui <- fluidPage(
sidebarLayout(
# Inputs
sidebarPanel(
# y-axis
selectInput(inputId = "y",
label = "AverageBPS",
choices = "AverageBPS",
selected = "AverageBPS"),
# x-axis
selectInput(inputId = "x",
label = "AveragePoints",
choices = "AveragePoints",
selected = "AveragePoints"),
# positions
selectInput(inputId = "Pos",
label = "Positions",
choices = positions),
# round no
numericInput(inputId = "RoundNo",
label = "RoundNo",
min = 1, max = n_total,
value = latestRound),
# price slider
sliderInput(inputId = "Price",
label = "Price",
min = min_price, max = max_price,
value = c(min_price, mean_price))
),
# Outputs
mainPanel(
plotlyOutput("BPS"),
verbatimTextOutput("event")
)
)
)
# Server code
server <- function(input, output) {
# Create Subset of Data for GW
GW_subset <- reactive({
req(input$RoundNo, input$Price)
filter(BPSvsPPG2, RoundNo %in% input$RoundNo & PositionsList %in% input$Pos & Current.Cost >= input$Price[1] & Current.Cost <= input$Price[2])
})
# renderPlotly
output$BPS <- renderPlotly({
req(nrow(GW_subset()) > 0)
p <- ggplot(GW_subset(), aes_string(x = input$x, y = input$y)) +
geom_point(aes(text = paste("Name:", Full.Name, "<br>",
"Price:", Current.Cost, "<br>",
"Team:", Team, "<br>",
"AverageBPS:", AverageBPS, "<br>",
"PPG:", AveragePoints), colour = Team,
size = AverageBPS/AveragePoints)) +
facet_wrap(~PositionsList) +
ggtitle(input$RoundNo)
ggplotly(p, tooltip = "text")
})
# renderPrint
output$event <- renderPrint({
d <- event_data("plotly_hover")
if (is.null(d)) "Hover on a point!" else d
})
}
shinyApp(ui, server)
The 'Faceting variables must have at least one value.' error shows up because with a specific combinations of filters, the GW_subset() will give an empty data.frame.
Consider adding req(nrow(GW_subset()) > 0) inside your renderPlotly to prevent plotly from rendering the plot, or define what else should happen if GW_subset() is empty.

How do I get rid off pre-printed text in Forest Plot using Metafor/R?

I am using the Metafor package to produce meta-analysis and subsequently a Forest Plot. When I print my Forest Plot a "RE Model"-text appears automatically as shown on the attached print. I can't figure out how to remove the "RE Model" although I use a separate text-script. I just want my "own" text to appear aligned with the polygon. Can you help?
First "load data" and then my script:
### Load data
dat <- structure(list(study = structure(c(4L, 5L, 3L, 1L, 2L, 7L, 6L
), .Label = c("Battaglia et al.", "Hong et al.", "Kosyakove et al.",
"Lim et al.", "Rauch et al.", "Swachia et al.", "Tsounis et
al."
), class = "factor"), n1i = c(20L, 121L, 25L, 18L, 31L, 35L,
22L), m1i = c(12.8, 30.2, 24.6, 21, 25, 27,
18.2), sd1i = c(15.4,
21.6, 17, 33, 18, 13.8, 8.72), n2i = c(20L, 129L, 25L, 17L, 32L,
34L, 20L), m2i = c(12.1, 28.7, 25.1, 31, 26, 28.6, 14.7), sd2i = c(14.6,
21.6, 12.2, 25, 19, 24.2, 12.9), ntotal = c(40L, 250L, 50L, 35L,
63L, 69L, 42L), mean.age = c(3L, 3L, 1L, 4L, 4L, 3L, 3L), demograhic =
c(0L,
2L, 1L, 1L, 0L, 1L, 0L), adjusted.comorbid = c(1L, 1L, 1L, 1L,
0L, 1L, 1L), follow.up = c(1L, 3L, 3L, 1L, 2L, 2L, 2L), severity = c(2L,
4L, 1L, 4L, 4L, 4L, 4L), treat.sys = c(1L, 2L, 1L, 2L, 1L, 2L,
1L), treat.int = c(1L, 1L, 2L, 1L, 2L, 1L, 1L), year = c(2000L,
2000L, 2000L, 2000L, 2000L, 2000L, 2000L), citation = c(1L, 2L,
3L, 4L, 5L, 6L, 6L), yi = structure(c(0.700000000000001, 1.5,
-0.5, -10, -1, -1.6, 3.5), measure = "MD", ni = c(40L, 250L,
50L, 35L, 63L, 69L, 42L)), vi = c(22.516, 7.47261195464155, 17.5136,
97.2647058823529, 21.7328629032258, 22.6658487394958, 11.7767909090909
)), .Names = c("study", "n1i", "m1i", "sd1i", "n2i", "m2i", "sd2i",
"ntotal", "mean.age", "demograhic", "adjusted.comorbid", "follow.up",
"severity", "treat.sys", "treat.int", "year", "citation", "yi",
"vi"), row.names = c(NA, -7L), class = c("escalc", "data.frame"
), digits = 4, yi.names = "yi", vi.names = "vi")
AND my code
### My code
res <- rma(yi, vi, data=dat, slab=paste(study, year, citation, sep=", "), method = "REML")
forest(res, xlim=c(-39,24), at=c(-12,-9,-6,-3,0,3,6,9,12), showweights = TRUE,
ilab=cbind(dat$m1i, dat$n1i, dat$sd1i, dat$m2i, dat$n2i, dat$sd2i),
ilab.xpos=c(-26,-24,-22,-18,-16,-14), cex=1, ylim=c(-2, 10), font=1, digits=2, col="darkgrey")
### Add own text
text(-39, -2, pos=4, cex=0.9, font=2,
bquote(paste("Random-effects model for all studies: Q = ",
.(formatC(q1$QE, digits=2, format="f")),
", df = ", .(q1$k - q1$p),", p = ",
.(formatC(q1$QEp, digits=2, format="f")),
", ", I^2, " = ",
.(formatC(q1$I2, digits=1, format="f")),
"%", ", ", tau^2 ==
.(formatC(q1$tau2, digits=2, format="f")))))
Thank you in advance!
To make your forest plot without the "RE Model" text in the bottom left hand corner, just use the mlab = "" argument in your forest function call.
forest(res, xlim=c(-39,24), at=c(-12,-9,-6,-3,0,3,6,9,12), showweights = TRUE,
ilab=cbind(dat$m1i, dat$n1i, dat$sd1i, dat$m2i, dat$n2i, dat$sd2i),
ilab.xpos=c(-26,-24,-22,-18,-16,-14), cex=1, ylim=c(-2, 10),
font=1, digits=2, col="darkgrey", mlab = "")
Unfortunately I can't run the "Add own text" section of your provided code as you do not provide your q1 object. But you should be able to solve that yourself.
I figured this out using the Metafor-Project site, specifically their page on forest plots.

Resources