I am trying to plot multiple density plots for some data.
I have the following code:
ggplot(data=stack) +
geom_density(aes(x=OfferPrice, group=Country, fill=Country),
alpha=0.5, adjust=2) +
facet_grid(~Country) +
theme_bw()
My problem arises with the x-axis on each of the density plots.
Looking a little closer at the data:
stack %>%
group_by(Country) %>%
summarise(min(OfferPrice),
mean(OfferPrice),
max(OfferPrice))
It looks like Country - JN has some very high numbers. Earlier I cut the top and bottom 5% of extreme values for each group so there shouldn´t necessarily be extreme values. What I think is wrong is the x-axis on the plots are using the maximum from the JN group. How is it possible to have different x-axis depending on each group?
Data:
stack <- structure(list(Country = c("US", "GB", "US", "HK", "JN", "US",
"CH", "CA", "US", "US", "CA", "JN", "GB", "AU", "US", "GB", "US",
"GB", "HK", "CH", "CA", "CA", "US", "GB", "TA", "JN", "CA", "CA",
"CA", "CA", "CH", "GB", "CA", "HK", "CA", "US", "US", "CA", "US",
"AU", "CA", "CA", "CA", "US", "GB", "GB", "AU", "US", "US", "AU",
"CA", "CA", "CA", "US", "CA", "GB", "CA", "US", "GB", "US", "AU",
"AU", "US", "CA", "US", "GB", "AU", "CH", "CA", "CA", "GB", "AU",
"AU", "CH", "CA", "AU", "CH", "US", "CH", "TA", "AU", "AU", "GB",
"CH", "HK", "AU", "AU", "CA", "US", "TA", "GB", "US", "AU", "US",
"CA", "CA", "US", "AU", "CA", "US", "CA", "US", "CA", "US", "CA",
"US", "US", "AU", "CA", "AU", "GB", "US", "HK", "AU", "US", "CA",
"JN", "JN", "GB", "JN", "CA", "CA", "AU", "GB", "GB", "US", "US",
"US", "AU", "GB", "CA", "CA", "US", "CH", "GB", "US", "US", "AU",
"GB", "CH", "JN", "CA", "AU", "CA", "US", "US", "AU", "AU", "CA",
"US", "GB", "GB", "US", "US", "CA", "US", "HK", "AU", "US", "GB",
"US", "GB", "GB", "US", "CA", "JN", "CA", "AU", "CA", "CA", "GB",
"CA", "HK", "HK", "US", "CH", "US", "AU", "TA", "US", "CH", "HK",
"AU", "US", "HK", "GB", "AU", "CH", "US", "AU", "US", "CH", "US",
"CH", "CA", "AU", "HK", "CA", "US", "CH", "GB", "CA", "CA", "CA",
"CA", "US", "CA", "CA", "US", "HK", "US", "HK", "AU", "GB", "AU",
"CH", "US", "AU", "CA", "CA", "US", "GB", "AU", "US", "CH", "CA",
"CA", "CA", "US", "AU", "GB", "GB", "CA", "AU", "CA", "AU", "US",
"HK", "AU", "US", "AU", "CA", "US", "US", "US", "CA", "GB", "CA",
"US", "CA", "US", "AU", "CA", "US", "AU", "CH", "GB", "CA", "CA",
"CA", "CA", "HK", "AU", "TA", "AU", "GB", "AU", "CA", "JN", "US",
"CA", "CA", "AU", "US", "US", "GB", "CA", "US", "GB", "US", "CA",
"CA", "CH", "US", "US", "US", "US", "US", "HK", "CH", "CA", "CA",
"CA", "AU", "GB", "CH", "CA", "GB", "CA", "AU"), EffectiveDate = structure(c(17617,
17500, 17556, 17596, 17618, 17667, 17786, 17728, 17569, 17760,
17585, 17613, 17806, 17847, 17786, 17665, 17702, 17683, 17574,
17725, 17723, 17658, 17563, 17847, 17584, 17578, 17842, 17522,
17697, 17521, 17680, 17794, 17813, 17697, 17773, 17578, 17556,
17595, 17669, 17501, 17655, 17562, 17668, 17589, 17582, 17780,
17812, 17667, 17611, 17758, 17751, 17617, 17505, 17505, 17725,
17506, 17513, 17541, 17644, 17702, 17828, 17688, 17696, 17519,
17850, 17746, 17779, 17547, 17806, 17701, 17638, 17759, 17786,
17770, 17835, 17750, 17841, 17653, 17788, 17553, 17794, 17654,
17724, 17675, 17802, 17638, 17625, 17514, 17709, 17758, 17681,
17507, 17708, 17758, 17800, 17569, 17821, 17493, 17834, 17848,
17639, 17549, 17590, 17575, 17630, 17639, 17760, 17724, 17701,
17725, 17756, 17794, 17511, 17494, 17822, 17758, 17709, 17492,
17605, 17709, 17602, 17644, 17814, 17696, 17760, 17603, 17730,
17675, 17590, 17724, 17743, 17680, 17690, 17711, 17560, 17570,
17702, 17512, 17626, 17618, 17576, 17731, 17527, 17709, 17729,
17672, 17515, 17527, 17641, 17597, 17812, 17540, 17602, 17743,
17662, 17709, 17505, 17737, 17603, 17515, 17568, 17843, 17738,
17508, 17816, 17578, 17590, 17812, 17743, 17528, 17616, 17687,
17646, 17515, 17816, 17557, 17507, 17564, 17777, 17802, 17511,
17842, 17584, 17556, 17547, 17844, 17590, 17674, 17759, 17583,
17836, 17721, 17724, 17801, 17578, 17808, 17682, 17849, 17708,
17515, 17746, 17633, 17759, 17591, 17802, 17540, 17560, 17588,
17800, 17787, 17821, 17724, 17645, 17527, 17722, 17556, 17704,
17844, 17619, 17792, 17577, 17637, 17843, 17765, 17688, 17562,
17834, 17738, 17653, 17645, 17718, 17676, 17637, 17570, 17490,
17534, 17646, 17625, 17766, 17808, 17675, 17786, 17808, 17555,
17739, 17802, 17617, 17619, 17667, 17634, 17662, 17711, 17806,
17513, 17627, 17673, 17574, 17647, 17609, 17619, 17521, 17543,
17686, 17807, 17613, 17543, 17543, 17528, 17694, 17576, 17584,
17521, 17605, 17618, 17723, 17641, 17683, 17823, 17634, 17844,
17836, 17816, 17539, 17583, 17618, 17687, 17589, 17602, 17717,
17535, 17718, 17625, 17822, 17651, 17521, 17751, 17617, 17563,
17578, 17772), class = "Date"), OfferPrice = c(44, 13, 33, 0.3,
3000, 23, 6.26, 0.35, 10, 6.25, 0.25, 7110, 109.5, 0.11, 16,
2, 5.5, 15, 0.5, 8.5, 0.5, 0.2, 5, 92.5, 22, 103740, 0.23, 0.75,
8.65, 1.23, 17.4, 1.5, 0.38, 0.84, 8.1, 27.5, 10, 1, 14, 0.42,
0.1, 1.82, 2, 39.8, 238, 340, 0.3, 4.5, 41.5, 0.2, 0.25, 0.27,
0.35, 5, 0.3, 115, 0.15, 5, 142.25, 14, 2.43, 0.02, 24, 0.115,
8.25, 25, 0.155, 9.6, 0.67, 6, 52.5, 0.2, 0.2, 1.37, 1.6, 0.65,
2.9, 4, 7, 72, 0.025, 0.14, 22.5, 6.75, 0.64, 0.8, 0.8, 0.4,
22, 94.8, 15, 10, 2.45, 34, 1, 9.3, 6.25, 0.018, 0.2, 24.5, 0.3,
2.9, 0.35, 2.05, 0.4, 29.5, 2.26, 0.36, 0.75, 0.027, 2.8, 16,
3.54, 0.018, 10, 0.15, 1780, 1602, 120, 3900, 0.25, 0.18, 2.32,
269, 175, 18, 18, 23, 0.2, 10, 0.1, 0.6, 4.8, 6, 164.5, 7, 26.42,
0.02, 190, 11, 992, 0.2, 1.42, 0.55, 23, 33.5, 0.35, 0.065, 1.16,
29.5, 65, 8, 27.5, 18.9, 0.15, 17, 0.63, 0.34, 26.25, 0.65, 6.9,
10, 6.75, 1.21, 0.95, 73125, 2.5961, 0.054, 1.2, 9.64, 251, 2.46,
0.18, 0.375, 9.97, 20.43, 25.5, 0.025, 60, 3, 1.55, 0.5, 0.2,
17, 0.443, 8, 0.05, 5.25, 1.15, 0.45, 7.155, 17, 24.5, 12.5,
2.1, 0.75, 0.35, 0.39, 38.2, 0.63, 16, 0.15, 0.1, 0.12, 2.32,
10, 0.3, 1.66, 17, 0.4, 1.3, 0.3, 1.08, 30, 0.8, 10.88, 0.9,
0.21, 0.17, 1.7, 2.25, 1, 0.08, 1.5, 14.75, 0.35, 0.44, 0.35,
17, 2, 37, 195, 0.165, 0.02, 0.2, 0.015, 25, 1.09, 0.45, 10,
0.145, 0.92, 36, 13.25, 4, 0.6, 101, 0.7, 15, 0.3, 0.7, 0.06,
0.25, 6.5, 1.1, 16.72, 1.25, 0.1, 0.12, 0.28, 1.18, 0.4, 0.02,
75, 0.08, 5, 0.02, 0.5, 2878, 8, 0.15, 0.33, 0.1, 21.25, 1, 12,
5.83, 4.25, 1.65, 3.15, 3, 0.15, 7.4, 31.25, 12, 24, 19.75, 41.5,
0.88, 13.65, 0.25, 0.15, 0.25, 2.35, 101, 1.26, 1.65, 10, 1.32,
0.5)), row.names = c(NA, -300L), class = c("tbl_df", "tbl", "data.frame"
))
You can specify whether you want free scales (x, y or both) in facet_grid.
ggplot(data=stack) +
geom_density(aes(x=OfferPrice, group=Country, fill=Country),
alpha=0.5, adjust=2) +
facet_grid(~Country, scales = "free_x") +
theme_bw()
Related
I am having a problem publishing my RShiny App. Whenever I publish it, everything works besides my stat_summary_hex() function within my ggplot. I am not really sure why it is not working, whenever I run it through RStudio it works perfectly fine, but not when I publish it. If I could get help, that would be much appreciated.
I am getting this message: Computation failed in stat_summary_hex():
Here's my code
library(shiny)
library(dplyr)
library(tidyverse)
library(ggplot2)
allStuff2 = read.csv("appData.csv")
allStuff2$player_name = iconv(allStuff2$player_name,from="UTF-8",to="ASCII//TRANSLIT")
allStuff2 = allStuff2 %>%
mutate(pitch_type = ifelse(pitch_type == "FF", "Fourseam Fastball", pitch_type),
pitch_type = ifelse(pitch_type == "SI", "Sinker", pitch_type),
pitch_type = ifelse(pitch_type == "FC", "Cutter", pitch_type),
pitch_type = ifelse(pitch_type == "SL", "Slider", pitch_type),
pitch_type = ifelse(pitch_type == "CU", "Curveball", pitch_type),
pitch_type = ifelse(pitch_type == "CH", "Changeup", pitch_type)) %>%
arrange(player_name)
# Define UI for application that draws a histogram
ui <- fluidPage(
# Application title
titlePanel("2022 Stuff+ Dashboard"),
theme = shinythemes::shinytheme("journal"),
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(
selectInput('name',
'Select Pitcher:',
choices = unique(allStuff2$player_name)),
selectInput('pitch_type',
'Select Pitch Type:',
choices = c('Fourseam Fastball', 'Sinker', 'Cutter', 'Slider', 'Curveball', 'Changeup'),
selected = 'Fourseam Fastball')),
# Show a plot of the generated distribution
mainPanel(
plotOutput("stuffPlot"),
tableOutput("table")
)
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
pitcherAvg = reactive({
allStuff2 %>%
filter(player_name == input$name,
pitch_type == input$pitch_type) %>%
group_by(pitch_type) %>%
summarise(velo = mean(release_speed, na.rm = T),
pfx_x = -mean(pfx_x, na.rm = T),
pfx_z = mean(pfx_z, na.rm = T),
rp_x = mean(release_pos_x, na.rm = T),
rp_z = mean(release_pos_z, na.rm = T),
p_throws = p_throws[1],
Stuff. = mean(Stuff., na.rm = T))
})
output$stuffPlot = renderPlot({
ggplot(filter(allStuff2, pitch_type == input$pitch_type,
p_throws == pitcherAvg()$p_throws,
release_pos_x >= pitcherAvg()$rp_x - 0.5,
release_pos_x <= pitcherAvg()$rp_x + 0.5,
release_pos_z >= pitcherAvg()$rp_z - 0.5,
release_pos_z <= pitcherAvg()$rp_z + 0.5,
release_speed >= pitcherAvg()$velo - 1,
release_speed <= pitcherAvg()$velo + 1),
aes(x = -pfx_x, y = pfx_z, z = Stuff.)) +
stat_summary_hex() +
scale_fill_gradient2(midpoint = 100, low = "blue", mid = "white", high = "red") +
geom_point(aes(x=pitcherAvg()$pfx_x, y=pitcherAvg()$pfx_z), colour="black", size = 5) +
theme_bw() +
labs(x = 'Horizontal Movement', y = 'Vertical Movement', fill = 'Stuff+',
title = paste(strsplit(input$name, ", ")[[1]][1], "'s Stuff+ by Location for ", input$pitch_type, sep = ""),
subtitle = "Based off of Pitchers with Similar Release Point and Velocity (Pitcher's Point of View)",
caption = 'Email: fran1412#umn.edu') +
coord_fixed() +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
xlim(-2.5,2.5) +
ylim(-2.5,2.5)
})
output$table = renderTable({
allStuff2 %>%
filter(player_name == input$name,
pitch_type == input$pitch_type) %>%
group_by(pitch_type) %>%
summarise(Velocity = mean(release_speed, na.rm = T),
`Horiz. Break` = -mean(pfx_x, na.rm = T),
`Vert. Break` = mean(pfx_z, na.rm = T),
`Horiz. Release Point` = -mean(release_pos_x, na.rm = T),
`Vert. Release Point` = mean(release_pos_z, na.rm = T),
`Stuff+` = as.integer(round(mean(Stuff., na.rm = T), 0))) %>%
select(-pitch_type)
})
}
# Run the application
shinyApp(ui = ui, server = server)
Here is my dput file:
structure(list(release_speed = c(85.9, 97.6, 100.5, 98.9, 87.3,
100.7, 100.6, 87.3, 95, 100.7, 101.2, 87.4, 102, 87.2, 103.2,
95.8, 94.3, 93.1, 89, 101, 87.2, 95.8, 101.5, 96.1, 96.2, 85.5,
88.7, 88.4, 87.7, 90.5, 88.2, 87.2, 101.2, 100.6, 96.1, 95, 85,
85.7, 102.3, 96.2, 89.3, 88.3, 91.4, 86.1, 96.5, 101.5, 96.4,
100.7, 84.5, 89.9, 101.1, 94.3, 98.8, 95.7, 95.4, 89.3, 94.7,
98.1, 94.7, 97, 95.7, 98.5, 98.9, 87.6, 90.4, 86.5, 88.4, 88.3,
101.9, 98.9, 100.7, 89, 95.4, 99.6, 102.4, 87.5, 87.2, 88, 87,
95.9, 99.1, 96.5, 91, 85.9, 87.2, 84.8, 87.1, 88, 98.3, 102.1,
86.3, 99.2, 101.9, 100, 84.7, 86.1, 98.2, 102.4, 99.8, 98.8),
pfx_x = c(0.5, -1.12, -0.79, -0.95, 0.52, -0.89, -1.44, 0.26,
-1.12, -1, -0.86, 0.2, -0.84, 0.32, -1.2, -1.09, -1.37, -0.54,
0.25, -1.12, 0.15, -0.77, -0.84, -0.99, -1.38, 0.26, 0.05,
0.32, 0.2, 0.36, 0.29, 0.01, -1.08, -1, -0.75, -0.67, 0,
0.27, -0.9, -1.48, 0.33, 0.11, 0.33, 0.22, -0.75, -1.23,
-1.25, -1.02, 0.27, -0.13, -1.1, -0.51, -0.91, -0.7, -0.84,
0.23, -1.64, -0.96, -0.76, -1.03, -1.53, -1.24, -1.41, 0.48,
0.39, 0.09, 0.56, 0.18, -1.08, -0.97, -1.21, 0.5, -1.21,
-0.89, -1.12, 0.13, 0.16, 0.39, 0.46, -1.1, -0.79, -1.19,
-0.65, 0.28, 0.23, 0.35, 0.1, 0.34, -0.35, -0.91, 0.02, -0.86,
-1.02, -0.75, 0.38, 0.08, -1.38, -1.08, -1.02, -1.15), pfx_z = c(-0.63,
0.06, 1.22, 0.13, -0.45, 1.3, 1.15, -1.16, 0.64, 1.18, 1.08,
-0.75, 1, -0.59, 1.04, 0.29, 0.25, 0.53, -0.62, 0.97, -0.61,
0.13, 1.16, 0.32, 0.05, -0.48, -0.4, -0.39, -0.9, -0.42,
-0.64, -0.59, 1.24, 1.01, 0.1, 0.25, -1.05, -0.51, 1.12,
0.35, -0.63, -0.52, -0.56, -0.69, 0.22, 0.8, 0.48, 1.18,
-0.43, -0.34, 1.17, -0.21, 1.07, 0.21, 0.32, -0.49, 0.16,
1.35, 0.12, 0.43, 0.57, 1.04, 0.42, -0.56, -0.58, -0.82,
-0.37, -0.48, 0.8, 1, 1.33, -0.7, 0.45, 0.95, 1.37, -0.32,
-0.57, -0.72, -0.85, 0.3, 1.32, 0.33, 0.51, -0.55, -0.68,
-0.58, -0.57, -0.49, 0.17, 1.3, -0.5, 1.21, 1.14, 1.28, -0.72,
-0.42, 0.33, 1.37, 1.24, 0.3), release_pos_x = c(-2.18, -1.97,
-2.11, -1.72, -1.4, -1.66, -2.19, -1.93, -2.09, -1.89, -1.85,
-1.95, -1.89, -2.16, -1.73, -2.2, -2.22, -2.45, -2.07, -2.11,
-2.18, -2.24, -1.85, -2.29, -2.23, -2.16, -2.06, -2.07, -1.77,
-1.81, -2, -1.85, -1.75, -2.13, -2.14, -2.49, -2.14, -2.05,
-1.89, -1.79, -1.71, -1.73, -1.82, -2.08, -2.32, -1.76, -2.03,
-1.93, -2.04, -2.13, -1.97, -2.27, -2.2, -2.1, -2.27, -1.94,
-1.95, -1.86, -2.27, -2.28, -1.84, -2.21, -2, -1.72, -1.94,
-1.73, -1.74, -2, -1.82, -1.89, -1.59, -1.83, -2.12, -2.12,
-1.84, -1.92, -2.13, -1.69, -1.9, -2.12, -2.15, -1.98, -2.63,
-2.24, -2.02, -1.83, -2.32, -1.96, -1.88, -2.02, -2.29, -2.2,
-2.15, -2.43, -1.68, -1.92, -2.05, -1.65, -2.12, -1.91),
release_pos_z = c(6.03, 6.29, 6.13, 6.04, 6.32, 6.24, 6.07,
6.18, 6.21, 6.07, 6.02, 6.14, 6.17, 6.16, 6.06, 5.95, 6.06,
5.8, 6, 6.02, 6.09, 6.14, 6.08, 5.93, 6.08, 6.26, 6.08, 6.12,
6.14, 6.04, 6.41, 6.23, 6.08, 6.11, 5.88, 5.95, 6.28, 6.05,
6.06, 6.29, 6.27, 6.2, 5.99, 6.23, 6.08, 6.07, 6.09, 6.18,
6.1, 6.19, 5.97, 5.95, 6.11, 5.99, 6.25, 5.97, 6.02, 6.15,
6.01, 6.04, 6.18, 6.06, 6.03, 6.2, 5.85, 6.25, 6.07, 6.25,
6.06, 5.99, 6.28, 6.18, 5.87, 6.15, 6.01, 6.12, 6.2, 6.09,
6.09, 5.94, 5.99, 6, 5.74, 6.36, 6.04, 6.16, 5.94, 6.2, 6,
5.96, 6.31, 5.99, 5.96, 6.13, 6.17, 6.21, 6.14, 6.04, 6.05,
6.21), player_name = c("Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan", "Duran, Jhoan",
"Duran, Jhoan"), p_throws = c("R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R",
"R", "R", "R", "R", "R", "R", "R", "R", "R", "R"), pitch_type = c("CU",
"CH", "FF", "CH", "CU", "FF", "FF", "CU", "CH", "FF", "FF",
"CU", "FF", "CU", "FF", "CH", "CH", "SL", "CU", "FF", "CU",
"CH", "FF", "CH", "CH", "CU", "CU", "CU", "CU", "CU", "CU",
"CU", "FF", "FF", "CH", "CH", "CU", "CU", "FF", "CH", "CU",
"CU", "CU", "CU", "CH", "FF", "CH", "FF", "CU", "CU", "FF",
"CH", "FF", "CH", "CH", "CU", "CH", "FF", "CH", "CH", "CH",
"FF", "CH", "CU", "CU", "CU", "CU", "CU", "FF", "FF", "FF",
"CU", "CH", "FF", "FF", "CU", "CU", "CU", "CU", "CH", "FF",
"CH", "SL", "CU", "CU", "CU", "CU", "CU", "CH", "FF", "CU",
"FF", "FF", "FF", "CU", "CU", "CH", "FF", "FF", "CH"), Stuff. = c(110.783365414633,
83.9695389876577, 126.982883382714, 100.773492772016, 122.695393953566,
119.617766459935, 118.785623163818, 130.831514405023, 61.756262527081,
96.8279502585635, 99.8866109055696, 129.196132513915, 100.484491385581,
126.11256167432, 119.411360427872, 99.3219706412825, 114.233606464787,
85.7119791685036, 130.467610095058, 112.469680863445, 141.219595669374,
107.42847854836, 108.245236352338, 99.7562461898892, 125.500229256936,
122.603272894611, 140.858949561694, 127.283168181566, 124.003695368084,
127.799900621368, 111.05222857869, 123.422997655113, 123.389105069617,
110.298869169031, 111.024067639178, 94.9843404478164, 150.783474706561,
126.3180435826, 100.726570873835, 119.565181758901, 122.935086985841,
141.957127666565, 131.623575183857, 111.648889433675, 79.7759749825627,
86.5740112525524, 93.0267990053248, 111.314387796175, 121.148229426222,
146.59561919429, 113.787652486095, 110.791517820695, 84.8826511258123,
99.281267964766, 95.6569834353375, 139.898066773251, 116.718918712834,
98.5337807227119, 96.0645681960684, 63.3208562591806, 108.735190496635,
80.3261641922412, 188.974704795231, 117.766297421377, 129.541840071494,
134.2787846362, 130.058767183761, 123.568417985415, 84.8628879203198,
68.4933286167661, 116.547997910403, 123.002208002103, 94.3192200671762,
85.8888577862371, 141.693480608564, 125.100305847327, 121.625166716099,
109.272368981841, 107.346832906711, 100.57797732587, 108.425613182612,
78.6537787345208, 85.6841619149002, 121.150698692734, 131.428503129421,
113.792479159864, 135.83714795316, 112.501339659899, 88.7429637176584,
155.705150252501, 137.647519897197, 97.8102480290771, 135.083550736976,
140.716728877702, 98.6674738226451, 134.137954477671, 169.959982914227,
141.693480608564, 137.731456338842, 93.4183672266801)), class = "data.frame", row.names = c(NA,
-100L))
When I run abline(h=0) within a loop it will not add a line at the horizontal, for example:
library(glue)
library(magrittr)
par(mfrow=n2mfrow(4, 2))
for(i in 1:4){
plot(savings[, i], savings[,1], xlab = glue('predictor: {i}'), ylab = 'Dependent variable') %>%
with(abline(h=0))
}
Here's some sample data:
savings <- structure(list(sr = c(11.43, 12.07, 13.17, 5.75, 12.88, 8.79,
0.6, 11.9, 4.98, 10.78, 16.85, 3.59, 11.24, 12.64, 12.55), pop15 = c(29.35,
23.32, 23.8, 41.89, 42.19, 31.72, 39.74, 44.75, 46.64, 47.64,
24.42, 46.31, 27.84, 25.06, 23.31), pop75 = c(2.87, 4.41, 4.43,
1.67, 0.83, 2.85, 1.34, 0.67, 1.06, 1.14, 3.93, 1.19, 2.37, 4.7,
3.35), dpi = c(2329.68, 1507.99, 2108.47, 189.13, 728.47, 2982.88,
662.86, 289.52, 276.65, 471.24, 2496.53, 287.77, 1681.25, 2213.82,
2457.12), ddpi = c(2.87, 3.93, 3.82, 0.22, 4.56, 2.43, 2.67,
6.51, 3.08, 2.8, 3.99, 2.19, 4.32, 4.52, 3.44)), row.names = c("Australia",
"Austria", "Belgium", "Bolivia", "Brazil", "Canada", "Chile",
"China", "Colombia", "Costa Rica", "Denmark", "Ecuador", "Finland",
"France", "Germany"), class = "data.frame")
I am trying to apply a Trim function from the DescTools package to a data frame in R using the dplyr package.
What I have so far is the following:
x <- df %>%
group_by(Country) %>%
mutate_all(OfferPrice, Trim(trim = 0.1, na.rm = TRUE))
Which returns the following error:
Error in Trim(trim = 0.1, na.rm = TRUE) :
argument "x" is missing, with no default
I know its a problem with the characteristics inside the Trim() part of the mutate but I cannot seem to apply this function inside dplyr.
The function trims the top and bottom 10% of the observations, hopefully removing any extreme values.
Data:
df <- structure(list(Country = c("AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB"), OfferPrice = c(0.25, 0.55,
0.065, 0.075, 0.019, 0.0114, 0.18, 0.015, 2.8, 3.62, 0.025, 0.07,
0.6, 0.9, 0.12, 2.72, 0.015, 0.015, 0.32, 0.2, 0.063, 0.01, 1.42,
0.0045, 0.02, 1.15, 0.2, 17.05, 0.009, 1.8, 3.22, 0.135, 0.35,
5, 0.37, 0.023, 0.014, 0.023, 0.35, 1.25, 0.05, 0.059, 0.2, 0.025,
5.45, 0.05, 0.3, 0.22, 0.04, 0.035, 2, 0.32, 0.2, 0.2, 0.02,
0.34, 0.04, 0.025, 0.03, 0.0125, 1.6, 0.03, 0.15, 13.5, 0.1,
0.3, 0.13, 0.115, 0.35, 0.2, 0.6, 0.7, 8, 14, 25, 15.75, 3.8,
2, 0.5, 35.2, 1.75, 0.12, 0.48, 0.15, 0.7, 0.075, 0.15, 14.5,
0.29, 0.58, 1.75, 9, 11.5, 0.5, 0.075, 0.12, 1.1, 0.6, 0.75,
0.26, 0.2, 0.12, 0.49, 12, 6.85, 0.55, 0.25, 1.6, 0.36, 0.06,
2, 0.272, 41, 0.15, 1.1, 4.1, 0.6, 0.08, 1.4, 3, 0.09, 0.15,
0.2, 0.3, 0.8, 0.21, 0.1, 0.05, 0.17, 0.1, 0.15, 0.05, 0.3, 0.6,
0.2, 0.5, 3.45, 3, 0.07, 0.1, 0.3, 7.2, 0.4, 0.1, 12.5, 0.07,
0.375, 0.25, 0.3, 1.15, 0.2, 3, 1, 0.3, 0.25, 530, 262, 20, 37.5,
3422, 295, 100, 0.085, 1925, 0.3, 107.5, 10, 2.1, 3, 15, 300,
690, 50, 410, 100, 120, 225, 40, 100, 100, 51, 10, 82, 9.58,
269, 0.5, 271, 100, 108, 0.3, 4.5, 0.5, 0.55, 50, 0.95, 275,
100, 170, 0.7), OfferTo1stOpen = c(18, -2.727274895, 9.230772972,
6.666662216, -15.78947067, 5.263155937, -2.777781725, 13.33333588,
5.000001907, -3.591157198, -0.000001490116119, 1.428570986, -4.166670322,
0.00000264909545, -34.16666412, -0.000001051846652, 26.66666985,
26.66666985, 9.375002861, 2.499998569, 6.34920454, 0.000002235174179,
-0.7042223215, -11.11110687, 15.00000286, 1.304349899, -0.000001490116119,
6.217013359, 11.11111546, 25.00000381, 0.9316761494, -0.000003973642833,
-15.71428394, 17.20000076, -0.000001288749104, 4.347826004, 14.28571033,
13.04347801, 4.285716057, 43.20000076, 1.99999845, 10.16949081,
2.499998569, -4.000001431, -0.1834827513, 11.99999809, -1.666670561,
95.45454407, -12.49999809, 25.7142849, -0.5, 18.75000191, -0.000001490116119,
-17.50000191, -9.999998093, 44.11764526, 15.00000286, 19.99999809,
0.000002235174179, 35.99999619, 10.62499809, 76.66667175, 6.666662216,
-0.3703703582, -10.00000095, -100, 146.1538544, 65.21739197,
-11.42856979, 14.99999809, -5.000003815, -11.42856979, 1.625,
6.785714149, NA, 3.492063522, -3.684209347, -2.5, 10, -1.420456648,
1.142857194, -12.49999809, -1.041664481, -0.000003973642833,
-14.2857132, 39.99999619, 36.66666031, -0.3448275924, -15.51723862,
-12.06896305, -18.2857151, 0.555555582, -5.434782505, 590, -6.666670322,
0.000002235174179, 1.818179607, 36.66666031, -6.666666508, 0.000003667978262,
-10.00000095, 20.83333588, -20.40816498, -2.916666746, -29.1970787,
-0.000002167441608, -10, -18.80635834, -100, 8.333335876, -3.5,
10.29411125, 2.097560883, -6.666670322, 7.272725105, 0.7317096591,
19.99999619, 81.25000763, 45.00000381, -20, -11.1111145, -0.000003973642833,
-7.500001431, -0.000003973642833, -1.250001431, -14.28571129,
49.99999619, -10.00000095, -5.882353783, NA, 23.33332825, 19.99999809,
18.33332825, -13.33333683, 34.99999619, -34, -19.71014595, -32.33333206,
-21.4285717, -20.00000191, -100, 0.1388915479, 7.499998569, -20.00000191,
-0.2399999946, 257.1428528, -16, 54, NA, -4.347824097, -100,
6, 1, 4.999995708, -8, 8.301886559, 3.511450291, 25, 16, -1.461133838,
-1.694915295, 1, 17.64705849, 3.376623392, 24.99999428, 3.255813837,
34, 0.00000454130668, -3.333333254, 10.33333302, 1.666666627,
16.231884, 9, 1.829268336, 3, 11.66666698, 4.888888836, 14.25,
3.5, 3.5, -4.411764622, 0.200000003, 1.829268336, 53.96659851,
9.665427208, 5, -1.586715817, 2, 1.111111164, 4.999995708, -10,
5, -4.545456409, NA, 7.894738197, 5.454545498, 1, 11.17647076,
25.00000191), OfferTo1stClose = c(8, -7.272729397, 9.230772972,
7.999995708, -21.05262947, -3.508773565, -2.777781725, 0.000002235174179,
3.571430445, -3.867400169, -0.000001490116119, 1.428570986, -6.666670322,
-1.666664004, -35.83333206, -3.308824539, 13.33333588, 26.66666985,
10.93750286, -0.000001490116119, 6.34920454, -9.999998093, -0.3521096706,
11.11111546, 5.000002384, -0.4347805381, -2.500001431, 3.519066334,
11.11111546, 27.22222519, 4.34782505, -7.407411098, -17.1428566,
15.39999962, 4.05405283, -0.0000001943629684, 7.142853737, 13.04347801,
2.857144594, 43.20000076, 3.999998569, 10.16949081, -7.500001431,
3.999998569, -0.5504552126, 19.99999809, -1.666670561, 170.4545441,
-14.99999809, 31.4285717, -0.5, 18.75000191, -20.00000191, -17.50000191,
0.000002235174179, 44.11764526, 12.50000286, 15.99999809, 3.333335638,
35.99999619, 10.62499809, 123.3333359, 13.3333292, -1.481481433,
-10.00000095, -100, 138.4615479, 47.82608414, -12.85714149, 32.49999619,
-13.33333683, -24.2857132, 1.75, -0.3571428657, NA, 3.93650794,
-7.894735813, -7, 20, -0.9375021458, 1.714285731, -8.333331108,
-1.041664481, 3.333329201, -19.99999809, 33.33332825, 33.33332825,
-0.06896551698, -16.3793087, -16.3793087, -18.2857151, 2.666666746,
2.173913002, 590, -6.666670322, -16.66666412, 2.727270603, 44.99999237,
-10.66666698, 1.923080683, -12.50000095, 16.66666985, -22.44898033,
-4.166666508, -39.85401535, -3.636365652, -12, -16.8959198, -100,
0.000002235174179, -3.5, 13.97058201, 2.707317114, -8.066670418,
5.454543114, 0.4878072143, 19.99999619, 87.50000763, 45.7142868,
-25.66666603, -5.555559158, 16.66666222, -2.500001431, 3.333329201,
-0.000001490116119, -14.28571129, 49.99999619, -10.00000095,
-5.882353783, NA, 39.99999619, 19.99999809, 13.3333292, -10.00000381,
65, -26, -19.71014595, -31.66666603, -21.4285717, -20.00000191,
-100, -0.1388862431, 11.24999809, -20.00000191, -1.679999948,
228.5714264, -22.66666603, 42, NA, -7.826085091, -100, 6.666666508,
0, 4.999995708, -8, 8.301886559, 3.969465733, 26, 16, -5.084745884,
1.322033882, 1.5, 17.64705849, 2.077922106, 24.99999428, 3.255813837,
43, 0.00000454130668, -4.166666508, 10.33333302, 1.333333373,
18.69565201, 9, 1.829268336, 3, 11.66666698, 3.111111164, 15,
3.5, 3.5, -4.411764622, 0.6000000238, 50.60975647, 53.96659851,
37.54646683, 0, -0.1476014704, 3, 1.296296239, 4.999995708, -11.11111069,
5, -0.000002167441608, NA, 7.894738197, 4.181818008, 0.5, 10.88235283,
25.00000191)), row.names = c(NA, -199L), vars = "Country", drop = TRUE, indices = list(
0:61, 62:154, 155:198), group_sizes = c(62L, 93L, 44L), biggest_group_size = 93L, labels = structure(list(
Country = c("AU", "CA", "GB")), row.names = c(NA, -3L), class = "data.frame", vars = "Country", drop = TRUE, indices = list(
0:61, 62:154, 155:198), group_sizes = c(62L, 93L, 44L), biggest_group_size = 93L, labels = structure(list(
Country = c("AU", "CA", "GB")), row.names = c(NA, -3L), class = "data.frame", vars = "Country", drop = TRUE)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I think you'll need to do this with do since the action of Trim is to return essentially a subset of observations. Try:
x <- df %>%
group_by(Country) %>%
do(
Trim(.$OfferPrice, trim = 0.1, na.rm = TRUE)
)
You could then use lapply or map inside the do statement to Trim each column of data, but I'm not sure if this is actually what you want. It's unclear since you have not provided any sample data. The attempt to use mutate_all suggests you want to Trim each column of data separately, but this doesn't make sense to me.
EDIT based on your comment you really want to filter the dataframe by the Trimmed column OfferPrice, so
x <- df %>%
group_by(Country) %>%
do(
.[attr(Trim(.$OfferPrice, trim = 0.1, na.rm = TRUE), "trim"), ]
)
See the documentation of Trim for details, specifically
The indices of the trimmed values will be attached as attribute named "trim".
Assuming that what you want is that for any element of OfferPrice excluded by Trim(OfferPrice, ...) that entire row of df should be dropped, get the trim attribute of the result of Trim(...) and remove those rows using slice doing it all by Country.
library(dplyr)
library(DescTools)
df %>%
group_by(Country) %>%
slice(-attr(Trim(OfferPrice, trim = 0.1, na.rm = TRUE), "trim")) %>%
ungroup
This could also be written:
df %>%
group_by(Country) %>%
slice(OfferPrice %>%
Trim(trim = 0.1, na.rm = TRUE) %>%
attr("trim") %>%
`-`) %>%
ungroup
I have 4 columns: date & time, stage_duration, various_stages, Vehicle_ID. I want to plot date and time in mins on X-axis and id, stage_duration on Y-axis and fill by various stages on line or bar chart.
Something like this would be good:
Here is my data:
var_events time_date event_duration veh_id
LD 17-06-2018 13:25 6.52 B33
WL 17-06-2018 13:25 14.52 B31
TL 17-06-2018 13:26 0.32 B32
TE 17-06-2018 13:26 4.58 B13
UL 17-06-2018 13:26 3.45 B12
WT 17-06-2018 13:26 5.46 B25
UL 17-06-2018 13:26 1.56 B17
TL 17-06-2018 13:26 13.6 B33
SL 17-06-2018 13:26 0.05 B32
Here is a minimal example that creates the plot
# load data
data(presidential)
data(economics)
# events of interest
events <- presidential[-(1:3),]
# strip year from economics and events data frames
economics$year = as.numeric(format(economics$date, format = "%Y"))
# use dplyr to summarise data by year
#install.packages("dplyr")
library(dplyr)
econonomics_mean <- economics %>%
group_by(year) %>%
summarise(mean_unemployment = mean(unemploy))
# add president terms to summarized data frame as a factor
president <- c(rep(NA,14), rep("Reagan", 8), rep("Bush", 4), rep("Clinton", 8), rep("Bush", 8), rep("Obama", 7))
econonomics_mean$president <- president
# create ggplot
p <- ggplot(data = econonomics_mean, aes(x = year, y = mean_unemployment)) +
geom_point(aes(color = president)) +
geom_line(alpha = 1/3)
Update
This is the output:
structure(list(Event_stage = c("SE", "MN", "MN", "TE", "TE",
"TE", "TE", "TE", "TE", "TE", "TE", "WL", "TE", "TE", "SE", "TE",
"TE", "WL", "WT", "MN", "WL", "TE", "WL", "WL", "WT", "WL", "LD",
"WT", "WL", "WT", "WT", "TE", "WL", "LD", "WT", "LD", "MN", "TL",
"TE", "WL", "TL", "TL", "WT", "TE", "TE", "LD", "WT", "TL", "LD"),
event_date = structure(c(1529573704, 1529573710, 1529573713,
1529573724, 1529573855, 1529573874, 1529573880, 1529573895, 1529573906,
1529573918, 1529573925, 1529573931, 1529573931, 1529573941, 1529573947,
1529573969, 1529574006, 1529574054, 1529574088, 1529574114, 1529574120,
1529574123, 1529574134, 1529574137, 1529574148, 1529574163, 1529574164,
1529574148, 1529574169, 1529574170, 1529574178, 1529574188, 1529574189,
1529574196, 1529574178, 1529574188, 1529574203, 1529574213, 1529574214,
1529574214, 1529574215, 1529574227, 1529574231, 1529574242, 1529574244,
1529574245, 1529574248, 1529574260, 1529574262), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), stage_duration = c(3.78, 3.47, 2.78,
3.45, 3.32, 4.93, 4.23, 4.22, 3.85, 3.37, 5.88, 5.92, 3.97, 3.7,
NA, 4.08, 3.05, 0.57, 11.18, 12.08, 2.6, 3.3, 0.23, 0.85, 0.27,
0.25, 0.82, 10.42, 0.15, 0.43, 1.4, 0.25, 0.7, 0.52, 1.12, 0.45,
12.87, 12.18, 2.92, 0.57, 14.07, 12.72, 17.12, 4.13, 3.13, 0.25,
0.33, 18.98, 1.05), veh_id = c("B35", "B05", "B04", "B08", "B14",
"B13", "B04", "B17", "B41", "B05", "B26", "B08", "B35", "B19a",
"B10a", "B01a", "B28", "B14", "B14", "B18", "B05", "B37", "B04",
"B41", "B04", "B19a", "B04", "B17", "B35", "B13", "B35", "B02b",
"B28", "B13", "B19a", "B41", "B02b", "B04", "B15", "B01a", "B41",
"B13", "B28", "B27", "B33", "B19a", "B01a", "B19a", "B35")),
.Names = c("Event_stage", "event_date", "stage_duration", "veh_id"),
row.names = c(NA, -49L), class = c("tbl_df", "tbl", "data.frame"))
require(ggplot2)
require(dplyr)
df = structure(list(Event_stage = c("SE", "MN", "MN", "TE", "TE", "TE", "TE", "TE", "TE", "TE", "TE", "WL", "TE", "TE", "SE", "TE", "TE", "WL", "WT", "MN", "WL", "TE", "WL", "WL", "WT", "WL", "LD", "WT", "WL", "WT", "WT", "TE", "WL", "LD", "WT", "LD", "MN", "TL", "TE", "WL", "TL", "TL", "WT", "TE", "TE", "LD", "WT", "TL", "LD" ), event_date = structure(c(1529573704, 1529573710, 1529573713, 1529573724, 1529573855, 1529573874, 1529573880, 1529573895, 1529573906, 1529573918, 1529573925, 1529573931, 1529573931, 1529573941, 1529573947, 1529573969, 1529574006, 1529574054, 1529574088, 1529574114, 1529574120, 1529574123, 1529574134, 1529574137, 1529574148, 1529574163, 1529574164, 1529574148, 1529574169, 1529574170, 1529574178, 1529574188, 1529574189, 1529574196, 1529574178, 1529574188, 1529574203, 1529574213, 1529574214, 1529574214, 1529574215, 1529574227, 1529574231, 1529574242, 1529574244, 1529574245, 1529574248, 1529574260, 1529574262), class = c("POSIXct", "POSIXt"), tzone = "UTC"), stage_duration = c(3.78, 3.47, 2.78, 3.45, 3.32, 4.93, 4.23, 4.22, 3.85, 3.37, 5.88, 5.92, 3.97, 3.7, NA, 4.08, 3.05, 0.57, 11.18, 12.08, 2.6, 3.3, 0.23, 0.85, 0.27, 0.25, 0.82, 10.42, 0.15, 0.43, 1.4, 0.25, 0.7, 0.52, 1.12, 0.45, 12.87, 12.18, 2.92, 0.57, 14.07, 12.72, 17.12, 4.13, 3.13, 0.25, 0.33, 18.98, 1.05), veh_id = c("B35", "B05", "B04", "B08", "B14", "B13", "B04", "B17", "B41", "B05", "B26", "B08", "B35", "B19a", "B10a", "B01a", "B28", "B14", "B14", "B18", "B05", "B37", "B04", "B41", "B04", "B19a", "B04", "B17", "B35", "B13", "B35", "B02b", "B28", "B13", "B19a", "B41", "B02b", "B04", "B15", "B01a", "B41", "B13", "B28", "B27", "B33", "B19a", "B01a", "B19a", "B35")), .Names = c("Event_stage", "event_date", "stage_duration", "veh_id"), row.names = c(NA, -49L), class = c("tbl_df", "tbl", "data.frame"))
# create ggplot
ggplot(data = df, aes(x = event_date,
y = stage_duration)) +
geom_point(aes(color = Event_stage), size= 3) +
geom_line(alpha = 1/2)+
facet_wrap(~veh_id, nrow = 4) +
labs(x = "Event date", y = "Stage duration")
I have data on earnings which looks like the following;
# A tibble: 6 x 24
m_ticker ticker comp_name comp_name_2 exchange currency_code per_end_date_fr0
<chr> <chr> <chr> <chr> <chr> <chr> <date>
1 AAPL AAPL APPLE INC Apple Inc. NSDQ USD 2017-09-30
2 AXP AXP AMER EXPRES~ American Express~ NYSE USD 2017-12-31
3 BA BA BOEING CO The Boeing Compa~ NYSE USD 2017-12-31
4 CTR CAT CATERPILLAR~ Caterpillar Inc. NYSE USD 2017-12-31
5 CSCO CSCO CISCO SYSTE~ Cisco Systems, I~ NSDQ USD 2017-07-31
6 SD CVX CHEVRON CORP Chevron Corporat~ NYSE USD 2017-12-31
# ... with 17 more variables: per_end_date_qr1 <date>, eps_mean_est_qr1 <dbl>,
# street_mean_est_qr1 <dbl>, exp_rpt_date_qr1 <date>, exp_rpt_date_qr2 <date>,
# exp_rpt_date_fr1 <date>, exp_rpt_date_fr2 <date>, late_last_flag <dbl>,
# late_last_desc <chr>, source_flag <dbl>, source_desc <chr>, time_of_day_code <dbl>,
# time_of_day_desc <chr>, per_end_date_qr0 <date>, eps_act_qr0 <dbl>,
# per_end_date_qrm3 <date>, eps_act_qrm3 <dbl>
I also have a vector of ticker symbols called tickers.
tickers <- c("PYPL", "GOOG", "AAPL", "MSFT", "CSCO")
I am trying to create an ifelse statement which will print a small table of the following columns from the earnings data:
ticker | comp_name | exp_rpt_date_qr1 | exp_rpt_date_qr2 | time_of_day_desc
So, if ticker matches earnings$ticker then print the above columns.
I have tried using grepl to print a basic yes / no which reports a warning message.
ifelse(grepl(tickers, earnings$ticker), "yes", "no")
Data:
earnings <- structure(list(m_ticker = c("AAPL", "AXP", "BA", "CTR", "CSCO",
"SD", "DIS", "GE", "GS&", "HOMD", "IBM", "ITL", "JNJ", "CHL",
"KO", "MCD", "MMM", "MRK", "MSFT", "NIKE", "PFE", "PG", "SPM",
"UNIH", "UA", "VISA", "BEL", "WMS", "J"), ticker = c("AAPL",
"AXP", "BA", "CAT", "CSCO", "CVX", "DIS", "GE", "GS", "HD", "IBM",
"INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT", "NKE",
"PFE", "PG", "TRV", "UNH", "UTX", "V", "VZ", "WMT", "XOM"), comp_name = c("APPLE INC",
"AMER EXPRESS CO", "BOEING CO", "CATERPILLAR INC", "CISCO SYSTEMS",
"CHEVRON CORP", "DISNEY WALT", "GENL ELECTRIC", "GOLDMAN SACHS",
"HOME DEPOT", "INTL BUS MACH", "INTEL CORP", "JOHNSON & JOHNS",
"JPMORGAN CHASE", "COCA COLA CO", "MCDONALDS CORP", "3M CO",
"MERCK & CO INC", "MICROSOFT CORP", "NIKE INC-B", "PFIZER INC",
"PROCTER & GAMBL", "TRAVELERS COS", "UNITEDHEALTH GP", "UTD TECHS CORP",
"VISA INC-A", "VERIZON COMM", "WALMART INC", "EXXON MOBIL CRP"
), comp_name_2 = c("Apple Inc.", "American Express Company",
"The Boeing Company", "Caterpillar Inc.", "Cisco Systems, Inc.",
"Chevron Corporation", "The Walt Disney Company", "General Electric Company",
"The Goldman Sachs Group, Inc.", "The Home Depot, Inc.", "International Business Machines Corporation",
"Intel Corporation", "Johnson & Johnson", "JPMorgan Chase & Co.",
"Coca-Cola Company (The)", "McDonald's Corporation", "3M Company",
"Merck & Co., Inc.", "Microsoft Corporation", "NIKE, Inc.", "Pfizer Inc.",
"Procter & Gamble Company (The)", "The Travelers Companies, Inc.",
"UnitedHealth Group Incorporated", "United Technologies Corporation",
"Visa Inc.", "Verizon Communications Inc.", "Walmart Inc.", "Exxon Mobil Corporation"
), exchange = c("NSDQ", "NYSE", "NYSE", "NYSE", "NSDQ", "NYSE",
"NYSE", "NYSE", "NYSE", "NYSE", "NYSE", "NSDQ", "NYSE", "NYSE",
"NYSE", "NYSE", "NYSE", "NYSE", "NSDQ", "NYSE", "NYSE", "NYSE",
"NYSE", "NYSE", "NYSE", "NYSE", "NYSE", "NYSE", "NYSE"), currency_code = c("USD",
"USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD",
"USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD",
"USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD",
"USD"), per_end_date_fr0 = structure(c(17439, 17531, 17531, 17531,
17378, 17531, 17439, 17531, 17531, 17562, 17531, 17531, 17531,
17531, 17531, 17531, 17531, 17531, 17347, 17682, 17531, 17347,
17531, 17531, 17531, 17439, 17531, 17562, 17531), class = "Date"),
per_end_date_qr1 = structure(c(17712, 17712, 17712, 17712,
17743, 17712, 17712, 17712, 17712, 17743, 17712, 17712, 17712,
17804, 17712, 17712, 17712, 17712, 17712, 17774, 17712, 17712,
17712, 17712, 17712, 17712, 17712, 17743, 17712), class = "Date"),
eps_mean_est_qr1 = c(2.19, 1.83, 3.43, 2.66, 0.63, 2.1, 2.04,
0.18, 4.67, 2.85, 3.03, 0.99, 2.06, 2.27, 0.6, 1.93, 2.59,
1.03, 1.07, 0.61, 0.75, 0.91, 2.44, 3.03, 1.86, 1.09, 1.15,
1.21, 1.24), street_mean_est_qr1 = c(2.187, 1.83, 3.434,
2.658, 0.689, 2.098, 2.043, 0.178, 4.674, 2.847, 3.031, 0.99,
2.055, 2.27, 0.601, 1.929, 2.594, 1.03, 1.074, 0.609, 0.748,
0.906, 2.436, 3.031, 1.858, 1.089, 1.145, 1.212, 1.244),
exp_rpt_date_qr1 = structure(c(17743, 17730, 17737, 17742,
17758, 17739, 17750, 17732, 17729, 17764, 17730, 17738, 17729,
17815, 17737, 17738, 17736, 17739, 17731, 17799, 17743, 17743,
17731, 17729, 17736, 17737, 17736, 17759, 17739), class = "Date"),
exp_rpt_date_qr2 = structure(c(17836, 17821, 17828, 17827,
17856, 17830, 17843, 17823, 17820, 17848, 17820, 17829, 17820,
17907, 17828, 17827, 17827, 17830, 17829, 17885, 17834, 17823,
17822, 17820, 17827, 17828, 17822, 17850, 17830), class = "Date"),
exp_rpt_date_fr1 = structure(c(17836, 17913, 17926, 17920,
17758, 17928, 17843, 17919, 17912, 17946, 17913, 17920, 17918,
17907, 17942, 17925, 17920, 17928, 17731, 18074, 17925, 17743,
17918, 17911, 17919, 17828, 17918, 17946, 17928), class = "Date"),
exp_rpt_date_fr2 = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), class = "Date"), late_last_flag = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), late_last_desc = c("Not late",
"Not late", "Not late", "Not late", "Not late", "Not late",
"Not late", "Not late", "Not late", "Not late", "Not late",
"Not late", "Not late", "Not late", "Not late", "Not late",
"Not late", "Not late", "Not late", "Not late", "Not late",
"Not late", "Not late", "Not late", "Not late", "Not late",
"Not late", "Not late", "Not late"), source_flag = c(1, 1,
1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 2, 1), source_desc = c("Company", "Company",
"Company", "Company", "Estimated", "Company", "Company",
"Company", "Company", "Estimated", "Company", "Company",
"Company", "Estimated", "Company", "Company", "Company",
"Company", "Company", "Estimated", "Company", "Company",
"Company", "Company", "Company", "Company", "Company", "Estimated",
"Company"), time_of_day_code = c(1, 1, 2, 2, 4, 2, 1, 2,
2, 4, 1, 1, 2, 4, 2, 2, 2, 2, 1, 4, 2, 2, 2, 2, 2, 1, 2,
4, 2), time_of_day_desc = c("After market close", "After market close",
"Before the open", "Before the open", "Unknown", "Before the open",
"After market close", "Before the open", "Before the open",
"Unknown", "After market close", "After market close", "Before the open",
"Unknown", "Before the open", "Before the open", "Before the open",
"Before the open", "After market close", "Unknown", "Before the open",
"Before the open", "Before the open", "Before the open",
"Before the open", "After market close", "Before the open",
"Unknown", "Before the open"), per_end_date_qr0 = structure(c(17621,
17621, 17621, 17621, 17651, 17621, 17621, 17621, 17621, 17651,
17621, 17621, 17621, 17712, 17621, 17621, 17621, 17621, 17621,
17682, 17621, 17621, 17621, 17621, 17621, 17621, 17621, 17651,
17621), class = "Date"), eps_act_qr0 = c(2.73, 1.86, 3.64,
2.82, 0.6, 1.9, 1.84, 0.16, 6.95, 2.08, 2.45, 0.87, 2.06,
2.29, 0.47, 1.79, 2.5, 1.05, 0.95, 0.69, 0.77, 1, 2.46, 3.04,
1.77, 1.11, 1.17, 1.14, 1.09), per_end_date_qrm3 = structure(c(17347,
17347, 17347, 17347, 17378, 17347, 17347, 17347, 17347, 17378,
17347, 17347, 17347, 17439, 17347, 17347, 17347, 17347, 17347,
17409, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 17378,
17347), class = "Date"), eps_act_qrm3 = c(1.67, 1.47, 2.55,
1.49, 0.55, 0.91, 1.58, 0.28, 3.95, 2.25, 2.97, 0.72, 1.83,
1.76, 0.59, 1.73, 2.58, 1.01, 0.98, 0.57, 0.67, 0.85, 1.92,
2.46, 1.85, 0.86, 0.96, 1.08, 0.78)), .Names = c("m_ticker",
"ticker", "comp_name", "comp_name_2", "exchange", "currency_code",
"per_end_date_fr0", "per_end_date_qr1", "eps_mean_est_qr1", "street_mean_est_qr1",
"exp_rpt_date_qr1", "exp_rpt_date_qr2", "exp_rpt_date_fr1", "exp_rpt_date_fr2",
"late_last_flag", "late_last_desc", "source_flag", "source_desc",
"time_of_day_code", "time_of_day_desc", "per_end_date_qr0", "eps_act_qr0",
"per_end_date_qrm3", "eps_act_qrm3"), row.names = c(NA, -29L), class = c("tbl_df",
"tbl", "data.frame"))
Instead of grepl you can use %in%.
Furthermore, if all you're doing is choosing specific rows and columns to print, you could use subset.
> keepcols = c('ticker','comp_name','exp_rpt_date_qr1','exp_rpt_date_qr2','time_of_day_desc')
> subset(earnings, ticker %in% tickers, select = keepcols)
ticker comp_name exp_rpt_date_qr1 exp_rpt_date_qr2 time_of_day_desc
1 AAPL APPLE INC 2018-07-31 2018-11-01 After market close
5 CSCO CISCO SYSTEMS 2018-08-15 2018-11-21 Unknown
19 MSFT MICROSOFT CORP 2018-07-19 2018-10-25 After market close