I am creating a shiny app for my data but my current code does not display the plot. Also based on column 8 onwards I also want to categorize my data under 2 checkboxes "Stage1" and "Stage2". And based on a dropdown, date range and checkbox show subset/filter the data and show the plot.
Stage1<-(mytest$status_2019|mytest$status_2020|mytest$status_2021|mytest$status_2022==1)
Stage2<-(mytest$status_stage2_2019|mytest$status_stage2_2020|mytest$status_stage2_2021|mytest$status_stage2_2022==1)
Here is mydata:
mydata<-structure(list(Id = c("DB-1", "DB-2", "DB-3", "DB-4", "DB-5",
"DB-6", "DB-7", "DB-9", "DB-11", "DB-12", "DB-13", "DB-14", "DB-15",
"DB-16", "DB-17", "DB-18", "DB-19", "DB-20", "DB-23", "DB-25",
"DB-26", "DB-27", "DB-28", "DB-29", "DB-30", "DB-31", "DB-32",
"DB-34", "DB-35", "DB-36", "DB-37"), examiner = c("Alex", "Alex",
"Alex", "Alex", "Alex", "Alex", "Kim", "Kim", "Kim", "Kim", "Kim",
"Alex", "Alex", "Jhon", "Jhon", "Jhon", "Jhon", "Jhon", "Jhon",
"Maymoon", "Maymoon", "Maymoon", "Maymoon", "Maymoon", "Mike",
"Mike", "Mike", "Mike", "Mike", "Mike", "Mike"), Relationship = c("sibling",
"mother", "self", "father", "self", "self", "self", "self", "self",
"mother", "self", "self", "self", "self", "mother", "father",
"self", "self", "mother", "self", "self", "self", "self", "sibling",
"father", "mother", "mother", "mother", "mother", "self", "father"
), signed_date = c("12/4/18", "11/27/18", "11/30/18", "11/13/18",
"11/27/18", "11/13/18", "11/28/18", "2/26/19", "4/3/19", "1/15/19",
"4/3/19", "11/13/18", "2/25/19", "12/6/18", "1/15/19", "11/30/18",
"12/4/18", "11/20/18", "4/3/19", "2/25/19", "2/14/19", "12/6/18",
"3/14/19", "12/7/18", "1/10/19", "3/12/19", "3/22/19", "12/20/18",
"3/21/19", "4/5/19", "11/15/18"), gender = c("male", "female",
"male", "male", "male", "male", "female", "female", "female",
"female", "male", "female", "female", "female", "female", "male",
"male", "female", "female", "female", "male", "male", "female",
"male", "male", "female", "female", "female", "female", "female",
"male"), stage1_date = c("2/21/19 21:15", "1/10/19 21:45", "1/9/19 19:50",
"1/10/19 21:40", "1/10/19 21:45", "1/9/20 14:50", "1/10/19 21:45",
"3/15/19 16:50", "4/26/19 19:20", "3/21/19 18:21", "4/26/19 19:20",
"1/10/19 21:40", "3/15/19 16:50", "1/10/19 21:45", "3/21/19 18:21",
"1/31/19 20:25", NA, "1/10/19 21:45", "1/9/20 14:50", "7/30/19 15:10",
"3/4/19 16:30", NA, "4/8/19 12:40", "2/6/19 20:36", "1/31/19 20:25",
"5/1/19 18:05", "4/8/19 12:41", "1/17/19 19:26", "5/1/19 18:05",
NA, "1/10/19 21:45"), stage2_date = c(NA, NA, NA, NA, "5/11/21 17:50",
NA, "5/21/21 17:46", NA, "5/11/21 17:37", NA, "5/21/21 17:47",
"5/15/21 16:07", "5/16/21 16:07", NA, NA, NA, "5/11/21 17:52",
NA, NA, "5/14/21 16:07", "5/11/21 17:37", "5/11/21 17:52", NA,
NA, NA, NA, NA, NA, NA, "5/11/21 17:42", NA), status_2019 = c(1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 0, 1), status_2020 = c(0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), status_2021 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
status_2022 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), status_stage2_2020 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), status_stage2_2021 = c(0,
0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0), status_stage2_2022 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -31L
), spec = structure(list(cols = list(Id = structure(list(), class = c("collector_character",
"collector")), Onboarded = structure(list(), class = c("collector_character",
"collector")), Relationship = structure(list(), class = c("collector_character",
"collector")), signed_date = structure(list(), class = c("collector_character",
"collector")), gender = structure(list(), class = c("collector_character",
"collector")), stage1_date = structure(list(), class = c("collector_character",
"collector")), stage2_date = structure(list(), class = c("collector_character",
"collector")), status_2019 = structure(list(), class = c("collector_double",
"collector")), status_2020 = structure(list(), class = c("collector_double",
"collector")), status_2021 = structure(list(), class = c("collector_double",
"collector")), status_2022 = structure(list(), class = c("collector_double",
"collector")), status_stage2_2020 = structure(list(), class = c("collector_double",
"collector")), status_stage2_2021 = structure(list(), class = c("collector_double",
"collector")), status_stage2_2022 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x7f7f0a7dc7c0>, class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
Here is my code:
library(shiny)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)
mydata <- read_csv("test.csv")
mydata$signed_date <- as.Date(mydata$signed_date, format = "%Y-%m-%d", optional=FALSE)
server <- function(input, output, session) {
#Summarize Data and then Plot
data <- reactive({
req(input$examiner)
mydata %>%
dplyr::filter(examiner %in% input$examiner ,
signed_date >= input$daterange[1] &
signed_date <= input$daterange[2]) %>%
group_by(relation) %>% summarize(Total = n())
})
output$selected_var <- renderText({
paste("You have chosen ", input$examiner, "between", input$Dates[1], "and", input$Dates[2])
})
#Plot
output$plot <- renderPlot({
g <- ggplot(data(), aes( y = Total, x = relation))
g + geom_bar(stat = "sum")
})
}
ui <- basicPage(
titlePanel("My Dashboard"),
helpText("Shows my data"),
selectInput(inputId = "examiner",
label = h3("Choose examiner"),
choices = c("None", as.character(mydata$examiner), selected = "None")),
dateRangeInput("Dates", h3("Select the Dates"), format="yyyy-mm-dd", start = "2018-04-01"),
mainPanel(
textOutput("selected_var"),
plotOutput("plot")
)
)
shinyApp(ui = ui, server = server)
I am now running into the following error
Problem with filter() input ..1.
[34mℹ[39m Input ..1 is examiner %in% input$examiner.
[31mx[39m object 'examiner' not found
You have multiple problems:
Typos
Your date data is not in a standard date format
Your posted data had an error in it.
I've begun to clean up some of these typos, but I'll leave you to figure out the date issue. The below has a browser() statement in one of the reactives. Run the app and then work interactively at the R console to see the date problem (by running something like class(mydata$stage1_date). Once you clean up the date problem post better data.
Here's something that is a little closer.
mydata <-
structure(
list(
Id = c(
"DB-1",
"DB-2",
"DB-3",
"DB-4",
"DB-5",
"DB-6",
"DB-7",
"DB-9",
"DB-11",
"DB-12",
"DB-13",
"DB-14",
"DB-15",
"DB-16",
"DB-17",
"DB-18",
"DB-19",
"DB-20",
"DB-23",
"DB-25",
"DB-26",
"DB-27",
"DB-28",
"DB-29",
"DB-30",
"DB-31",
"DB-32",
"DB-34",
"DB-35",
"DB-36",
"DB-37"
),
examiner = c(
"Alex",
"Alex",
"Alex",
"Alex",
"Alex",
"Alex",
"Kim",
"Kim",
"Kim",
"Kim",
"Kim",
"Alex",
"Alex",
"Jhon",
"Jhon",
"Jhon",
"Jhon",
"Jhon",
"Jhon",
"Maymoon",
"Maymoon",
"Maymoon",
"Maymoon",
"Maymoon",
"Mike",
"Mike",
"Mike",
"Mike",
"Mike",
"Mike",
"Mike"
),
Relationship = c(
"sibling",
"mother",
"self",
"father",
"self",
"self",
"self",
"self",
"self",
"mother",
"self",
"self",
"self",
"self",
"mother",
"father",
"self",
"self",
"mother",
"self",
"self",
"self",
"self",
"sibling",
"father",
"mother",
"mother",
"mother",
"mother",
"self",
"father"
),
application_date = c(
"12/4/18",
"11/27/18",
"11/30/18",
"11/13/18",
"11/27/18",
"11/13/18",
"11/28/18",
"2/26/19",
"4/3/19",
"1/15/19",
"4/3/19",
"11/13/18",
"2/25/19",
"12/6/18",
"1/15/19",
"11/30/18",
"12/4/18",
"11/20/18",
"4/3/19",
"2/25/19",
"2/14/19",
"12/6/18",
"3/14/19",
"12/7/18",
"1/10/19",
"3/12/19",
"3/22/19",
"12/20/18",
"3/21/19",
"4/5/19",
"11/15/18"
),
gender = c(
"male",
"female",
"male",
"male",
"male",
"male",
"female",
"female",
"female",
"female",
"male",
"female",
"female",
"female",
"female",
"male",
"male",
"female",
"female",
"female",
"male",
"male",
"female",
"male",
"male",
"female",
"female",
"female",
"female",
"female",
"male"
),
stage1_date = c(
"2/21/19 21:15",
"1/10/19 21:45",
"1/9/19 19:50",
"1/10/19 21:40",
"1/10/19 21:45",
"1/9/20 14:50",
"1/10/19 21:45",
"3/15/19 16:50",
"4/26/19 19:20",
"3/21/19 18:21",
"4/26/19 19:20",
"1/10/19 21:40",
"3/15/19 16:50",
"1/10/19 21:45",
"3/21/19 18:21",
"1/31/19 20:25",
NA,
"1/10/19 21:45",
"1/9/20 14:50",
"7/30/19 15:10",
"3/4/19 16:30",
NA,
"4/8/19 12:40",
"2/6/19 20:36",
"1/31/19 20:25",
"5/1/19 18:05",
"4/8/19 12:41",
"1/17/19 19:26",
"5/1/19 18:05",
NA,
"1/10/19 21:45"
),
stage2_date = c(
NA,
NA,
NA,
NA,
"5/11/21 17:50",
NA,
"5/21/21 17:46",
NA,
"5/11/21 17:37",
NA,
"5/21/21 17:47",
"5/15/21 16:07",
"5/16/21 16:07",
NA,
NA,
NA,
"5/11/21 17:52",
NA,
NA,
"5/14/21 16:07",
"5/11/21 17:37",
"5/11/21 17:52",
NA,
NA,
NA,
NA,
NA,
NA,
NA,
"5/11/21 17:42",
NA
),
status_2019 = c(
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
0,
1,
0,
1,
1,
0,
1,
1,
1,
1,
1,
1,
1,
0,
1
),
status_2020 = c(
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
),
status_2021 = c(
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
),
status_2022 = c(
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
),
status_stage2_2020 = c(
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
),
status_stage2_2021 = c(
0,
0,
0,
0,
1,
0,
1,
0,
1,
0,
1,
1,
1,
0,
0,
0,
1,
0,
0,
1,
1,
1,
0,
0,
0,
0,
0,
0,
0,
1,
0
),
status_stage2_2022 = c(
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
)
),
row.names = c(NA, -31L),
spec = structure(list(
cols = list(
Id = structure(list(), class = c("collector_character",
"collector")),
Onboarded = structure(list(), class = c("collector_character",
"collector")),
Relationship = structure(list(), class = c("collector_character",
"collector")),
application_date = structure(list(), class = c("collector_character",
"collector")),
gender = structure(list(), class = c("collector_character",
"collector")),
stage1_date = structure(list(), class = c("collector_character",
"collector")),
stage2_date = structure(list(), class = c("collector_character",
"collector")),
status_2019 = structure(list(), class = c("collector_double",
"collector")),
status_2020 = structure(list(), class = c("collector_double",
"collector")),
status_2021 = structure(list(), class = c("collector_double",
"collector")),
status_2022 = structure(list(), class = c("collector_double",
"collector")),
status_stage2_2020 = structure(list(), class = c("collector_double",
"collector")),
status_stage2_2021 = structure(list(), class = c("collector_double",
"collector")),
status_stage2_2022 = structure(list(), class = c("collector_double",
"collector"))
),
default = structure(list(), class = c("collector_guess",
"collector")),
delim = ","
), class = "col_spec"),
# problems = < pointer:0x7f7f0a7dc7c0 > ,
class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame")
)
library(shiny)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)
# mydata <- read_csv("test.csv")
mydata$signed_date <-
as.Date(mydata$signed_date, format = "%Y-%m-%d", optional = FALSE)
server <- function(input, output, session) {
#Summarize Data and then Plot
data <- reactive({
req(input$examiner)
browser()
mydata %>%
dplyr::filter(
examiner %in% input$examiner ,
stage1_date >= input$daterange[1] &
stage1_date <= input$daterange[2]
) %>%
group_by(Relationship) %>% summarize(Total = n())
})
output$selected_var <- renderText({
paste("You have chosen ",
input$examiner,
"between",
input$Dates[1],
"and",
input$Dates[2])
})
#Plot
output$plot <- renderPlot({
g <- ggplot(data(), aes(y = Total, x = relation))
g + geom_bar(stat = "sum")
})
}
ui <- basicPage(
titlePanel("My Dashboard"),
helpText("Shows my data"),
selectInput(
inputId = "examiner",
label = h3("Choose examiner"),
choices = c("None", as.character(mydata$examiner), selected = "None")
),
dateRangeInput(
"daterange",
h3("Select the Dates"),
format = "yyyy-mm-dd",
start = "2018-04-01"
),
mainPanel(textOutput("selected_var"),
plotOutput("plot"))
)
shinyApp(ui = ui, server = server)
You have too many typos in your code as noted by #dca. Try this
mydata$signed_date <- as.Date(mydata$application_date, format = "%m/%d/%y", optional=FALSE)
server <- function(input, output, session) {
#Summarize Data and then Plot
data <- reactive({
#req(input$examiner)
if (is.null(input$examiner) | input$examiner=="None") return(NULL)
mydata %>% dplyr::filter(examiner %in% input$examiner) %>%
dplyr::filter(signed_date >= input$Dates[1] & signed_date <= input$Dates[2]) %>%
group_by(Relationship) %>% dplyr::summarize(Total = n())
})
output$selected_var <- renderText({
paste("You have chosen ", input$examiner, "between", input$Dates[1], "and", input$Dates[2])
})
#Plot
output$plot <- renderPlot({
req(data())
g <- ggplot(data(), aes( y = Total, x = Relationship))
g + geom_bar(stat = "sum")
})
}
ui <- basicPage(
titlePanel("My Dashboard"),
helpText("Shows my data"),
selectInput(inputId = "examiner",
label = h3("Choose examiner"),
choices = c("None", unique(mydata$examiner)), selected = "None"),
dateRangeInput("Dates", h3("Select the Dates"), format="yyyy-mm-dd", start = "2018-04-01"),
mainPanel(
textOutput("selected_var"),
plotOutput("plot")
)
)
shinyApp(ui = ui, server = server)
I would like to find which column contains the highest number of 1. Number 1 should appear only once per row. As soon as column with highest number 1 will be located the script should check also neighboring columns (+1+ / -1) and if any of them contain number 1 it should be also selected. All of these rows should be kept within subset function.
Let's put part of original data:
structure(list( `10` = c(0, 0, 0, 0), `34` = c(0, 0, 0, 0),
`59` = c(0, 0, 0, 0), `84` = c(0, 0, 0, 0),
`110` = c(0, 0, 0, 0), `134` = c(0, 0, 0, 0),
`165` = c(0, 0, 0, 0), `199` = c(0, 0, 0, 0),
`234` = c(0, 0, 0, 0),
`257` = c(0.0160178986200301, 0, 0.0409772658686249, 0.0289710439505515),
`362` = c(0.0679054515644214, 0.126933274414494, 0.0855598028367368, 0.0596214721268868),
`433` = c(0.490914059297718, 0.604765061128296, 0.813348757670254, 1),
`506` = c(1, 1, 1, 0.971410482822965),
`581` = c(0.198244295668807, 0.234158197083517, 0.269655970224324, 0.195318383259472),
`652` = c(0.271177756524115, 0.223018854028576, 0.301352982597324, 0.142584385725234),
`733` = c(0.212426561005602, 0.212778023272942, 0.228513228045468, 0),
`818` = c(0.213816778248395, 0.168570481661511, 0.264465345538678, 0),
`896` = c(0.137102063123377, 0, 0.320234382858867, 0),
`972` = c(0.108932231179123, 0, 0.179106729705261, 0),
`1039` = c(0.101762535865555, 0, 0, 0),
EOD = c("Peter", "Peter", "Peter", "Peter"),
Complex = c(""FT team", "FT team", "FT team", "FT team")),
.Names = c("10", "34", "59", "84", "110", "134", "165", "199",
"234", "257", "362", "433", "506", "581", "652", "733",
"818", "896", "972", "1039", "EOD", "Complex"),
row.names = c("Peter_1_Rep_1_E", "Peter_1_Rep_2_E",
"Peter_1_Rep_3_E", "Peter_1_Rep_4_E"),
class = "data.frame")
As you can clearly see in the original data the column 506 should be selected as the one containing the highest number of 1 and data should be subseted base on it. However, output would be exactly the same because in this data neighboring fraction (-1, 433) contains also 1. That's easy example.
Situation might be more complicated, like in that case:
structure(list( `10` = c(0, 0, 0, 0, 0, 0, 0, 0),
`34` = c(0, 0, 0, 0, 0, 0, 0, 0),
`59` = c(0, 0, 0, 0, 0, 0, 0, 0),
`84` = c(0, 0, 0, 0, 0, 0, 0, 0),
`110` = c(0, 0, 0, 0, 0, 0, 0, 0),
`134` = c(0.168783347110543, 0, 0.382618775924215, 0, 0.530638724516877, 0, 0.169526042048202, 0),
`165` = c(1, 0.36380544964196, 1, 0.13979454361738, 1, 0.239652477288689, 1, 0.240341578327444),
`199` = c(0.355158938904336, 1, 0.646724265971128, 1, 0.582637073151552, 1, 0.20319390520841, 1),
`234` = c(0.0963628165627114, 0.575436312346942, 0.229853828180188, 0.433555069046817, 0.247567185011894, 0.508529485059242, 0.138356164383562, 0.389880251276011),
`257` = c(0, 0.17393595585728, 0, 0.127787133715056, 0, 0.117147323350173, 0, 0),
`362` = c(0, 0, 0, 0.0919333108790839, 0, 0, 0, 0),
`433` = c(0, 0, 0, 0.0745570899292691, 0, 0, 0, 0),
`506` = c(0, 0, 0, 0, 0, 0, 0, 0),
`581` = c(0, 0, 0, 0, 0, 0, 0, 0),
`652` = c(0, 0, 0, 0, 0, 0, 0, 0),
`733` = c(0, 0, 0, 0, 0, 0, 0, 0),
`818` = c(0, 0, 0, 0, 0, 0, 0, 0),
`896` = c(0, 0, 0, 0, 0, 0, 0, 0),
`972` = c(0, 0, 0, 0, 0, 0, 0, 0),
`1039` = c(0, 0, 0, 0, 0, 0, 0, 0),
EOD = c("Paul", "Paul", "Paul", "Paul", "Paul", "Paul", "Paul", "Paul"),
Complex = c("GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team")),
.Names = c("10", "34", "59", "84", "110", "134", "165", "199", "234", "257", "362", "433", "506", "581", "652", "733", "818", "896", "972", "1039", "EOD", "Complex"),
row.names = c("PaulG_1_Rep_1_E", "Paul_1_Rep_1_E", "PaulN_1_Rep_2_E", "PaulG_1_Rep_2_E", "Paul_1_Rep_3_E", "PaulC_1_Rep_3_E", "PaulC_1_Rep_4_E", "Paul_1_Rep_4_E"),
class = "data.frame")
In that situation there are two columns which contain the same number of 1s. In this case column with bigger colsum should be selected.
let df1 be your input:
df_num <- df1[,sapply(df1,is.numeric)] # keep only numeric columns to build filter
n1 <- colSums(df_num == 1) # number of 1s per column
i <- which(n1 == max(n1)) # index of cols with max 1s
if(length(i) > 1){
max_cs <- which.max(colSums(df_num[,i])) # index of col with max colsum among results
i <- i[max_cs] # our column index
}
filter <- rowSums(df_num[,seq(max(i-1,0),min(i+1,ncol(df_num)))]==1) >0 # filter is true if chosen column is 1 or if any neighbour is 1
df1[filter,] # your result
In both of your examples, all rows are kept
I'd use the tidyverse to convert it to long format then pull in the column sums to determine where the first one (with the largest sum) is:
library(tidyverse)
# add rownames to the data frame
df2$id <- rownames(df2)
# make a data frame of each column's sum
thecolsums <- colSums(df2[,map_lgl(df2, is.numeric)]) %>%
enframe(name = "colname", value = "colsum")
# change the data frame to long format
dflong <- df2 %>%
mutate(rowid = row_number()) %>%
gather(colname, val, -rowid)
# which column has the first 1 value
whichcol <- dflong %>%
group_by(colname) %>%
filter(val ==1) %>%
summarize(
firstone = min(rowid, na.rm = T)
) %>%
left_join(thecolsums, by = 'colname') %>%
filter(colsum == max(colsum)) %>%
pluck('colname')
# what's the numerical index of the column
whichcolindex <- which(names(df2) == whichcol)
# get previous and next columns if they exist
prevcolindex <- ifelse(whichcolindex < 1, F, whichcolindex -1)
nextcolindex <- ifelse(whichcolindex == ncol(df2) , F, whichcolindex +1)
# do the previous and next columns have 1s in them?
prevcolhasone <- any(df2[,prevcolindex] == 1)
nextcolhasone <- any(df2[,nextcolindex] == 1)
# create a vector with 1, 2 or 3 column indexes
finalindex <- c(
prevcolindex[prevcolhasone]
, whichcolindex
, nextcolindex[nextcolhasone]
)
# subset the original data frame, only preserving the columns in question
results <- df2[, finalindex]
genres=c("Action","Adventure","Animation","Biography","Comedy","Crime",
"Documentary","Drama","Family","Game.Show","Horror","Music","Musical",
"Mystery","Romance","Sci.Fi","Short","Thriller","War","Western")
This is my vector of genres.
Another data set has the same column names.
This is the data set column names
"Title" "Genre" "imdbRating" "Release_Year"
"Action" "Adventure" "Animation" "Biography" "Comedy"
"Crime" "Documentary" "Drama" "Family"
"Fantasy" "Game.Show" "Horror" "Music"
"Musical" "Mystery" "N.A" "Romance"
"Sci.Fi" "Short" "Sport" "Thriller"
"War" "Western"
I want to run this command for all genres replacing each genre with the value.
data_predict$genres[grepl("*genres*", data_predict$Genre)]=1
Orignal Data set
data_predict<-structure(list(Genre = structure(c(3L, 1L, 2L), .Label = c("Action, Adventure, Sci-Fi",
"Action, Drama, War", "Sci-Fi"), class = "factor"), Action = c(0,
0, 0), Adventure = c(0, 0, 0), Animation = c(0, 0, 0), Biography = c(0,
0, 0), Comedy = c(0, 0, 0), Crime = c(0, 0, 0), Documentary = c(0,
0, 0), Drama = c(0, 0, 0), Family = c(0, 0, 0), Game.Show = c(0,
0, 0), Horror = c(0, 0, 0), Music = c(0, 0, 0), Musical = c(0,
0, 0), Mystery = c(0, 0, 0), Romance = c(0, 0, 0), Sci.Fi = c(0,
0, 0), Short = c(0, 0, 0), Thriller = c(0, 0, 0), War = c(0,
0, 0), Western = c(0, 0, 0)), .Names = c("Genre", "Action", "Adventure",
"Animation", "Biography", "Comedy", "Crime", "Documentary", "Drama",
"Family", "Game.Show", "Horror", "Music", "Musical", "Mystery",
"Romance", "Sci.Fi", "Short", "Thriller", "War", "Western"), row.names = c(NA,
3L), class = "data.frame")
Expected result
data_predicted<-structure(list(Genre = structure(c(3L, 1L, 2L), .Label = c("Action, Adventure, Sci-Fi",
"Action, Drama, War", "Sci-Fi"), class = "factor"), Action = c(0,
1, 1), Adventure = c(0, 1, 0), Animation = c(0, 0, 0), Biography = c(0,
0, 0), Comedy = c(0, 0, 0), Crime = c(0, 0, 0), Documentary = c(0,
0, 0), Drama = c(0, 0, 1), Family = c(0, 0, 0), Game.Show = c(0,
0, 0), Horror = c(0, 0, 0), Music = c(0, 0, 0), Musical = c(0,
0, 0), Mystery = c(0, 0, 0), Romance = c(0, 0, 0), Sci.Fi = c(0,
0, 0), Short = c(0, 0, 0), Thriller = c(0, 0, 0), War = c(0,
0, 1), Western = c(0, 0, 0)), .Names = c("Genre", "Action", "Adventure",
"Animation", "Biography", "Comedy", "Crime", "Documentary", "Drama",
"Family", "Game.Show", "Horror", "Music", "Musical", "Mystery",
"Romance", "Sci.Fi", "Short", "Thriller", "War", "Western"), row.names = c(NA,
3L), class = "data.frame")
Try
library(qdapTools)
mtabulate(strsplit(as.character(data_predict$Genre), ', '))
Or
data_predict[-1] <- lapply(names(data_predict)[-1],
function(x) as.numeric(grepl(x, data_predict$Genre)))