Is there any way to run a vector in checkluhn package? - r

I try to run the following code:
library(checkLuhn)
library(gsheet)
data<-gsheet2tbl("https://docs.google.com/spreadsheets/d/145Wowgp6NXmcj-IqqKfZ-2B2aKwAOzZ1VuCjxAq7WeM/edit?usp=sharing")
df <- data.frame(Card = character(),
Active = character(),
issuer =character())
for (i in 1: nrow(data))
{
card <- data[i,2]
active <- checkLuhn(card)
issuer <- issuer(card)
df = rbind(df, data.frame(Card = card, Active = active, Issuer =issuer, stringsAsFactors = FALSE))
}
But it is showing the following error:
Error in data.frame(Card = card, Active = active, Issuer = issuer, stringsAsFactors = FALSE) :
arguments imply differing number of rows: 1, 0
Can anyone help me please?

The issue seems to be with issuer which returns logical FALSE if there is none or else return a two column tibble. We could check the output type and make changes
library(purrr)
library(checkLuhn)
library(gsheet)
map_dfr(data$Number, ~ {
active <- checkLuhn(.x)
issuer <- issuer(.x)['issuer']
if(is.logical(issuer)) issuer <- tibble(issuer= NA_character_)
tibble(card = .x, active, issuer)
})
-output
# A tibble: 18 × 3
card active issuer
<chr> <lgl> <chr>
1 378 2822 4631 0005 TRUE American Express
2 371 4496 3539 8431 TRUE American Express
3 378 7344 9367 1000 TRUE American Express
4 5610 5910 8101 8250 TRUE Bankcard
5 5610 5910 8101 8250 TRUE Maestro
6 30 5693 0902 5904 TRUE Diners Club Carte Blanche
7 6011 1111 1111 1110 FALSE <NA>
8 6011 0009 9013 9420 FALSE <NA>
9 3530 1113 3330 0000 TRUE JCB
10 3566 0020 2036 0500 FALSE <NA>
11 5555 5555 5555 4440 FALSE <NA>
12 5105 1051 0510 5100 TRUE MasterCard
13 4111 1111 1111 1110 FALSE <NA>
14 4012 8888 8888 1880 FALSE <NA>
15 4 2222 2222 2222 TRUE Visa
16 760 0924 4561 FALSE <NA>
17 5019 7170 1010 3740 FALSE <NA>
18 6331 1019 9999 0010 FALSE <NA>
Or using the OP's code
df <- data.frame(Card = character(),
Active = character(),
Issuer =character())
for (i in 1:nrow(data))
{
card <- data[[2]][i]
active <- checkLuhn(card)
issuer <- issuer(card);
if(NROW(issuer) == 0 |is.logical(issuer)) {
issuer <- NA_character_
}else {
issuer <- issuer$issuer }
df <- rbind(df, data.frame(Card = card,
Active = active, Issuer =issuer, stringsAsFactors = FALSE))
}
-output
df
Card Active Issuer
1 378 2822 4631 0005 TRUE American Express
2 371 4496 3539 8431 TRUE American Express
3 378 7344 9367 1000 TRUE American Express
4 5610 5910 8101 8250 TRUE Bankcard
5 5610 5910 8101 8250 TRUE Maestro
6 30 5693 0902 5904 TRUE Diners Club Carte Blanche
7 38 5200 0002 3237 TRUE <NA>
8 6011 1111 1111 1110 FALSE <NA>
9 6011 0009 9013 9420 FALSE <NA>
10 3530 1113 3330 0000 TRUE JCB
11 3566 0020 2036 0500 FALSE <NA>
12 5555 5555 5555 4440 FALSE <NA>
13 5105 1051 0510 5100 TRUE MasterCard
14 4111 1111 1111 1110 FALSE <NA>
15 4012 8888 8888 1880 FALSE <NA>
16 4 2222 2222 2222 TRUE Visa
17 760 0924 4561 FALSE <NA>
18 5019 7170 1010 3740 FALSE <NA>
19 6331 1019 9999 0010 FALSE <NA>

Related

Retaining row pairs and excluding singlets and triplets

Say that I have a data frame like this....
df <- data.frame(ID = c("2280", "2280","2280","2280","3115","2281", "2281","2281","2281", "3282","3282","3282","3282", "3283","3283","3283","3283","1821","1822", "4007", "1145", "1145", "1146", "1147"), sib_ID = c("2282", "2282", "2282", "2282", "3117", "2282", "2282", "2282", "2282", "3284", "3284", "3284","3284", "3284", "3284", "3284", "3284", "1823", "1823","4009", "1148", "1148","1148", "1148"), Age = c("3", "12", "6", "9", "3","9", "6", "12","3","9", "6", "12","3","9", "6", "12", "6", "12","12", "6", "12","12", "6", "6"), Behavior = c("good", "bad", "good", "bad", "good", "good", "good", "bad", "good", "good", "good", "bad","good", "good", "good", "bad", "good", "bad", "good","good", "bad", "good","good", "good"))
> df
ID sib_ID Age Behavior
1 2280 2282 3 good
2 2280 2282 12 bad
3 2280 2282 6 good
4 2280 2282 9 bad
5 3115 3117 3 good
6 2281 2282 9 good
7 2281 2282 6 good
8 2281 2282 12 bad
9 2281 2282 3 good
10 3282 3284 9 good
11 3282 3284 6 good
12 3282 3284 12 bad
13 3282 3284 3 good
14 3283 3284 9 good
15 3283 3284 6 good
16 3283 3284 12 bad
17 3283 3284 6 good
18 1821 1823 12 bad
19 1822 1823 12 good
20 4007 4009 6 good
21 1145 1148 12 bad
22 1145 1148 12 good
23 1146 1148 6 good
24 1147 1148 6 good
and I want my data frame to only consist of IDs that have a pair. So we would keep data from (2280, 2281), (3282, 3283), (1821, 1822) and remove cases where the ID does not have a pair (3115 and 4007) and cases where we have a triplet (1145, 1146, 1147). What would be the most efficient way to go about doing this?
Example of desired output:
> df
ID sib_ID Age Behavior
1 2280 2282 3 good
2 2280 2282 12 bad
3 2280 2282 6 good
4 2280 2282 9 bad
6 2281 2282 9 good
7 2281 2282 6 good
8 2281 2282 12 bad
9 2281 2282 3 good
10 3282 3284 9 good
11 3282 3284 6 good
12 3282 3284 12 bad
13 3282 3284 3 good
14 3283 3284 9 good
15 3283 3284 6 good
16 3283 3284 12 bad
17 3283 3284 6 good
18 1821 1823 12 bad
19 1822 1823 12 good
First, I think we need to clarify and solidify the grouping of IDs. I suggest we create a frame up-front to clearly identify these groups, then join back into the original data.
GRPs <- data.frame(ID = as.integer(sort(unique(df$ID)))) %>%
mutate(GRP = c(0, cumsum(diff(ID) > 1)), ID = as.character(ID))
GRPs
# ID GRP
# 1 1145 0
# 2 1146 0
# 3 1147 0
# 4 1821 1
# 5 1822 1
# 6 2280 2
# 7 2281 2
# 8 3115 3
# 9 3282 4
# 10 3283 4
# 11 4007 5
From here, we join them back in and then do the grouped determination of "complete" or not.
left_join(df, GRPs, by = "ID") %>%
group_by(ID) %>%
mutate(keep1 = all(c("good", "bad") %in% Behavior)) %>%
group_by(GRP) %>%
mutate(keep2 = all(c("good", "bad") %in% Behavior)) %>%
ungroup() %>%
dplyr::filter(keep1 | keep2)
# # A tibble: 22 × 6
# ID Age Behavior GRP keep1 keep2
# <chr> <chr> <chr> <dbl> <lgl> <lgl>
# 1 2280 3 good 2 TRUE TRUE
# 2 2280 12 bad 2 TRUE TRUE
# 3 2280 6 good 2 TRUE TRUE
# 4 2280 9 bad 2 TRUE TRUE
# 5 2281 9 good 2 TRUE TRUE
# 6 2281 6 good 2 TRUE TRUE
# 7 2281 12 bad 2 TRUE TRUE
# 8 2281 3 good 2 TRUE TRUE
# 9 3282 9 good 4 TRUE TRUE
# 10 3282 6 good 4 TRUE TRUE
# # … with 12 more rows
# # ℹ Use `print(n = ...)` to see more rows
Though it's returning more rows ...

How to mark episodes in which patient will be readmitted in 30-days?

I have a dataset with patients episodes.
Every patient has its own patientPersonalNumber.
Inpatient episode has admission and discharge date.
I need to mark in new variable (with TRUE, or with 1) all episodes that patient in that episode will be readmitted within 30 days.
install.packages("lubridate")
library(lubridate)
admission <- c("06/23/2013", "06/30/2013", "07/12/2013","06/24/2013","06/28/2013","06/29/2013","06/23/2013","06/24/2013","06/24/2013","07/02/2013","07/09/2013","06/24/2013","09/08/2013","07/22/2014")
discharge<- c("06/25/2013", "07/03/2014", "07/17/2014","06/30/2013","06/30/2013","07/02/2013","06/29/2013","06/29/2013","06/27/2013","07/05/2013","07/12/2013","06/28/2013","10/12/2013","08/01/2014")
admission.date <- mdy(admission)
discharge.date <- mdy(discharge)
patientPersonalNumber<-c("001","002","004","005","006","007","008","009","010", "005","005","011","005", "004")
df<-data.frame(patientPersonalNumber,admission.date,discharge.date)
df
patientPersonalNumber admission.date discharge.date
1 001 2013-06-23 2013-06-25
2 002 2013-06-30 2014-07-03
3 004 2014-07-12 2014-07-17
4 005 2013-06-24 2013-06-30
5 006 2013-06-28 2013-06-30
6 007 2013-06-29 2013-07-02
7 008 2013-06-23 2013-06-29
8 009 2013-06-24 2013-06-29
9 010 2013-06-24 2013-06-27
10 005 2013-07-02 2013-07-05
11 005 2013-07-09 2013-07-12
12 011 2013-06-24 2013-06-28
13 005 2013-09-08 2013-10-12
14 004 2014-07-22 2014-08-01
So I have to mark lines (3,4,10) as true.
#4 Patient 005 discharged 2013-06-30 was admitted 2013-07-02
#10 Patient 005 discharged 2013-07-05 was admitted 2013-07-09
#3 Patient 004 discharged 2013-06-30 was admitted 2013-07-22
I appreciate any help.
#origianl data were edit
would go with something like this:
require(tidyverse)
df %>%
arrange(patientPersonalNumber, admission.date) %>%
group_by(patientPersonalNumber) %>%
mutate(re.admin = (lag(discharge.date) + 30) >= admission.date) %>%
mutate(re.admin = ifelse(is.na(re.admin), FALSE, re.admin ))
# A tibble: 14 x 4
# Groups: patientPersonalNumber [10]
patientPersonalNumber admission.date discharge.date re.admin
<chr> <date> <date> <lgl>
1 001 2013-06-23 2013-06-25 FALSE
2 002 2013-06-30 2014-07-03 FALSE
3 004 2013-07-22 2014-08-01 FALSE
4 004 2014-07-12 2014-07-17 TRUE
5 005 2013-06-24 2013-06-30 FALSE
6 005 2013-07-02 2013-07-05 TRUE
7 005 2013-07-09 2013-07-12 TRUE
8 005 2013-09-08 2013-10-12 FALSE
9 006 2013-06-28 2013-06-30 FALSE
10 007 2013-06-29 2013-07-02 FALSE
11 008 2013-06-23 2013-06-29 FALSE
12 009 2013-06-24 2013-06-29 FALSE
13 010 2013-06-24 2013-06-27 FALSE
14 011 2013-06-24 2013-06-28 FALSE

Web Scraping with R : gz/csv files

Im trying to read the archive on this link:
COVID CSV
I'm using the read.csv, but it doesn't seems to work:
read.table(file = "https://data.brasil.io/dataset/covid19/caso.csv.gz")
Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
line 1 did not have 3 elements
I'm trying build a code that pulls up the data from this website with COVID infos, so i don't have to download it everytime that i wan't to use it.
We could use fread
library(data.table)
fread("https://data.brasil.io/dataset/covid19/caso.csv.gz")
# date state city place_type confirmed deaths order_for_place is_last estimated_population_2019
# 1: 2020-07-17 AP state 33436 499 119 TRUE 845731
# 2: 2020-07-16 AP state 33004 493 118 FALSE 845731
# 3: 2020-07-15 AP state 32408 488 117 FALSE 845731
# 4: 2020-07-14 AP state 31885 483 116 FALSE 845731
# 5: 2020-07-13 AP state 31552 478 115 FALSE 845731
# ---
#372166: 2020-06-23 SP Óleo city 1 0 5 FALSE 2496
#372167: 2020-06-22 SP Óleo city 1 0 4 FALSE 2496
#372168: 2020-06-21 SP Óleo city 1 0 3 FALSE 2496
#372169: 2020-06-20 SP Óleo city 1 0 2 FALSE 2496
#372170: 2020-06-19 SP Óleo city 1 0 1 FALSE 2496
# city_ibge_code confirmed_per_100k_inhabitants death_rate
# 1: 16 3953.5030 0.0149
# 2: 16 3902.4229 0.0149
# 3: 16 3831.9513 0.0151
# 4: 16 3770.1113 0.0151
# 5: 16 3730.7371 0.0151
# ---
#372166: 3533809 40.0641 0.0000
#372167: 3533809 40.0641 0.0000
#372168: 3533809 40.0641 0.0000
#372169: 3533809 40.0641 0.0000
#372170: 3533809 40.0641 0.0000
Seems to work with readr::read_csv
readr::read_csv("https://data.brasil.io/dataset/covid19/caso.csv.gz")
# A tibble: 376,064 x 12
# date state city place_type confirmed deaths order_for_place is_last
# <date> <chr> <chr> <chr> <dbl> <dbl> <dbl> <lgl>
# 1 2020-07-18 AC NA state 17202 457 124 TRUE
# 2 2020-07-17 AC NA state 16965 452 123 FALSE
# 3 2020-07-16 AC NA state 16865 447 122 FALSE
# 4 2020-07-15 AC NA state 16672 446 121 FALSE
# 5 2020-07-14 AC NA state 16479 436 120 FALSE
# 6 2020-07-13 AC NA state 16260 430 119 FALSE
# 7 2020-07-12 AC NA state 16190 426 118 FALSE
# 8 2020-07-11 AC NA state 16080 419 117 FALSE
# 9 2020-07-10 AC NA state 15768 417 116 FALSE
#10 2020-07-09 AC NA state 15465 411 115 FALSE
# … with 376,054 more rows, and 4 more variables:
# estimated_population_2019 <dbl>, city_ibge_code <dbl>,
# confirmed_per_100k_inhabitants <dbl>, death_rate <dbl>

How to create a new table given X and Y from a data frame

I'm trying to get a new dataset where it can take two columns and make a new table based on a calculation of a third column.
Cust T S1 S2 S3 S4
1009 150 1007 1006 1001 1000
1010 50 1007 1006 1001 1000
1011 50 1007 1006 1001 1000
1013 10000 1007 1006 1001 1000
1931 60 1008 1007 1006 1005
1141 1000 1014 1013 1007 1006
I need to make a new table where it is:
Cust 1014 1013 1008 1007 1006 1001 1000
1009 NA NA NA T *.1 T *.1 T*.05 T * .025
1010 NA NA NA T *.1 T *.1 T*.05 T * .025
1011 NA NA NA T *.1 T *.1 T*.05 T * .025
1013 NA NA NA T *.1 T *.1 T*.05 T * .025
1931 NA NA T*.1 T *.1 T*.05 T * .025 NA
1141 T*.1 T *.1 NA T*.05 T * .025 NA NA
I just can't seem to figure it out and I'm not even sure if it is possible.
A tidyverse solution:
library(tidyverse)
df %>% gather(select = -c(Cust, T)) %>%
select(-key) %>%
spread(value, T) %>%
map2_dfc(c(1, .025, .05, rep(.1, 6)), ~ .x * .y)
# Cust `1000` `1001` `1005` `1006` `1007` `1008` `1013` `1014`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1009 3.75 7.5 NA 15 15 NA NA NA
# 2 1010 1.25 2.5 NA 5 5 NA NA NA
# 3 1011 1.25 2.5 NA 5 5 NA NA NA
# 4 1013 250 500 NA 1000 1000 NA NA NA
# 5 1141 NA NA NA 100 100 NA 100 100
# 6 1931 NA NA 6 6 6 6 NA NA
library(dplyr)
library(tidyr)
library(data.table)
df %>% gather(key=k,value = val, -c('Cust','T')) %>%
mutate(val_upd=ifelse(k=='S1'|k=='S2','T*.1',ifelse(k=='S3','T*.05','T*.025'))) %>%
#Change 'T*.1' to T*.1 to get the actual value
select(-T,-k) %>% dcast(Cust~val,value.var='val_upd')
Cust 1000 1001 1005 1006 1007 1008 1013 1014
1 1009 T*.025 T*.05 <NA> T*.1 T*.1 <NA> <NA> <NA>
2 1010 T*.025 T*.05 <NA> T*.1 T*.1 <NA> <NA> <NA>
3 1011 T*.025 T*.05 <NA> T*.1 T*.1 <NA> <NA> <NA>
4 1013 T*.025 T*.05 <NA> T*.1 T*.1 <NA> <NA> <NA>
5 1141 <NA> <NA> <NA> T*.025 T*.05 <NA> T*.1 T*.1
6 1931 <NA> <NA> T*.025 T*.05 T*.1 T*.1 <NA> <NA>
Data
df <- read.table(text = "
Cust T S1 S2 S3 S4
1009 150 1007 1006 1001 1000
1010 50 1007 1006 1001 1000
1011 50 1007 1006 1001 1000
1013 10000 1007 1006 1001 1000
1931 60 1008 1007 1006 1005
1141 1000 1014 1013 1007 1006
", header=TRUE)
This is one way using a combination of reshape2::melt, dplyr::select, tidyr::spread and dplyr::mutate. May not be the best way, but it should do what you want:
# Read the data (if you don't already have it loaded)
df <- read.table(text="Cust T S1 S2 S3 S4
1009 150 1007 1006 1001 1000
1010 50 1007 1006 1001 1000
1011 50 1007 1006 1001 1000
1013 10000 1007 1006 1001 1000", header=T)
# Manipulate your data.frame. Replace df with the name of your data.frame
reshape2::melt(df, c("Cust", "T"), c("S1", "S2", "S3", "S4")) %>%
dplyr::select(-variable) %>%
tidyr::spread(value, T) %>%
dplyr::mutate(`1007`=`1007`*0.1,
`1006`=`1006`*0.1,
`1001`=`1001`*0.05,
`1000`=`1000`*0.025)
# Cust 1000 1001 1006 1007
#1 1009 3.75 7.5 15 15
#2 1010 1.25 2.5 5 5
#3 1011 1.25 2.5 5 5
#4 1013 250.00 500.0 1000 1000
You'll need the backticks as R doesn't handle having numeric colnames very well.
Let me know if I've misunderstood anything/something doesn't make sense

if id is same calculate the matrix, esle write 0 in r [duplicate]

This question already has answers here:
Calculate difference between values in consecutive rows by group
(4 answers)
Closed 5 years ago.
If ID are the same, I want to enter the calculated value with Locate column in the new row.
If ID are not same, I want to represent it as 0.
I tried with for statement, however i had a hard in work.
help me!! please. thank you
this is my data and what I want to do.
>ID Locate
>10 2333
>10 5555
>10 2330
>10 1355
>12 1332
>12 1233
>12 1112
>13 3213
>14 3222
>14 5123
>14 6321
what i want to is
>ID Locate diff
>10 2333 0
>10 5555 3222
>10 2330 -3225
>10 1355 -975
>12 1332 0
>12 1233 -99
>12 1112 -121
>13 3213 0
>14 3222 0
>14 5123 1901
>14 6321 1198
this is my code....
rm <- c()
for ( i in 1:ncol(test))
{for ( j in 1:nrow(test))
{if(test[i,1]==test[i,2])
{c(test[i,j]-test[i,j+1])}
else
0
rm <- c(rm,i)}}
You can do this with diff. Here I am using diff both to compute the differences and to decide which IDs match.
myData$diff = 0
myData$diff[which(diff(myData$ID) == 0) + 1] =
diff(myData$Locate)[which(diff(myData$ID) == 0)]
myData
ID Locate diff
1 10 2333 0
2 10 5555 3222
3 10 2330 -3225
4 10 1355 -975
5 12 1332 0
6 12 1233 -99
7 12 1112 -121
8 13 3213 0
9 14 3222 0
10 14 5123 1901
11 14 6321 1198
Data:
myData = read.table(text="ID Locate
10 2333
10 5555
10 2330
10 1355
12 1332
12 1233
12 1112
13 3213
14 3222
14 5123
14 6321",
header=TRUE)
If your data is an R dataframe, you can use the popular tidyverse group of packages to make jobs like this a bit easier.
df %>%
group_by(ID) %>%
mutate(diff = Locate - lag(Locate))
This code results in
ID Locate diff
10 2333 NA
10 5555 3222
10 2330 -3225
10 1355 -975
12 1332 NA
12 1233 -99
12 1112 -121
13 3213 NA
14 3222 NA
14 5123 1901
14 6321 1198
If you really need 0 instead of NA, then you an just do
df %>%
group_by(ID) %>%
mutate(diff = Locate - lag(Locate)) %>%
mutate(diff = ifelse(is.na(diff), 0, diff))

Resources