Unlist Function Is Turning Numeric Values Into Characters. R - r

The code scrapes a website for stock data and returns a 1x18 data frame for each stock. I am trying to convert the data frame into a vector without the numeric columns being converted to character, which is what is happening. I have also tried turning the data frame into a matrix, but the numeric columns are still being converted. In short, I would like to keep characters as characters and numerics as numerics, all in one vector. Thank you.
#get.dates is a function I created to scrape
data = get.dates("AAPL")
class(data)
[1] "data.frame"
class(data$surprise)
[1] "numeric"
dput(data)
structure(list(date = "2019-05-07T00:00:00", company = "Apple",
ticker = "AAPL", periodEnding = "Mar 2019", eps = "2.37",
reportedEPS = NA_character_, lastEps = "2.73", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 177.34,
numBuys = 17L, numHolds = 18L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 176.88,
numBuys = 14L, numHolds = 14L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 827573630900, sector = 18731L, stockId = 7624L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L))
data = unlist(data)
class(data)
[1] "character"
So the final output is to rbind each of the outputs into a single data.frame.
I think I have to convert each 1x18 dataframe into a vector to rbind because I am getting an error when trying to rbind the columns using the foreach package.
tickers = c("AAPL", "PEP", "KO")
system.time({
data = foreach(r = tickers, .packages = c("jsonlite", "dplyr"), .combine = rbind) %dopar% {get.dates(r)}
})
error calling combine function:
<simpleError in `.rowNamesDF<-`(x, value = value): duplicate 'row.names' are not allowed>
user system elapsed
0.02 0.00 0.56
Warning message:
non-unique value when setting 'row.names': ‘1’
print(data)
NULL
#I will do the same thing outside of the foreach loop to give some more context
data = lapply(tickers, get.dates)
do.call(rbind, data)
Error in `.rowNamesDF<-`(x, value = value) :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘1’
dput(data)
list(structure(list(date = "2019-05-07T00:00:00", company = "Apple",
ticker = "AAPL", periodEnding = "Mar 2019", eps = "2.37",
reportedEPS = NA_character_, lastEps = "2.73", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 177.34,
numBuys = 17L, numHolds = 18L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 176.88,
numBuys = 14L, numHolds = 14L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 827573630900, sector = 18731L, stockId = 7624L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)), structure(list(date = "2019-04-23T00:00:00", company = "Coca-Cola",
ticker = "KO", periodEnding = "Mar 2019", eps = "0.46", reportedEPS = NA_character_,
lastEps = "0.47", consensus = 4L, bpConsensus = 5L, ratingsAndPT = structure(list(
priceTarget = 50.89, numBuys = 4L, numHolds = 5L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 51.25,
numBuys = 3L, numHolds = 1L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 193681840000, sector = 18731L, stockId = 8359L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)), structure(list(date = "2019-04-25T00:00:00", company = "PepsiCo",
ticker = "PEP", periodEnding = "Mar 2019", eps = "0.92",
reportedEPS = NA_character_, lastEps = "0.96", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 123.67,
numBuys = 4L, numHolds = 3L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 126,
numBuys = 1L, numHolds = 1L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 163697620000, sector = 18731L, stockId = 10962L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)))
Here is what I would like the output to look like. Thank you!!

You basically have to do your own list flattening here, which is not desirable. It's easier to do this when you get the json data originally. https://rdrr.io/cran/jsonlite/man/flatten.html
The solution below uses purrr, but you can do it with a for-loop or apply functions if you prefer. There are two main ideas here:
1. Bind together the dataframe-type columns with the part of the dataframe that doesn't have any nested columns. In your example, we bind together 3 separate pieces: 1 original dataframe with df_cols removed, and the other two dataframe columns. You can do this with bind_cols. It helps to prepend the original column names to avoid duplicates.
2. Collapse all the rows together with rbind or the like.
# Flatten the data-frame-valued columns of `df` into ordinary columns.
#
# Each nested data frame column is expanded into its own columns, renamed to
# "<outer name>.<inner name>" so names from different nested columns cannot
# clash. The plain (non-nested) columns come first, followed by the expanded
# ones.
#
# df: a data frame whose columns may themselves be data frames.
#
# Relies on map_lgl()/imap_dfc() (purrr) and bind_cols()/%>% (dplyr) being
# attached by the caller.
flatten_df_cols <- function(df) {
  df_cols <- map_lgl(df, is.data.frame)
  # Use list-style `[` (no comma): matrix-style `df[, cols]` drops to the bare
  # column when exactly one column is selected, which would make imap_dfc()
  # iterate over the inner columns of a lone nested data frame instead.
  imap_dfc(df[df_cols], ~setNames(.x, paste0(.y, ".", names(.x)))) %>%
    bind_cols(list(df[!df_cols]), .)
}
map_dfr(data, flatten_df_cols)
Observations: 3
Variables: 24
$ date <chr> "2019-05-07T00:00:00", "2019-04...
$ company <chr> "Apple", "Coca-Cola", "PepsiCo"
$ ticker <chr> "AAPL", "KO", "PEP"
$ periodEnding <chr> "Mar 2019", "Mar 2019", "Mar 2019"
$ eps <chr> "2.37", "0.46", "0.92"
$ reportedEPS <chr> NA, NA, NA
$ lastEps <chr> "2.73", "0.47", "0.96"
$ consensus <int> 4, 4, 4
$ bpConsensus <int> 4, 5, 4
$ marketCap <dbl> 827573630900, 193681840000, 163...
$ sector <int> 18731, 18731, 18731
$ stockId <int> 7624, 8359, 10962
$ stockTypeId <int> 1, 1, 1
$ surprise <dbl> NA, NA, NA
$ timeOfDay <int> 4, 4, 4
$ isConfirmed <lgl> FALSE, FALSE, FALSE
$ ratingsAndPT.priceTarget <dbl> 177.34, 50.89, 123.67
$ ratingsAndPT.numBuys <int> 17, 4, 4
$ ratingsAndPT.numHolds <int> 18, 5, 3
$ ratingsAndPT.numSells <int> 0, 0, 0
$ bpRatingsAndPT.priceTarget <dbl> 176.88, 51.25, 126.00
$ bpRatingsAndPT.numBuys <int> 14, 3, 1
$ bpRatingsAndPT.numHolds <int> 14, 1, 1
$ bpRatingsAndPT.numSells <int> 0, 0, 0

Related

How to customize a function for a UI report when function is producing duplicates/triplicates of certain args [r]

I've created this function below that produces data that will go in a report in a UI.
However, it's not quite doing what I would like for the name and age values. It prints the name and age once for every order. So if, e.g., Customer ID 59 made 2 orders, her name is printed as "Jane" "Jane" - I would like it not to do that.
If anyone has any idea on how to change this, i'd appreciate your input.
# Build a summary report for one customer over a date range.
#
# Reads the data frames OrdersData, ItemsInOrders and CustomersData from the
# enclosing environment.
#
# ID:         customer id matched against the Customer_ID columns.
# Start_Date: lower bound (inclusive) compared against OrdersData$Date.
# End_Date:   upper bound (inclusive) compared against OrdersData$Date.
#
# Returns a named list with elements Name, Age, NumberofOrders, MeanTotals,
# MedianTotals, PercentageType and PercentageBreakdown.
CustomerReport <- function(ID, Start_Date, End_Date) {
  in_range <- OrdersData$Customer_ID == ID &
    OrdersData$Date >= Start_Date &
    OrdersData$Date <= End_Date
  CustomerOrders <- OrdersData[in_range, ]
  ProductOrders <- ItemsInOrders[ItemsInOrders$Order_ID %in% CustomerOrders$Order_ID, ]
  CustomerInfo <- CustomersData[CustomersData$Customer_ID == ID, ]

  # Pair first and last name row by row, then deduplicate, so a customer that
  # matches several rows is reported once instead of once per row.
  Name <- unique(paste(CustomerInfo$First_Name, CustomerInfo$Last_Name))
  Age <- unique(CustomerInfo$Customer_Age)

  NumberofOrders <- nrow(CustomerOrders)
  MeanTotals <- mean(ProductOrders$Quantities)
  MedianTotals <- median(ProductOrders$Quantities)
  # Share of this customer's orders per order type.
  PercentageType <- table(CustomerOrders$Type) / NumberofOrders
  # Product counts for this customer relative to ALL rows of ItemsInOrders
  # (not only this customer's items).
  PercentageBreakdown <- table(ProductOrders$Products) / nrow(ItemsInOrders)

  list(
    Name = Name,
    Age = Age,
    NumberofOrders = NumberofOrders,
    MeanTotals = MeanTotals,
    MedianTotals = MedianTotals,
    PercentageType = PercentageType,
    PercentageBreakdown = PercentageBreakdown
  )
}
#Test the Customer Report Function
CustomerReport(1251, "2019-01-01", "2019-01-25")
the dput for the data frames
dput(droplevels(CustomersData[1:5, ]))
structure(list(First_Name = c("Ariel", "Kinshasa", "May", "Gabrielle",
"Jennifer"), Last_Name = c("Dirrim", "Purifoy", "Sue", "Finley",
"Towns"), Customer_ID = c(1251L, 290L, 1714L, 381L, 109L), Customer_DOB = structure(c(11181,
3956, 10632, 9742, 11145), class = "Date"), Customer_Age = c(20,
39, 21, 24, 20)), row.names = c(NA, 5L), class = "data.frame")
dput(droplevels(OrdersData[1:5, ]))
structure(list(Order_ID = c(69L, 3025L, 3549L, 27L, 4561L), Customer_ID = c(1251L,
290L, 1714L, 381L, 109L), Date = structure(c(17899, 17921, 17925,
17923, 17917), class = "Date"), Type = structure(c(2L, 1L, 2L,
2L, 2L), .Label = c("Delivery", "Pick Up"), class = "factor"),
Coupon = c("OFF10", NA, "LARGE10", "LARGE10", "LARGE10"),
Delivery_Fee = c("0", "12", "0", "0", "0"), Sub_Total_Before_Discount = c(27.98,
40.9, 74.94, 91.85, 80.82), Discount = c(2.8, 0, 7.49, 9.19,
8.08), Sub_Total_After_Discount = c(25.18, 40.9, 67.45, 82.66,
72.74), GST = c(2.52, 4.09, 6.74, 8.27, 7.27), Total = c(27.7,
44.99, 74.19, 90.93, 80.01)), row.names = c(NA, 5L), class = "data.frame")
dput(droplevels(ItemsInOrders[1:5, ]))
structure(list(Order_ID = c(69L, 3025L, 3025L, 3549L, 3549L),
Products = structure(c(2L, 4L, 1L, 3L, 5L), .Label = c("BBQ Chicken Pizza",
"Meatlovers Pizza", "Seafood Pizza", "Supreme Pizza", "Vegetarian Pizza"
), class = "factor"), Prices = c(13.99, 13.95, 14.95, 13.99,
10.99), Quantities = c(2L, 1L, 1L, 3L, 3L)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
>
Everything else is perfect. Just the Names and the age are coming out in duplicates or triplicates.
Also, while we are here - is it possible to return the Percentage breakdowns as actual % values rather than 0.1 etc?
I guess you are looking for unique. However, the behaviour you are describing isn't reproducible with the data you provided.
Try to replace the two lines getting the name and age information in your CustomerReport function:
# Pair first and last name row by row, then deduplicate the combined names.
# Deduplicating each column separately could mis-pair or recycle names when
# the two columns have a different number of distinct values.
Name <- unique(paste(CustomerInfo$First_Name, CustomerInfo$Last_Name))
Age <- unique(CustomerInfo$Customer_Age)

How to select variables from dataframe inside other recursive dataframe in r

I have the following dataframe:
str(data_raw)
'data.frame': 20 obs. of 18 variables:
$ id : chr "2306173214621953247_176548637" "2304792897512042631_176548637" "2298783867773662543_176548637" "2249480367030200759_176548637" ...
$ user :'data.frame': 20 obs. of 4 variables:
..$ id : chr "176548637" "176548637" "176548637" "176548637" ...
..$ full_name : chr "Carlos Costa" "Carlos Costa" "Carlos Costa" "Carlos Costa" ...
But when I try to get the user$id variable, it shows the error:
data_raw<- data_raw %>%
select(id,user.id)
Error: Can't subset columns that don't exist.
x The column user.id doesn't exist.
I also tried this way:
data_raw<- data_raw %>%
+ select(id,user$id)
Error: object 'user' not found
I know that the user variable is a dataframe, but how do I get information from a dataframe within another dataframe?
I simplified the structure to help. The dataframe has only 1 line.
dput(head(data_raw, 2))
structure(list(id = "2306173214621953247_176548637", user = structure(list(
id = "176548637", full_name = "Carlos Costa", profile_picture = "link.com",
username = "carlosocosta"), row.names = 1L, class = "data.frame"),
images = structure(list(thumbnail = structure(list(width = 150L, height = 150L, url = "link.com"), row.names = 1L, class = "data.frame"),
low_resolution = structure(list(width = 320L, height = 320L,
url = "link.com"), row.names = 1L, class = "data.frame"),
standard_resolution = structure(list(width = 640L, height = 640L,
url = "link.com"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
created_time = "1589137292", caption = structure(list(id = "18105905566138276",
text = "Não basta ser mãe! Tem que aprender a jogar Fortnite! Feliz dia das mães! #maedemenino",
created_time = "1589137292", from = structure(list(id = "176548637",
full_name = "Carlos Costa", profile_picture = "link.com",
username = "carlosocosta"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
user_has_liked = FALSE, likes = structure(list(count = 69L), row.names = 1L, class = "data.frame"),
tags = list("maedemenino"), filter = "Normal", comments = structure(list(
count = 3L), row.names = 1L, class = "data.frame"), type = "image",
link = "link.com", location = structure(list(
latitude = NA_real_, longitude = NA_real_, name = NA_character_,
id = NA_integer_), row.names = 1L, class = "data.frame"),
attribution = NA, users_in_photo = list(structure(list(user = structure(list(
username = "vivicosta_oficial"), class = "data.frame", row.names = 1L),
position = structure(list(x = 0.2210144928, y = 0.5857487923), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L)),
carousel_media = list(NULL), videos = structure(list(standard_resolution = structure(list(
width = NA_integer_, height = NA_integer_, url = NA_character_,
id = NA_character_), row.names = 1L, class = "data.frame"),
low_resolution = structure(list(width = NA_integer_,
height = NA_integer_, url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame"),
low_bandwidth = structure(list(width = NA_integer_, height = NA_integer_,
url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
video_views = NA_integer_), row.names = 1L, class = "data.frame")
Since we have a dataframe inside a dataframe, make it a single dataframe using do.call +cbind and then subset the columns needed.
do.call(cbind, data_raw)[c('id', 'user.id')]
# id user.id
#1 2306173214621953247_176548637 176548637
#2 2304792897512042631_176548637 176548637
Or with dplyr::select
library(dplyr)
do.call(cbind, data_raw) %>% select(id, user.id)
data
Tested on this data :
# Outer data frame with a single column `id`.
data_raw <- data.frame(id = c('2306173214621953247_176548637',
'2304792897512042631_176548637'))
# Second data frame that will become a nested column of data_raw.
user <- data.frame(id = c('176548637', '176548637'), full_name = c('a', 'b'))
# Assigning a data frame to a column stores it as a data frame inside data_raw.
data_raw$user <- user
# Inspect the resulting nested structure.
str(data_raw)
#'data.frame': 2 obs. of 2 variables:
# $ id : chr "2306173214621953247_176548637" "2304792897512042631_176548637"
# $ user:'data.frame': 2 obs. of 2 variables:
# ..$ id : chr "176548637" "176548637"
# ..$ full_name: chr "a" "b"

STEP Algorithm does not find variable in custom function

I am working on a tv retail dataset in R and wanted to put steps I will need to use repeatedly into a function.
This includes checking the VIF and returning it, running the STEP algorithm to determine the best model, and then displaying the result of the STEP.
The major issue is the error message
Error in eval(predvars, data, env) : object 'Hour' not found
which appears to originate in the step() call.
# Fit a null and a full linear model, report the VIFs of the full model, run
# forward stepwise selection between them and print the selected model.
#
# data:     data frame holding the response and the candidate predictors.
# dep_var:  name of the response column (character scalar).
# features: character vector with the names of the candidate predictors.
#
# Prints the vif() result for the full model and returns whatever
# stargazer() returns for the selected model. vif() and stargazer() must be
# attached by the caller (presumably from the car and stargazer packages -
# confirm).
Regression <- function(data, dep_var, features) {
  # Build real formula objects here so that their environment is this
  # function's frame. step() re-evaluates the lm() calls in the environment of
  # the model formula and has to find `data` there; passing a character string
  # to lm() leaves the formula with an environment where `data` is not the
  # data frame, which triggers "object 'Hour' not found".
  null_formula <- as.formula(paste0(dep_var, " ~ 1"))
  full_formula <- as.formula(
    paste0(dep_var, " ~ ", paste(features, collapse = " + "))
  )
  lin.null <- lm(null_formula, data = data)
  lin.full <- lm(full_formula, data = data)

  # Values are not auto-printed inside a function, so print explicitly.
  print(vif(lin.full))

  opt <- step(
    lin.null,
    scope = list(lower = null_formula, upper = full_formula),
    direction = "forward"
  )

  # Pass the selected model itself; its call object is not a model.
  stargazer(opt, type = "text")
}
dep_var = 'imp'
feat = c('Hour', 'grp')
paste(dep_var,'~', paste(feat, collapse='+'), sep = '')
Regression(comb_a, 'imp', feat)
The final result should show me the VIF values for each variable and the stargazer output of the STEP optimized regression.
EDIT 1:
comb_a is the input data the regression should take
The dput() output follows down below:
# comb_a
structure(list(Day = structure(c(1483833600, 1483833600, 1483833600,
1483833600, 1483833600, 1483833600), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Hour = c(0, 1, 6, 7, 8, 9), Model = c("Model A",
"Model A", "Model A", "Model A", "Model A", "Model A"), tv_count = c(5L,
8L, 4L, 9L, 11L, 8L), grp_abs = c(55500, 8308, 19026, 12184,
10141, 113225), grp = c(0.22, 0.03, 0.07, 0.05, 0.04, 0.45),
sum_duration = c(150, 240, 120, 270, 330, 240), grp_per_second = c(370,
34.6166666666667, 158.55, 45.1259259259259, 30.730303030303,
471.770833333333), hours_since = c(NA, 1, 5, 1, 1, 1), camp_count = c(2L,
2L, 2L, 2L, 3L, 4L), imp = c(528, 319, 97, 182, 327, 785),
clicks = c(28, 15, 6, 13, 29, 53), leads = c(0, 0, 0, 0,
0, 1)), .Names = c("Day", "Hour", "Model", "tv_count", "grp_abs",
"grp", "sum_duration", "grp_per_second", "hours_since", "camp_count",
"imp", "clicks", "leads"), row.names = c(NA, -6L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("Day", "Hour"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(Day = structure(c(1483833600,
1483833600, 1483833600, 1483833600, 1483833600, 1483833600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), Hour = c(0, 1, 6, 7, 8, 9)), row.names = c(NA,
-6L), class = "data.frame", vars = c("Day", "Hour"), drop = TRUE, .Names = c("Day",
"Hour")))
desired output would be: (Numbers are just for representation)
> vif(lin.full)
Hour grp sum_duration grp_per_second hours_since camp_count
2.979362 4.981504 2.290328 3.279818 1.013725 1.110823
imp clicks
7.471457 9.244811
> stargazer(step_opt, type = 'text')
===============================================
Dependent variable:
---------------------------
leads
-----------------------------------------------
clicks 0.005***
(0.0004)
camp_count 0.040*
(0.024)
Constant -0.107
(0.098)
-----------------------------------------------
Observations 898
R2 0.181
Adjusted R2 0.179
Residual Std. Error 0.772 (df = 895)
F Statistic 98.901*** (df = 2; 895)
===============================================
Note: *p<0.1; **p<0.05; ***p<0.01

Summarize dataframe with start and end times in R?

Here is a sample of my df:
structure(list(press_id = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L),
start_time = c(164429106370978, 164429106370978, 164429411618824,
164429411618824, 164429837271939, 164429837271939, 164430399454284,
164430399454284), end_time = c(164429182443824, 164429182443824,
164429512525747, 164429512525747, 164429903243169, 164429903243169,
164430465927554, 164430465927554), timestamp = c(164429140697138,
164429175921880, 164429440899844, 164429440899844, 164429867184830,
164429891199391, 164430427558256, 164430433561155), acc_x = c(3.1053743,
2.9904492, 5.889916, 5.889916, 5.808511, 5.36557, 3.545921,
3.4788814), acc_y = c(8.406299, 8.12138, 8.600235, 8.600235,
7.920261, 7.922655, 7.9346266, 7.972935), acc_z = c(4.577853,
4.0894213, 0.35435268, 0.35435268, -0.21309046, 0.46927786,
4.005622, 4.4198313), grav_x = c(3.931084, 4.0214577, 4.7844357,
4.7844357, 5.6572776, 5.65053, 3.9938855, 3.9938855), grav_y = c(8.318872,
8.281514, 8.21449, 8.21449, 7.94851, 7.9495893, 8.027369,
8.027369), grav_z = c(3.393116, 3.3785365, 2.408623, 2.408623,
0.99327636, 1.0226398, 3.9724596, 3.9724596), gyro_x = c(-0.35906965,
0.099690154, 0.06792516, 0.04532315, -0.05546962, -0.06524346,
-0.2967614, -0.32180685), gyro_y = c(0.15843217, -0.48053285,
-0.2196934, -0.21175216, 0.1895863, 0.37467846, 0.12239113,
0.04847643), gyro_z = c(-0.042139318, 0.39585108, 0.12523776,
0.11240959, -0.05863268, 0.042770952, 0.047047008, 0.097137965
), acc_mag = c(10.0630984547559, 9.5719886173707, 10.4297995361418,
10.4297995361418, 9.82419166595324, 9.58008483176486, 9.56958006531909,
9.75731607717771), acc_mag_max = c(10.4656808698978, 10.4656808698978,
10.5978974240054, 10.5978974240054, 10.2717799984467, 10.2717799984467,
10.0054693945119, 10.0054693945119), acc_mag_min = c(9.55048847884876,
9.55048847884876, 9.45791784630329, 9.45791784630329, 9.58008483176486,
9.58008483176486, 9.49389444102469, 9.49389444102469), acc_mag_avg = c(9.9181794947982,
9.9181794947982, 9.82876220923978, 9.82876220923978, 9.89351246166363,
9.89351246166363, 9.77034322149792, 9.77034322149792), vel_ang_mag = c(0.394724572535758,
0.630514095219792, 0.261846355511019, 0.243985821544114,
0.206052505577139, 0.382714007838398, 0.324438496782347,
0.339625377757329), vel_ang_mag_max = c(0.665292823798622,
0.665292823798622, 1.00730683166191, 1.00730683166191, 0.561349818527019,
0.561349818527019, 0.445252333070234, 0.445252333070234),
vel_ang_mag_min = c(0.212944405199931, 0.212944405199931,
0.18680382123856, 0.18680382123856, 0.111795327479332, 0.111795327479332,
0.258342546774667, 0.258342546774667), vel_ang_mag_avg = c(0.440700089033948,
0.440700089033948, 0.405484992593493, 0.405484992593493,
0.284553957549617, 0.284553957549617, 0.348811700631375,
0.348811700631375)), .Names = c("press_id", "start_time",
"end_time", "timestamp", "acc_x", "acc_y", "acc_z", "grav_x",
"grav_y", "grav_z", "gyro_x", "gyro_y", "gyro_z", "acc_mag",
"acc_mag_max", "acc_mag_min", "acc_mag_avg", "vel_ang_mag", "vel_ang_mag_max",
"vel_ang_mag_min", "vel_ang_mag_avg"), row.names = c(NA, -8L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = "press_id", drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7), group_sizes = c(2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
press_id = 1:4), row.names = c(NA, -4L), class = "data.frame", vars = "press_id", drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7), group_sizes = c(2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
press_id = 1:4), row.names = c(NA, -4L), class = "data.frame", vars = "press_id", drop = TRUE, .Names = "press_id"), .Names = "press_id"))
And I am trying to summarize it in the following way, where the trailing columns (left blank here) are filled with their corresponding values from the data frame above:
press_id time_state time_state_val acc_mag acc_mag_max acc_mag_min acc_mag_avg vel_ang_mag vel_ang_mag_max vel_ang_mag_min vel_ang_mag_avg
1 start_time 164429106370978
1 end_time 164429182443824
2 start_time 164429411618824
2 end_time 164429512525747
3 start_time 164429837271939
3 end_time 164429903243169
4 start_time 164430399454284
4 end_time 164430427558256
Please advise how can I transform it to be like expected result.
I am trying to do this with combination of tidyr gather and dplyr but I don't get the structure I need.
library(dplyr)
library(tidyr)
df1 <- df[,1:6]
df1 %>% mutate(row=row_number()) %>%
gather(time_state , time_state_val, -press_id, -row,-timestamp:-acc_y) %>%
arrange(press_id, row) %>%
select(press_id, time_state, time_state_val, everything(),-row)

R Create a new variable name by partially matching a string from other variable names

I have multiple variable names that I need to combine into a single variable based on a common string of text. My sample data are:
structure(list(And = c(10L, NA, 10L), and = c(20L, 10L, 10L),
andbc = c(1L, NA, NA), baNdc = c(4L, NA, 5L), ban = c(1L,
NA, 1L)), .Names = c("And", "and", "andbc", "baNdc", "ban"), class = "data.frame", row.names = c(NA, -3L))
I would like to create a new variable x, the value of which would be a row sum of the values of the other variables that share the common text string "and" ignoring the case of any of the letters in that string.
I attempted creating the variable by specifying the permutations, which I'm hoping to avoid:
names1[, 1:5][is.na(names1[, 1:5])] <- 0
names1$x <- sum(names1[which(grepl("And|and|aNd", names(names1)))])
The result I get for values of x is a sum total of all values for the variables that meet the text string criteria:
structure(list(And = c(10, 0, 10), and = c(20L, 10L, 10L), andbc = c(1, 0, 0), baNdc = c(4, 0, 5), ban = c(1, 0, 1), x = c(70, 70, 70)), .Names = c("And", "and", "andbc", "baNdc", "ban", "x"), row.names = c(NA, -3L), class ="data.frame"
How can I obtain the row sums based on the text string criteria and avoid having to specify the permutations of upper or lower case?
The following would do the trick
# Sample data: several columns whose names contain "and" in mixed case.
df <- structure(list(And = c(10L, NA, 10L), and = c(20L, 10L, 10L),
andbc = c(1L, NA, NA), baNdc = c(4L, NA, 5L), ban = c(1L,
NA, 1L)), .Names = c("And", "and", "andbc", "baNdc", "ban"), class = "data.frame", row.names = c(NA, -3L))
# Match "and" in the column names regardless of case. drop = FALSE keeps a
# data frame even when only one column matches, so rowSums() still works.
and_cols <- grepl("and", colnames(df), ignore.case = TRUE)
x <- rowSums(df[, and_cols, drop = FALSE], na.rm = TRUE)
colnames(names1) <- tolower(colnames(names1))
Lower-casing the column names this way removes the need to spell out the case permutations; the row sums are then simply:
names1$x <- rowSums(names1[which(grepl('and', colnames(names1)))], na.rm = TRUE)

Resources