Related
My first data frame (df) contains EntryDate and ExitDate columns. Another data frame (n1) holds all trading dates. I need a new column in the first data frame giving, for each row, the number of trading days in the second data frame between EntryDate and ExitDate. How do I call this dayCount function for each row of df? When I try to use mapply, I am unable to pass n1 as a parameter.
#' Count the dates in `n1` that fall inside a closed date range.
#'
#' @param startDate First day of the range (inclusive); a single Date.
#' @param endDate Last day of the range (inclusive); a single Date.
#' @param n1 Either a data frame with a `Date` column or a plain vector of
#'   dates.
#' @return The number of non-missing dates in `n1` with
#'   `startDate <= date <= endDate`, as a single integer.
dayCount <- function(startDate, endDate, n1) {
  if (is.data.frame(n1)) {
    if (!"Date" %in% names(n1)) {
      stop("`n1` must have a column named \"Date\"", call. = FALSE)
    }
    dates <- n1[["Date"]]
  } else {
    # A bare vector has no `Date` column and no rows, so subset()/nrow()
    # cannot be used on it; compare against the vector directly instead.
    dates <- n1
  }
  # na.rm mirrors subset(), which silently drops rows whose condition is NA.
  sum(dates >= startDate & dates <= endDate, na.rm = TRUE)
}
# Example data frame with one row per EntryDate/ExitDate pair. Both columns
# carry class "Date", so the numbers are day counts from 1970-01-01
# (11355 is 2001-02-02). Row names are the non-consecutive integers listed in
# row.names.
df<- structure(list(EntryDate = structure(c(11355, 11418, 11436, 11449,
11520, 11523, 11548, 11620, 11768, 11773), class = "Date"), ExitDate = structure(c(11360,
11422, 11438, 11457, 11522, 11526, 11554, 11625, 11772, 11778
), class = "Date")), row.names = c(22L, 65L, 76L, 84L, 135L,
138L, 155L, 204L, 305L, 307L), class = "data.frame")
# Trading dates to count against. Note that this is a bare numeric vector with
# class "Date" (day counts from 1970-01-01), not a data frame: it has no `Date`
# column and nrow() of it is NULL.
n1<- structure(c(11354, 11355, 11358, 11359, 11360, 11361, 11362,
11365, 11366, 11367, 11368, 11369, 11372, 11373, 11374, 11375,
11376, 11379, 11380, 11381, 11382, 11383, 11386, 11388, 11389,
11390, 11393, 11394, 11395, 11396, 11397, 11400, 11401, 11402,
11403, 11404, 11407, 11408, 11409, 11410, 11411, 11414, 11415,
11416, 11418, 11421, 11422, 11423, 11424, 11428, 11429, 11430,
11431, 11432, 11435, 11436, 11437, 11438, 11439, 11442, 11444,
11445, 11446, 11449, 11450, 11451, 11452, 11453, 11456, 11457,
11458, 11459, 11460, 11463, 11464, 11465, 11466, 11467, 11470,
11471, 11472, 11473, 11474, 11477, 11478, 11479, 11480, 11481,
11484, 11485, 11486, 11487, 11488, 11491, 11492, 11493, 11494,
11495, 11498, 11499, 11500, 11501, 11502, 11505, 11506, 11507,
11508, 11509, 11512, 11513, 11514, 11515, 11516, 11519, 11520,
11521, 11522, 11523, 11526, 11527, 11528, 11529, 11530, 11533,
11534, 11535, 11536, 11537, 11540, 11541, 11542, 11543, 11544,
11547, 11548, 11550, 11551, 11554, 11555, 11557, 11558, 11561,
11562, 11563, 11564, 11565, 11568, 11569, 11570, 11571, 11572,
11575, 11576, 11577, 11578, 11579, 11582, 11583, 11584, 11585,
11586, 11589, 11590, 11591, 11592, 11593, 11596, 11598, 11599,
11600, 11603, 11604, 11605, 11606, 11607, 11610, 11611, 11612,
11613, 11614, 11617, 11618, 11619, 11620, 11624, 11625, 11626,
11627, 11628, 11631, 11632, 11633, 11634, 11635, 11638, 11639,
11640, 11641, 11645, 11646, 11647, 11648, 11649, 11652, 11653,
11654, 11655, 11659, 11660, 11661, 11662, 11663, 11666, 11667,
11668, 11669, 11670, 11674, 11675, 11676, 11677, 11680, 11682,
11683, 11684, 11687, 11688, 11689, 11690, 11691, 11694, 11695,
11696, 11697, 11698, 11701, 11702, 11703, 11704, 11705, 11708,
11709, 11710, 11711, 11712, 11715, 11716, 11717, 11718, 11719,
11722, 11723, 11724, 11725, 11726, 11729, 11730, 11731, 11732,
11733, 11736, 11737, 11738, 11739, 11740, 11743, 11744, 11745,
11746, 11747, 11750, 11751, 11752, 11753, 11754, 11757, 11758,
11759, 11760, 11761, 11764, 11765, 11766, 11767, 11768, 11772,
11773, 11774, 11778), class = "Date")
You can use %in% to count number of days in n1 between each EntryDate and ExitDate.
# For every row of df, count how many entries of n1 are %in% the run of
# calendar days from EntryDate to ExitDate (both inclusive).
# vapply() always yields a numeric vector of length nrow(df), including for a
# zero-row df or a length-one n1, and never builds the full
# length(n1) x nrow(df) logical matrix.
df$dayCount <- vapply(
  seq_len(nrow(df)),
  function(i) {
    first_day <- df$EntryDate[i]
    last_day <- df$ExitDate[i]
    # seq() errors when the end precedes the start; such a window is empty.
    if (last_day < first_day) {
      return(0)
    }
    sum(n1 %in% seq(first_day, last_day, by = "1 day"))
  },
  numeric(1)
)
df
# EntryDate ExitDate dayCount
#22 2001-02-02 2001-02-07 4
#65 2001-04-06 2001-04-10 3
#76 2001-04-24 2001-04-26 3
#84 2001-05-07 2001-05-15 7
#135 2001-07-17 2001-07-19 3
#138 2001-07-20 2001-07-23 2
#155 2001-08-14 2001-08-20 4
#204 2001-10-25 2001-10-30 3
#305 2002-03-22 2002-03-26 2
#307 2002-03-27 2002-04-01 3
I want to write an algorithm that returns the points highlighted by the arrows. I've tried using a second derivative, but it produces a plot similar to the one above and I'm not sure how to use it.
Hi, sorry about that. I don't want the peaks; I want the point where the graph starts to increase, i.e. the point where the gradient changes from ~0 to something larger. Does that make sense?
Example data is below.
df = structure(list(X1 = c("2729", "2730", "2731", "2732", "2733",
"2734", "2735", "2736", "2737", "2738", "2739", "2740", "2741",
"2742", "2743", "2744", "2745", "2746", "2747", "2748", "2749",
"2750", "2751", "2752", "2753", "2754", "2755", "2756", "2757",
"2758", "2759", "2760", "2761", "2762", "2763", "2764", "2765",
"2766", "2767", "2768", "2769", "2770", "2771", "2772", "2773",
"2774", "2775", "2776", "2777", "2778", "2779", "2780", "2781",
"2782", "2783", "2784", "2785", "2786", "2787", "2788", "2789",
"2790", "2791", "2792", "2793", "2794", "2795", "2796", "2797",
"2798", "2799", "2800", "2801", "2802", "2803", "2804", "2805",
"2806", "2807", "2808", "2809", "2810", "2811", "2812", "2813",
"2814", "2815", "2816", "2817", "2818", "2819", "2820", "2821",
"2822", "2823", "2824", "2825", "2826", "2827", "2828", "2829",
"2830", "2831", "2832", "2833", "2834", "2835", "2836", "2837",
"2838", "2839", "2840", "2841", "2842", "2843", "2844", "2845",
"2846", "2847", "2848", "2849", "2850", "2851", "2852", "2853",
"2854", "2855", "2856", "2857", "2858", "2859", "2860", "2861",
"2862", "2863", "2864", "2865", "2866", "2867", "2868", "2869",
"2870", "2871", "2872", "2873", "2874", "2875", "2876", "2877",
"2878", "2879", "2880", "2881", "2882", "2883", "2884", "2885",
"2886", "2887", "2888", "2889", "2890", "2891", "2892", "2893",
"2894", "2895", "2896", "2897", "2898", "2899", "2900", "2901",
"2902", "2903", "2904", "2905", "2906", "2907", "2908", "2909",
"2910", "2911", "2912", "2913", "2914", "2915", "2916", "2917",
"2918", "2919", "2920", "2921", "2922", "2923", "2924", "2925",
"2926", "2927", "2928", "2929", "2930", "2931", "2932", "2933",
"2934", "2935", "2936", "2937", "2938", "2939", "2940", "2941",
"2942", "2943", "2944", "2945", "2946", "2947", "2948", "2949",
"2950", "2951", "2952", "2953", "2954", "2955", "2956", "2957",
"2958", "2959", "2960", "2961", "2962", "2963", "2964", "2965",
"2966", "2967", "2968", "2969", "2970", "2971", "2972", "2973",
"2974", "2975", "2976", "2977", "2978", "2979", "2980", "2981",
"2982", "2983", "2984", "2985", "2986", "2987", "2988", "2989",
"2990", "2991", "2992", "2993", "2994", "2995", "2996", "2997",
"2998", "2999", "3000", "3001", "3002", "3003", "3004", "3005",
"3006", "3007", "3008", "3009", "3010", "3011", "3012", "3013",
"3014", "3015", "3016", "3017", "3018", "3019", "3020", "3021",
"3022", "3023", "3024", "3025", "3026", "3027", "3028", "3029",
"3030", "3031", "3032", "3033", "3034", "3035", "3036", "3037",
"3038", "3039", "3040", "3041", "3042", "3043", "3044", "3045",
"3046", "3047", "3048", "3049", "3050", "3051", "3052", "3053",
"3054", "3055", "3056", "3057", "3058", "3059", "3060", "3061",
"3062", "3063", "3064", "3065", "3066", "3067", "3068", "3069",
"3070", "3071", "3072", "3073", "3074", "3075", "3076", "3077",
"3078", "3079", "3080", "3081", "3082", "3083", "3084", "3085",
"3086", "3087", "3088", "3089", "3090", "3091", "3092", "3093",
"3094", "3095", "3096", "3097", "3098", "3099", "3100", "3101",
"3102", "3103", "3104", "3105", "3106", "3107", "3108", "3109",
"3110", "3111", "3112", "3113", "3114", "3115", "3116", "3117",
"3118", "3119", "3120", "3121", "3122", "3123", "3124", "3125",
"3126", "3127", "3128", "3129", "3130", "3131", "3132", "3133",
"3134", "3135", "3136", "3137", "3138", "3139", "3140", "3141",
"3142", "3143", "3144", "3145", "3146", "3147", "3148", "3149",
"3150", "3151", "3152", "3153", "3154", "3155", "3156", "3157",
"3158", "3159", "3160", "3161", "3162", "3163", "3164", "3165",
"3166", "3167", "3168", "3169", "3170", "3171", "3172", "3173",
"3174", "3175", "3176", "3177", "3178", "3179", "3180", "3181",
"3182", "3183", "3184", "3185", "3186", "3187", "3188", "3189",
"3190", "3191", "3192", "3193", "3194", "3195", "3196", "3197",
"3198", "3199", "3200", "3201", "3202", "3203", "3204", "3205",
"3206", "3207", "3208", "3209", "3210", "3211", "3212", "3213",
"3214", "3215", "3216", "3217", "3218", "3219", "3220", "3221",
"3222", "3223", "3224", "3225", "3226", "3227", "3228", "3229",
"3230", "3231", "3232", "3233", "3234", "3235", "3236", "3237",
"3238", "3239", "3240", "3241", "3242", "3243", "3244", "3245",
"3246", "3247", "3248", "3249", "3250", "3251", "3252", "3253",
"3254", "3255", "3256", "3257", "3258", "3259", "3260", "3261",
"3262", "3263", "3264", "3265", "3266", "3267", "3268", "3269",
"3270", "3271", "3272", "3273", "3274", "3275", "3276", "3277",
"3278", "3279", "3280", "3281", "3282", "3283", "3284", "3285",
"3286", "3287", "3288", "3289", "3290", "3291", "3292", "3293",
"3294", "3295", "3296", "3297", "3298", "3299", "3300", "3301",
"3302", "3303", "3304", "3305", "3306", "3307", "3308", "3309",
"3310", "3311", "3312", "3313", "3314", "3315", "3316", "3317",
"3318", "3319", "3320", "3321", "3322", "3323", "3324", "3325",
"3326", "3327", "3328", "3329", "3330", "3331", "3332", "3333",
"3334", "3335", "3336", "3337", "3338", "3339", "3340", "3341",
"3342", "3343", "3344", "3345", "3346", "3347", "3348", "3349",
"3350", "3351", "3352", "3353", "3354", "3355", "3356", "3357",
"3358", "3359", "3360", "3361", "3362", "3363", "3364", "3365",
"3366", "3367", "3368", "3369", "3370", "3371", "3372", "3373",
"3374", "3375", "3376", "3377", "3378", "3379", "3380", "3381",
"3382", "3383", "3384", "3385", "3386", "3387", "3388", "3389",
"3390", "3391", "3392", "3393", "3394", "3395", "3396", "3397",
"3398", "3399", "3400", "3401", "3402", "3403", "3404", "3405",
"3406", "3407", "3408", "3409", "3410", "3411", "3412", "3413",
"3414", "3415", "3416", "3417", "3418", "3419", "3420", "3421",
"3422", "3423", "3424", "3425", "3426", "3427", "3428", "3429",
"3430", "3431", "3432", "3433", "3434", "3435", "3436", "3437",
"3438", "3439", "3440", "3441", "3442", "3443", "3444", "3445"
), X2 = c(-0.00385000000001254, -0.0154500000000484, -0.0277600000000007,
-0.0154500000000279, -0.0386000000000704, -0.0154500000000329,
-0.0115500000000053, 2.5238009638656e-15, -0.00385000000000757,
3.60475000000867, -0.470850000000881, -0.347350000000663, -0.173700000000328,
-0.139699999999998, -0.096500000000187, -0.0617500000001111,
-0.0579000000001016, -0.0424500000000768, -0.050150000000105,
-0.0579000000001191, -0.0540000000000976, -0.0579000000001924,
-0.0270000000000563, -0.0309000000000539, -0.0231500000000468,
-0.0270500000000538, -0.00775000000002209, -0.0193000000000404,
-0.0131199999999931, 0.219999999999842, 0.0579000000001427, -0.061750000000126,
-0.0617500000002055, -0.0309000000000726, -0.050150000000105,
-0.042450000000091, -0.0193000000000293, -0.0309000000000144,
-0.0115500000000196, -0.0116000000000154, -0.0154500000000366,
-0.00385000000000946, -0.0193000000000305, -0.00390000000000946,
-0.00390000000000639, -0.00771000000000015, -0.000789999999999225,
-4.97400384373025e-15, -0.00619000000000085, -0.0116000000000265,
-0.011550000000014, -0.00385000000000504, -0.00538999999999987,
-0.0116000000000203, -0.011550000000014, 0.00385000000001136,
-0.00230999999999795, 2.86419210237446e-15, -0.00230999999999954,
-0.00770000000002508, -0.00770000000001703, -0.00390000000000449,
-0.0085000000000008, -0.0193000000000529, -8.05101707233625e-15,
-0.00385000000001751, -0.0146699999999988, -0.00619000000000085,
-0.0116000000000265, 0.00153999999999996, 0.00385000000000546,
-0.00231000000000233, -0.000780000000000314, -0.00230999999999884,
0.0015400000000021, -8.05101707233625e-15, -0.00848000000000013,
-0.00385000000001751, -0.00775000000003729, -0.00769999999999792,
-1.1787959787484e-15, -0.00384999999999692, 0.00385000000001136,
-0.00384999999999762, 0.00385000000000639, -0.00385000000001161,
-0.000440000000001542, -0.00390000000000639, -0.000769999999999981,
0, -0.0154500000000091, -0.0077500000000059, -0.0154500000000335,
-0.0115500000000165, -0.00385000000000567, -0.00311000000000092,
0.0116000000000272, -0.00230999999999994, 0.0116000000000172,
0.00770000000001277, -0.00385000000000377, -0.00385000000001254,
0.00385000000001136, -0.00385000000000411, -0.0038499999999997,
-0.0116000000000215, -0.0154300000000006, -6.15348059644161e-15,
-0.00849999999999866, -0.0015500000000003, 0.00154000000000174,
-3.07674029821757e-15, -0.0115500000000345, -0.0115500000000165,
-6.15348059644161e-15, -0.00385000000002247, 0.0077000000000059,
-0.00385000000001254, -0.0115500000000315, -0.0154500000000107,
-0.0154500000000229, -0.0309000000000733, -1.65190000000256,
-0.258600000000477, -0.111900000000204, -0.0640499999999989,
-0.0579000000001016, -0.0270000000000494, -0.02393, -0.0193000000000324,
-0.0115500000000165, -0.0270000000000624, -0.0193000000000598,
-0.0309000000000733, -0.0463000000001036, -2.19220000000482,
-0.524900000000959, -0.189100000000636, -0.11580000000022, -0.0717700000000001,
-0.0424500000001407, -0.057900000000101, -0.0386000000000673,
-0.0193000000000449, -0.0277899999999995, -0.0077500000000276,
-0.0208600000000011, -0.0193000000000293, -0.0463000000000912,
-0.0386000000000716, -0.0501500000001031, -0.0347500000000728,
-0.0502000000000926, -0.0424500000000836, -0.00307999999999993,
-0.0116000000000234, 0.00389999999999833, -0.000769999999999981,
-0.00153999999999996, -0.00153999999999996, 0.00153999999999783,
-0.0162100000000009, -0.0386000000000797, -0.0432300000000026,
-0.038600000000117, -0.050200000000097, -0.0309000000000527,
-0.0231500000000593, 0.00461999999999989, -0.00385000000001064,
-0.00385000000000757, -0.0116000000000215, 0.00770000000004104,
0.00385000000000639, -0.941700000001459, -0.169850000000308,
-0.100350000000196, -0.0933799999999984, -0.0617500000001154,
-0.0579000000001165, -0.0386000000000822, -0.019300000000043,
-0.0231500000000629, -0.0115500000000165, -0.0270000000000464,
-0.0116000000000284, -0.00769999999999982, -2.76340000000441,
-0.270200000000513, -0.119650000000229, -0.108100000000387, -0.0540000000001033,
-0.0772000000001527, -0.0579000000001345, -0.0656000000001255,
-0.0540500000001704, -0.0386000000000716, -0.0270500000000663,
-0.0116000000000284, -0.0216200000000043, -0.00770000000001206,
-0.0308500000000552, -0.0115500000000265, -2.4190463576414e-14,
-0.00770000000003006, -0.0115900000000011, -0.0231500000000985,
-0.0193000000000293, -0.033979999999999, -0.00775000000002643,
-0.0478400000000022, -0.0231500000000412, -0.019300000000043,
-0.00233000000000134, -0.00390000000002501, 0.00154999999999958,
0.00384999999999991, 0.0077000000000059, -0.00770000000003193,
-0.0200899999999983, -0.0193000000000423, -0.0347000000000634,
-0.0540000000000927, -0.0733500000001364, -0.0501500000001637,
-0.0424500000000886, -0.050200000000087, -0.0308500000000459,
0.00384999999999834, -0.00231000000000208, -0.00387000000000167,
0.0030799999999978, -0.00385000000000757, -0.00385000000001064,
-0.0192500000000504, -0.0115500000000296, -0.0231500000001104,
-0.0579000000001085, -0.0733500000001314, -0.0386000000000697,
-0.0386000000000754, -0.0347500000000935, -0.00775000000001395,
0.00385000000000881, 0.000769999999999982, 0.0115500000000203,
0.00390000000001095, 0.00154000000000294, -0.00385000000001497,
-0.00385000000000567, -0.0309000000001234, -0.0347500000000728,
-0.0193000000000814, -0.0424500000000992, -0.0347500000000678,
0.274000000000822, 0.463150000000818, 1.03820000000353, 0.636800000000563,
-0.13663, -0.87225000000281, 0.644550000001354, -0.0579000000003174,
-0.72560000000209, -0.115800000000169, 2.08025000000553, -0.208400000000342,
-0.227700000000415, -0.328050000000636, -0.169850000000303, -0.104200000000212,
-0.0656500000001349, -0.0656500000001373, -0.0424500000000712,
-0.0347500000000697, -0.0285600000000002, -0.0193000000000324,
-0.0270000000000538, -0.0193000000000498, -0.0270000000000513,
-0.00849999999999724, -0.00770000000001513, -0.0162100000000009,
-0.0339800000000025, -0.0502000000001566, -0.0501500000000907,
-0.0193000000000454, -0.00770000000001893, 0.00385000000001136,
0.00390000000001402, 0.00153999999999996, -0.00307999999999993,
0.00390000000000023, 0.00384999999999834, 0.00384999999999644,
0.00385000000002943, -0.0138899999999971, -0.0223899999999993,
-0.0270500000000588, -0.00618999999999943, -0.0270500000000669,
0.00153999999999892, -0.000779999999999603, -2.5238009638656e-15,
0.00465000000000089, -0.00770000000001703, -2.91289464345889e-16,
0.00461999999999805, -0.0115900000000011, -0.00390000000001506,
-0.019300000000043, -0.0115899999999989, -0.0115900000000011,
-0.00770000000003258, 0, 0.00390000000000331, 0.0193000000000281,
0.00385000000002044, 0.00770000000002145, 0.00770000000000148,
0.0077000000000078, 0, 0.00308000000000135, -6.15348059644161e-15,
-0.015450000000036, -0.0309000000000726, -0.00385000000001254,
-0.0154000000000341, -1.11274169835756e-14, -0.00923999999999978,
-0.00234000000000107, -0.00770999999999944, 0.00385000000003251,
0.00461999999999429, 0.00385999999999811, -0.00770000000000798,
-0.023150000000093, -0.0154500000000348, -0.0424500000000737,
-0.019300000000043, -0.0308500000000125, -0.0309000000001054,
-0.0231500000000394, -1.1787959787484e-15, 0.000790000000000646,
-0.00231000000000036, 0, -0.00307999999999851, -0.00390000000002326,
-0.00230999999999753, -0.0193100000000022, -0.042450000000016,
-0.0385500000000679, -0.057900000000106, -0.0347000000000627,
-0.0386000000000922, -0.00385000000000445, 0.0077500000000097,
0.00230999999999995, -0.00385000000000352, 0.00307999999999948,
-0.000769999999999381, -1.1787959787484e-15, -0.015440000000001,
-0.0193000000000099, -0.0425000000000806, -0.0386000000000829,
-0.0424500000001675, -0.0386000000000773, -0.0463000000000192,
-0.00385000000001562, 0, 0.00769999999999875, -3.07674029821757e-15,
-0.00307999999999922, -0.0030799999999978, -0.0154000000000493,
-0.00385000000001254, -0.0231500000000079, -0.0347500000000802,
-0.0231500000000319, -0.0355200000000003, -0.0386000000000829,
-0.0463500000000801, -0.0347500000000678, 0.00155999999999792,
0.00385000000000639, -0.00385000000000231, 0, -0.00385000000000946,
-0.00153999999999966, 0, -0.0285600000000002, -0.0309000000000546,
-0.069450000000125, -0.0502000000000889, -0.0502000000000896,
0.3898000000001, 0.0540500000001028, 0.0115500000000253, 0.0116000000000142,
0.000769999999999981, -0.00385000000000504, -7.40090066366128e-15,
-0.00230999999999995, 0.00385000000000141, 0.00385000000000639,
-0.00385000000001254, -0.0270199999999981, -0.0502000000000896,
0.409100000000679, 0.0386000000000674, -0.0116000000000445, -0.00775000000004081,
-0.00307999999999993, -0.00385000000000757, -0.00770000000000208,
-0.00385000000001562, -0.00385000000001064, -0.00153999999999783,
-8.05101707233625e-15, -0.0177499999999995, -0.0424500000001796,
-0.0509500000000003, -0.0694500000001324, -0.0424500000001587,
-0.061750000000121, -0.0232000000000369, -0.0131299999999989,
-0.00153999999999886, 0.0077000000000059, -0.00310999999999879,
0.00769999999999982, 7.49841812496252e-15, -8.05101707233625e-15,
-0.0077000000000406, -0.0424500000000787, -0.0502000000001032,
-0.0347500000000747, -0.0656000000001262, -0.0733000000001494,
-0.034700000000074, -0.0193000000000869, 0.0231500000000662,
-0.00385000000000757, 0.00770000000001088, 0.0115600000000001,
-0.957150000001501, -0.14670000000027, -0.0772000000001383, -0.0617500000002002,
-0.0463000000000981, -0.0617500000001229, -0.0270000000000544,
-0.0347500000000597, -0.0386000000001412, -0.0694500000001331,
-0.0887500000001619, -0.0386000000000747, -0.0077500000000295,
0.015400000000064, 0.355050000000611, 0.0478699999999975, -6.15348059644161e-15,
-0.0177800000000019, -0.00385000000001064, -0.0116000000000674,
-0.0154500000000435, -0.0524900000000017, -0.0540500000002006,
-0.0540500000001021, -0.0617500000002394, -0.0308500000000688,
-0.0193000000000355, -0.0154000000000216, -0.000770000000000204,
0.00770000000001775, 0.00694000000000017, -0.00385000000001254,
0.0116000000000123, -0.00385000000002063, -0.00385000000000757,
-0.0270000000000743, -0.0309000000000763, -0.0926500000001775,
-0.0887500000003161, -0.0656500000001199, -0.042450000000078,
-0.0270000000000588, -0.0116000000000364, -0.000769999999999982,
-0.00385000000001254, -0.015400000000026, 0.00385000000002041,
0.0115500000000253, 0, -0.00385000000001161, -0.0386000000000723,
-0.0154500000000693, -0.100350000000196, -0.0849000000001704,
-0.0926500000001751, -0.0115500000000116, 0.00385000000000546,
-0.0116000000000154, 6.87160777622118e-15, -0.00384999999999991,
0.00231999999999886, -3.07674029821757e-15, 0.00390000000003514,
0.000779999999996745, -3.07674029821757e-15, -0.0231500000000617,
-0.0270500000000527, -0.0517200000000003, -0.050150000000105,
-0.0347500000000721, -0.0347500000000142, -0.00385000000001161,
0.00770000000000401, -0.00385000000000197, 0.000769999999999982,
-0.00385000000001372, 0.00385000000000141, 0.0116000000000278,
-3.71670324204166e-15, -0.0116000000000584, -0.00385000000001064,
-0.00464999999999875, -0.00775000000004982, -0.00390000000001506,
0.277900000000906, 0.119650000000208, 0.054000000000013, 0.0463000000000931,
0.0154500000000168, 0.00775000000000384, 0.0115500000000154,
0.00769999999999875, 1.89760393249092e-15, 0.00231999999999957,
0.000769999999999304, -0.0231500000000085, -0.0270500000000402,
0.351200000000562, -0.0231500000000833, -0.0270500000000588,
-0.0463500000000216, -0.0139000000000062, -9.23022089465272e-15,
-8.05101707233625e-15, 0.00385000000000546, 0.000759999999998229,
-0.0115500000000395, 0.000769999999999982, -0.011600000000024,
-0.00770000000001206, -0.0540500000001929, -0.0772000000001558,
-0.0656000000000217, -0.0772000000001484, -0.0579000000001128,
-0.0347000000000764, -0.0193000000000461, -0.00385000000000352,
-0.00385000000002122, -0.00696000000000083, 0.000789999999999225,
0.00384999999999834, -0.000800000000000978, -0.0116000000000234,
-0.00775000000001088, -0.0115900000000055, -0.0193000000000218,
-0.0347500000000808, -0.0386000000000897, -0.0501500000000858,
-0.00233999999999881, -0.00385000000000757, 2.00000000009208e-05,
0.308750000000515, 0.092650000000154, 0.0424500000000756, 0.0231500000000227,
0.0154500000000312, -0.00385000000001469, 0.00538999999999237,
0.474750000000936, 0.212300000000357, -0.0030699999999996, -0.0309000000000739,
-0.0115500000000265, -0.0116000000000265, -3.57390000000716,
-0.293350000001048, -0.119650000000226, -0.104200000000194, -0.0926500000001831,
-0.0540500000001096, -0.0694500000002714, -0.0772000000001527,
-0.0965000000001976, -0.0694500000001375, -0.100350000000182,
-0.084950000000289, -0.061750000000121, -0.0425000000000912,
-0.0424500000000662, -0.00770000000002011, -0.0154500000000422,
-0.00307999999999993, -0.00230999999999994, 0.00385000000001447,
-0.00154, -0.00385000000000567, -0.0386000000000747, -0.0695000000002463,
-0.0772000000001664, -0.0849000000002961, -0.0887500000001668,
-0.0193000000000504, -0.0578500000001047, -0.00775000000000708,
-1.2095231788207e-14, 0.00848999999999485, -3.07674029821757e-15,
-0.00541000000000057, -0.00390000000002247, 0.000769999999999981,
-0.0293300000000002, -0.050200000000087, -0.0656000000002546,
-0.0540500000001096, -0.069450000000138, 0.123500000000375, 0.0849000000001387,
0.00384999999999644, 0.023200000000042, 0.0115500000000123, 0.00775000000000473,
0.0115500000000203, 0.00385000000001447, -0.00775000000002506,
0.00466000000000122, -0.0254699999999978, -0.054799999999998,
-0.0231500000000444, 0.0116000000000454, 0.115800000000206, 0.030900000000046,
0.00385000000000331, -0.00153999999999996, 0.00384999999999084,
-0.00385000000000757, 0.00770000000001088, 1.7849988639723e-14,
0.00230999999999994, 0.00385000000001326, -0.00153999999999882,
-0.038600000000126, -0.0309000000000553, -0.00692999999999628,
-0.0154000000000403, -0.0579000000001097, -0.0347500000000678,
-0.0100400000000054, 0.00385000000000023, -0.00385000000001994,
-2.17923926727129e-14, 0.00389999999999028, 0.00390000000001402,
0.00384999999999084, -0.00385000000001751, 0.00770000000001399,
-0.0308500000000632, -0.0502000000001986, -0.0695000000001394,
-0.0501799999999982, -0.0309000000000752, -0.0270500000000557,
-0.0100500000000011, 0.00389999999999596, 0.0116000000000117,
1.89760393249092e-15, 0.0115500000000123, 0.00384999999998841,
-0.00385000000002965, 0.0077000000000078, 0.00385000000000639,
0.00770000000000283, -0.0501500000001132, -0.0617500000002242,
-0.0710100000000004, -0.0810500000000306, -0.0540500000001891,
-0.0386000000000617, -0.019300000000043, 0.00775000000000473,
0.00847000000000282, 0.00462999999999951, -2.11128370304365e-14,
0.00770000000001088, 0.00384999999999858, 9.99999999962123e-06,
-0.00770000000001206, -0.0733000000000254, -0.0656000000001967,
-0.111900000000213, -0.100350000000323, -0.0579000000001141,
-0.0385500000000131, -0.0116000000000215, 0.0193000000000318,
0.00390000000001402, 0.0270000000000452, 0.00770000000000182,
-8.05101707233625e-15)), row.names = c(NA, -717L), class = "data.frame")
As others have said, it is not clear what you are looking for.
Specifically, it's not clear how high above "baseline" is too high.
Here's a shot at it:
# Step-to-step change in X2: element i is X2[i + 1] - X2[i].
df_prime <- diff(df$X2)
# Positions where the step up exceeds one standard deviation of all steps and
# the starting value X2[i] lies above -sd(X2).
large_rise <- which(
  df_prime > sd(df_prime) &
    df$X2[-nrow(df)] > -sd(df$X2)
)
# X1 labels of the points at which those rises start.
df$X1[large_rise]
It's difficult to know from the question, but aren't you just looking for something like this?
# Numeric X1 labels of points with X2 above 0.1, keeping only those that lie
# more than 3 after the previously selected label (the first one is compared
# against 0), so a run of neighbouring points is reported once.
spikes <- local({
  candidates <- as.numeric(df$X1[df$X2 > 0.1])
  gap_to_previous <- diff(c(0, candidates))
  candidates[which(gap_to_previous > 3)]
})
spikes
#> [1] 2738 2758 2984 2994 3126 3139 3190 3260 3273 3309 3316 3363 3377
So, for example if you did
# Draw X2 against X1 as a line, then overlay one red point per detected spike
# at a fixed height of y = 1.
# NOTE(review): X1 is stored as character in the example data, so this relies
# on plot() coercing it to numbers - confirm on your R version.
plot(df$X1, df$X2, type = "l")
points(spikes, rep(1, length(spikes)), col="red")
You would get
I am using the following ID data and am trying to put it back into the correct form.
The first 20 observations of the "incorrect ID" look like:
[1] 11820096867 11820053047 13410057602 13410015341 14257205715 28382012393 13410001306 11820000771 11820000784 11820000884 11820011030
[12] 15230002545 13410015602 17336011108 11820000769 11820096867 11820053030 13410050602 11820053030 14257205715
This data can be split up into 4 sections S, G, V and I
I want to add back these leading zeros and separate the data into 4 columns.
S = 2 digits long
G = 1 digit long
V = 5 digits
I = 5 digits
I have been working backwards through these "incorrect IDs". For example, the observation 11820000771 would be split so that the last 5 digits (minus the leading zero) = I, the next 5 digits (minus the leading zero) = V, etc. So:
Example 1:
11820000771 would be:
I = 0771
V = 82000
G = 1
S = 1
Example 2:
14257205715 would be:
I = 5715
V = 25720
G = 4
S = 1
Example 3:
13410015602 would be:
I = 15602
V = 4100
G = 3
S = 1
Example 4:
10943900008 would be:
I = 0008
V = 94390
G = 0
S = 1
In the documentation it states that the leading zeros are not shown for the "incorrect ID" data and have been removed.
In a second "correct" data frame this is what the S, G, V and I look like:
S G V I
[91,] 0 1 18200 97341
[92,] 0 1 71990 15340
[93,] 0 1 18200 87418
[94,] 6 1 18200 38602
[95,] 27 1 34100 1640
[96,] 0 1 19699 30069
[97,] 0 2 84694 59574
[98,] 0 1 71990 1640
[99,] 0 1 18200 771
[100,] 0 1 18200 1640
So
The first objective is to split the "incorrect IDs" into the correct S, G, V and I, similar to the above.
The second objective is to create a new ID key which looks like the following:
[1] "00-01-73360-50661" "00-01-87692-30040" "00-01-34100-57509" "00-01-18200-53047" "00-03-70310-30703" "00-01-82000-72385"
[7] "00-01-68213-09410" "00-01-18200-00771" "00-01-34100-50340" "00-03-73360-97341"
Where the S, G, V and I are combined and split by a - and leading zeros are added back to the data.
Overview:
I am trying to add back leading zeros to segments of an ID variable which is split into 4 maximum length sections. If a segment begins on a 0 then it is removed. If it begins on a number greater than 0 then no leading zero is added to the ID.
Hopefully this is clear; if any part is not, let me know and I will clarify.
DATA:
ID <- c(11820096867, 11820053047, 13410057602, 13410015341, 14257205715,
28382012393, 13410001306, 11820000771, 11820000784, 11820000884,
11820011030, 15230002545, 13410015602, 17336011108, 11820000769,
11820096867, 11820053030, 13410050602, 11820053030, 14257205715,
11820011168, 27336097343, 13410015509, 12556924173, 13410001222,
18769227102, 18769210012, 13410048574, 13410057602, 28066095605,
17199030030, 11820011047, 13410057509, 13410017256, 13410050306,
18200072518, 13410001306, 11820053168, 11820053168, 11820096867,
11820043047, 18200072385, 11820043218, 13410029602, 13410030341,
17199030030, 17199000048, 18066095615, 15230002540, 13410015341,
17199030030, 13410057306, 11820011168, 13410059505, 17336011214,
11820096867, 11820000884, 13410003602, 31820000042, 13410015341,
11820000891, 13410000355, 11820096867, 13410031306, 17289010016,
11820053218, 11820053030, 11820000016, 11820011030, 17336011214,
13410015340, 2710000106005, 11820061030, 17089701331, 23410017306,
11820000016, 27199077005, 13410003256, 13410057341, 17199030030,
15230000435, 11820053218, 13410015341, 18769241103, 15230000434,
11820043218, 11820000842, 13410057340, 11820011047, 13410001340,
33410000354, 12210000170, 11820041218, 27336097343, 13410046874,
13410015340, 31820000697, 13410015306, 13410000007, 613598510062,
15230000022, 618516510505, 11820053218, 13410001602, 15146051460,
15230000022, 17031000024, 11820000884, 14182700012, 11820000784,
2710000106005, 18769233103, 17199010074, 17199030030, 18200072385,
11820011168, 11820000769, 16821309117, 11820053168, 13410050505,
11820043218, 11820053030, 13410017509, 17231163001, 15230002540,
33410000354, 18769210014, 15230002545, 27031030701, 15230000002,
18769240020, 12210000170, 23410017306, 13410050340, 17199000048,
15230000434, 11820096867, 15230002903, 13410057340, 28066095605,
11820079047, 17199000048, 11820011030, 17199000048, 27336097343,
13410057341, 13410000555, 13410050574, 18769230050, 11820096867,
11820000884, 18769210014, 21820086167, 11820053168, 11820041218,
13410015306, 715643501208, 11820002990, 613598512001, 16821309117,
13410000355, 33410000354, 13410057602, 11820000126, 17089701331,
11820027168, 17336035201, 27336097343, 13410057340, 11820000769,
11820053218, 11820011168, 16206705142, 11820000884, 11820053168,
11820011168, 18066095615, 15230000017, 11820003982, 11820043218,
17199030030, 11820000466, 27336097343, 11820096867, 11820011030,
15230002966, 611969902000, 11820011030, 17289010011, 711820053025,
23410017306, 11820096867, 12210000170, 13410057341, 18382072553,
15230000434, 13410057306, 13410048574, 12556971416, 618516510505,
13410014574, 13410017340, 27336082341, 13410001306, 18200072385,
13410015341, 11820079047, 15230000435, 17336035201, 13410015341,
13410051574, 17289010011, 11820096867, 13410050574, 13410001306,
15230000434, 21820000801, 13410001602, 17089701331, 23410017306,
13410050306, 11820053030, 11820000771, 11820000016, 11820000884,
18200072385, 15230002903, 17143945712, 11820004989, 16206705155,
11820011030, 13410050602, 16821309117, 18769233103, 11820011030,
13410003602, 17199030069, 23410017306, 17336013661, 15230002540,
13410050340, 15230002903, 18769283102, 13410057602, 17336011108,
27336097343, 17199070002, 13410057306, 15230000966, 13410072805,
11820000693, 17336035301, 21820000115, 15230000536, 31820000042,
13410057340, 17143932012, 11820053047, 13410017256, 13410001222,
18769241103, 17199030030, 13410015340, 10948700007, 11820086031,
11820043218, 13410031306, 13410057602, 17199030030, 11820003982,
11820011168, 17336011214, 16206705155, 11820053030, 13410057340,
15230002545, 613598510062, 13410057340, 2710000106005, 13410057306,
11820004990, 18200072518, 17336013343, 18066095615, 11820053218,
13410048574, 13410015306, 11820096867, 13410015340, 18469400001,
13410048574, 11820053218, 13410001340, 11820053168, 18769233103,
13410050306, 13410010602, 15230002545, 18066095615, 11820000106,
11820002992, 11820000693, 17199000048, 13410057306, 11820000771,
13410015341, 17031000009, 13410078574, 27336097343, 21820000647,
13410015341, 13410057256, 31820000697, 15230000017, 13410030341,
13410000175, 16821309117, 11820000771, 21820086167, 613598510062,
13410048505, 13410001306, 13410007306, 13410001505, 11820079047,
18806705542, 37336097341, 12210007500, 13410072805, 18066095615,
11820011047, 13410078574, 31820000697, 18417341130, 16206705155,
11820053168, 13410015341, 13410057306, 13410017256, 18382023473,
15230000435, 613598512001, 14182700712, 13410057340, 13410057509,
11820053168, 11820011218, 15230000434, 15230002966, 13410001602,
17199000027, 13410057306, 13410050340, 13410057341, 15230000434,
13410057602, 11820053047, 15146051460, 27199077008, 13410057340,
13410001306, 23410000005, 11820053218, 11820003982, 23410068505,
11820000833, 17031037037, 11820000466, 16206705155, 11820043218,
11820011030, 27336082341, 11820003982, 23410017306, 11820043218,
17336013302, 13410057341, 17336035201, 17199030005, 11820000884,
18200072385, 13410017505, 11820096418, 15230000540, 11820015168,
715643501201, 16821302112, 613598512001, 11820053168, 11820053047,
13410010505, 13410000554, 21820086167, 15230000416, 13410001340,
11820053030, 13410001340, 11820096867, 23410003505, 11820053218,
23410000005, 18200072385, 15230002545, 23410000005, 11820096867,
11820001991, 21820086167, 13410001602, 13410015341, 13410057602,
13410000355, 13410007306, 13410057602, 18066095615, 18382012368,
12210001640, 15230000434, 13410057340, 13410015256, 28382012393,
13410050306, 11820053047, 11820000891, 13410000559, 11820000466,
18015761194, 11820096418, 11820000891, 11820096418, 17199030030,
13410057509, 18769241103, 11820096867, 16821309117, 16821309117,
11820079047, 27336097343, 2710000106744, 11820000784, 11820000884,
18066095675, 11820096418, 13410015341, 11820053168, 11820053168,
11820096867, 11820004990, 613598510062, 15230000434, 2710000106005,
15230000434, 11820053047, 613598512001, 31820000042, 11820096379,
15230000435, 11820011030, 11820053030, 12210001640, 13410003306,
18200072385, 18417340130, 11820053168, 13410072805, 11820053218,
11820015168, 13410001509, 13410031306, 17089701325, 17199048004,
11820096867, 13410001509, 18549811113, 18066095937, 17336011341,
11820011025, 11820011030, 11820096418, 18066095935, 11820015168,
18200072385, 13410007341, 17336011348, 13410007306, 13410057602,
13410001341, 18769241102, 13410057340, 13410001602, 17199036400,
17289000016, 11820096867, 16821302117, 13410057306, 13410057306,
11820000833, 14182700712, 11820011030, 11820011030, 15230000440
)
EDIT 2:
As was pointed out in the comments, I need to remove the leading zeros from the data below.
This data is the "correct" data in the correct format. What I am now trying to do is just remove the leading zeros from each section of the data below. So 00-01-18200-00987 would be split into 4 columns as before, with the leading zeros removed:
S = 0
G = 1
V = 18200
I = 0987
Data:
IDs <- c("00-01-41827-00712", "00-01-52300-01540", "00-01-18200-00987",
"00-01-83820-07131", "00-01-34100-01222", "00-01-34100-50602",
"00-01-52300-00536", "00-01-42572-05715", "00-01-34100-25574",
"00-01-73360-73149", "00-01-34100-51574", "00-01-34100-07602",
"00-01-89961-00420", "00-01-71990-90029", "00-01-34100-31341",
"00-02-34100-30602", "00-01-34100-17536", "00-01-34100-57602",
"00-01-18200-11047", "00-01-34100-00880", "00-01-34100-07602",
"07-01-67084-27455", "00-01-34100-07340", "00-01-80660-95615",
"00-01-34100-50222", "00-01-34100-15509", "00-01-72311-63009",
"00-01-18200-54028", "06-01-19699-02000", "00-01-73360-35201",
"06-01-85165-10504", "06-01-34986-10003", "00-03-70310-30703",
"00-01-18200-53168", "00-01-18200-01991", "00-01-89961-10120",
"00-01-82000-72385", "00-01-18200-00784", "00-01-71990-30030",
"00-01-72890-00011", "00-01-34100-00622", "00-01-18200-15168",
"00-01-52300-00440", "00-01-34100-00355", "00-01-71990-00048",
"00-01-34100-77435", "00-01-80157-11125", "00-01-52300-01301",
"06-01-85165-10505", "00-01-87692-83102", "00-01-34100-50505",
"00-01-34100-00355", "00-01-52300-00440", "00-01-34100-50340",
"00-01-73360-13343", "00-01-80660-95301", "00-01-34100-14505",
"00-01-34100-59574", "00-01-34100-07306", "00-01-18200-53168",
"00-01-34100-15256", "27-01-00001-06502", "00-01-71990-77828",
"00-01-18200-43218", "00-01-73360-13343", "00-01-72311-63001",
"00-01-18200-00987", "00-01-18200-79047", "00-01-18200-00466",
"00-01-82000-72385", "00-01-34100-57602", "00-02-34100-25505",
"00-01-34100-01341", "00-03-73360-97341", "00-01-18200-00987",
"00-01-34100-00488", "00-01-18200-15168", "00-01-34100-01306",
"00-02-18200-29031", "00-01-34100-48602", "00-01-85498-73837",
"00-02-34100-62509", "00-01-34100-00009", "00-02-34100-17306",
"00-01-18200-00106", "00-01-41827-00712", "00-01-71990-70002",
"00-01-82488-12700", "00-01-72890-00030", "00-01-18200-00956",
"00-01-84173-32130", "00-01-52300-00536", "00-01-80660-95625",
"00-01-22100-00157", "00-01-34100-03306", "00-01-18200-00639",
"00-01-18200-15047", "00-01-85498-73837", "00-01-22100-00170",
"00-01-52300-02540", "00-01-52300-02540", "00-01-34100-68574",
"00-01-34100-03509", "00-01-18200-00978", "00-01-71990-10006",
"00-01-52300-02540", "00-01-18200-01991", "00-03-34100-00354",
"00-01-18200-03982", "07-01-18200-53025", "00-01-18200-03982",
"00-01-72890-00016", "00-01-34100-15509", "00-01-84173-10545",
"00-01-34100-03340", "00-01-71990-48004", "00-01-34100-62340",
"00-01-71990-77828", "00-01-34100-00904", "00-01-71990-00047",
"00-01-87692-10012", "00-01-34100-07341", "00-01-18200-79047",
"00-01-85725-00005", "00-01-52300-00540", "00-01-71990-30030",
"00-01-34100-50574", "00-02-73360-82341", "00-01-34100-57306",
"00-01-72311-63011", "00-01-73360-35201", "00-01-34100-50574",
"00-01-71990-10033", "00-01-71990-00048", "00-01-34100-57536",
"00-01-70897-01331", "00-01-52300-00434", "00-01-71990-48016",
"00-01-34100-31602", "00-01-18200-00834", "00-01-34100-31306",
"00-01-18200-11168", "00-01-34100-00252", "00-02-72890-00012",
"00-01-52300-00022", "00-02-34100-17306", "00-01-52300-00017",
"00-01-82488-12356", "00-01-18200-04989", "00-01-34100-01222",
"00-03-34100-00354", "00-01-34100-14505", "00-01-18200-00933",
"00-01-52300-00416", "00-02-18200-29031", "00-01-18200-00865",
"00-01-82488-12910", "00-01-80660-95625", "00-01-41827-00076",
"00-01-18200-27168", "00-01-34100-53505", "00-01-34100-01340",
"00-01-18200-02989", "00-01-34100-62505", "00-01-73360-50202",
"00-01-34100-01256", "00-01-71250-40205", "00-01-34100-15340",
"00-02-18200-29031", "00-01-72311-63012", "00-03-18200-00697",
"00-02-18200-00166", "00-01-34100-00491", "00-01-52300-02966",
"00-01-22100-00171", "00-01-34100-14574", "00-01-49483-18000",
"00-01-71990-09511", "00-01-34100-50222", "00-02-71250-00019",
"00-01-34100-03509", "00-01-18200-53168", "00-01-34100-57306",
"00-01-34100-17505", "00-02-34100-17306", "00-01-87000-50882",
"00-01-34100-50574", "00-01-83820-12360", "00-01-34100-10505",
"00-01-71990-70002", "00-03-70897-01123", "00-01-18200-00833",
"00-01-34100-57256", "00-01-34100-62340", "07-01-19256-00058",
"00-01-71250-40205", "00-01-09487-00007", "00-01-18200-00833",
"00-01-83820-23473", "00-01-34100-00355", "00-01-34100-01256",
"00-01-71439-34806", "00-01-34100-51306", "00-01-34100-50306",
"06-01-33745-13000", "00-01-34100-00904", "00-01-18200-03982",
"00-01-18200-00769", "00-01-52300-00966", "00-01-52300-00022",
"00-01-52300-00540", "00-01-71990-10074", "00-02-18200-00801",
"00-01-71990-30030", "00-01-18200-96867", "00-02-18200-87418",
"00-01-34100-15222", "00-01-34100-15340", "00-01-87692-40020",
"00-01-18200-00126", "00-01-71439-34806", "00-01-34100-15256",
"00-02-18200-00701", "00-02-73360-82301", "00-01-68213-03112",
"00-01-73360-80301", "00-01-34100-46805", "00-01-18200-11025",
"00-01-34100-53505", "00-02-18200-00647", "00-01-18200-00974",
"00-01-62067-05172", "00-01-71990-30069", "00-01-34100-01528",
"00-02-83820-12393", "00-02-18200-87418", "00-01-34100-01509",
"00-01-34100-57602", "00-01-34100-15509", "00-01-34100-03509",
"00-01-34100-01602", "00-01-34100-50222", "00-01-34100-67505",
"00-01-84173-37133", "00-02-34100-25505", "00-01-18200-00834",
"00-01-71990-00028", "00-01-34100-03602", "00-01-22100-00171",
"00-01-18200-00106", "00-01-83741-10012", "00-01-73360-11348",
"00-01-80660-95935", "00-01-18200-86418", "00-01-22100-01640",
"00-01-84173-32130", "00-01-71990-48016", "00-01-62067-05172",
"00-01-18200-00891", "00-01-52300-00022", "00-01-34100-62340",
"00-01-34100-50306", "00-01-34100-17256", "00-01-34100-57306",
"00-01-62067-05172", "00-01-85725-11508", "00-03-18200-00697",
"00-01-34100-01505", "00-01-18200-00466", "00-01-34100-00271",
"00-01-18200-43218", "00-01-70897-01331", "00-01-18200-00974",
"00-03-34100-00304", "00-02-34100-00005", "00-01-80157-11016",
"00-01-34100-57256", "00-01-34100-17505", "06-01-13008-71310",
"00-01-34100-57306", "00-01-34100-00559", "00-01-52300-02540",
"00-01-82054-80441", "00-01-71990-10033", "00-02-73360-82341",
"00-01-83820-12360", "00-02-18200-00166", "00-01-18200-00834",
"00-01-62067-05172", "00-01-52300-02903", "00-02-34100-17306",
"00-01-80660-95937", "00-01-52300-00536", "00-01-34100-77435",
"00-01-70310-37037", "00-01-73360-35201", "00-01-34100-57306",
"00-01-18200-61047", "00-01-62067-38072", "00-01-34100-50574",
"07-01-19256-00054", "00-01-34100-62505", "00-02-83741-00006",
"00-03-70897-01123", "00-01-34100-57341", "00-01-34100-25574",
"00-01-34100-00554", "00-03-18200-00042", "06-01-35985-00016",
"00-01-34100-15340", "00-01-18200-04990", "00-01-73360-50661",
"00-01-52300-00022", "00-01-34100-50340", "00-02-18200-00801",
"00-01-18200-00769", "00-03-34100-00354", "00-01-49483-11200",
"00-01-73360-35301", "00-01-34100-50602", "07-02-39165-00125",
"00-01-71990-10074", "00-01-70897-01331", "00-01-71439-22033",
"00-02-82488-00006", "00-01-18200-00670", "06-01-35985-00016",
"00-01-71990-48016", "00-01-22100-07500", "00-01-34100-17602",
"00-01-73360-11214", "00-01-34100-10602", "00-01-18200-11168",
"00-01-34100-31306", "00-01-18200-00468", "00-02-82488-00006",
"00-01-87692-10012", "00-02-82488-00006", "00-01-18200-79047",
"00-01-87692-30040", "00-01-34100-01509", "00-02-83741-00006",
"27-01-00001-06505", "06-01-85165-10505", "00-01-18200-86418",
"00-01-18200-53168", "00-01-34100-67602", "00-01-80660-95625",
"00-01-71990-00048", "00-01-62067-05155", "00-01-71990-48004",
"00-01-18200-61047", "00-01-18200-00313", "00-02-83820-12393",
"00-01-71990-77828", "00-01-18200-00126", "00-01-71990-30030",
"00-01-34100-01602", "00-01-82488-12345", "00-01-71670-04064",
"00-01-34100-03306", "00-01-18200-00964", "00-01-34100-50505",
"00-01-18200-00974", "06-01-85165-10707", "00-02-18200-29031",
"00-01-68213-03112", "00-01-34100-10505", "00-01-18200-04989",
"00-01-34100-17505", "00-01-72890-00020", "00-01-72311-63011",
"00-01-34100-01222", "00-01-84173-32130", "07-01-60890-95602",
"00-01-70897-18331", "00-01-72890-00020", "00-01-87692-27102",
"06-01-35985-12001", "00-01-73360-35301", "00-01-70897-01331",
"00-01-18200-04990", "00-01-18200-00769", "00-01-18200-04997",
"00-01-70897-01125", "00-01-18200-41218", "00-01-18200-92867",
"00-04-34100-00152", "00-01-18200-53218", "00-01-34100-10505",
"00-01-84694-00001", "00-01-34100-62340", "00-01-52300-00435",
"00-01-34100-25602", "00-01-34100-62340", "00-01-62067-05155",
"00-01-34100-50505", "00-01-18200-79047", "00-01-34100-00555",
"00-01-18200-00466", "07-01-18200-53025", "00-01-71990-00007",
"00-01-34100-07341", "00-01-89961-00120", "06-01-19699-00006",
"00-02-18200-86167", "00-01-71439-22033", "00-01-09487-00007",
"00-01-72311-63009", "00-01-73360-11214", "00-01-42572-05715",
"00-01-34100-50340", "00-01-34100-31341", "00-02-22100-02500",
"00-02-80660-95785", "00-01-71990-70002", "07-01-98373-12603",
"00-01-18200-00865", "00-01-71990-00027", "00-01-85498-73837",
"00-02-71250-00019", "00-01-80660-95615", "00-02-70310-30701",
"00-01-85498-12345", "00-01-18200-86031", "00-01-87692-33103",
"00-01-62067-05155", "00-01-18200-53218", "00-01-87000-50901",
"00-01-71990-48016", "00-01-73360-11214", "00-01-34100-00579",
"00-01-34100-62340", "00-01-87692-10012", "00-01-34100-62340",
"00-01-70310-00012", "00-01-18200-00016", "00-01-80157-61147",
"00-01-18200-04997", "00-01-18200-00784", "00-01-71439-45712",
"00-01-18200-00833", "00-01-71990-77603", "00-01-34100-15340",
"00-01-71990-30030", "00-01-18200-61047", "00-01-34100-30306",
"00-01-34100-15505", "00-03-18200-00697", "00-04-25569-19231",
"00-01-18200-04997", "00-01-34100-15602", "00-01-71990-47712",
"00-01-22100-01640", "00-01-34100-15256", "06-01-85165-10502",
"00-01-71990-30005", "00-02-18200-29031", "00-02-71250-00019",
"06-01-35985-10062", "06-01-19699-00002", "00-01-18200-00468",
"00-01-34100-17505", "00-02-71990-77005", "00-01-34100-80706",
"00-02-18200-00801", "00-01-34100-48602", "00-01-34100-00904",
"00-01-73360-50202", "00-01-34100-30306", "00-01-89961-00120",
"00-01-34100-10602", "00-01-34100-03306", "00-02-72890-00012",
"00-01-62067-05142", "00-01-18200-53168", "00-01-34100-77435",
"00-01-34100-48574", "00-01-72890-00011", "00-01-83820-07531",
"00-01-34100-01222", "07-01-18200-53025", "00-01-62067-04955",
"00-01-18200-79047", "00-03-41827-00046", "00-01-18200-15047",
"06-01-85165-10106", "00-02-18200-87418", "00-02-18200-29031",
"00-01-18200-00773", "00-01-82488-13000", "00-01-73360-13343",
"00-01-62067-38055", "00-01-34100-50222", "00-01-71990-00008",
"00-01-85498-73837", "00-01-34100-00009", "00-01-71990-90029",
"00-01-34100-00009", "00-01-34100-01509")
EDIT 3:
Using the data in EDIT 2: I have the following examples.
Example 1: 00-01-34100-01509, which is one of the IDs in the second edit's data, should collapse to 13410001509.
Example 2:
00-01-62067-05155 should collapse to 16206705155
Example 3: 00-01-82488-12356 should collapse to 18248812356
Example 4: 06-01-19699-00002 should collapse to 611969900002
Example 5: 00-01-09439-00008 should collapse to 10943900008
Example 6: 00-01-09439-00008 should collapse to 10943900008
The common theme here is that it is just the first leading zeros being removed. That is the leading zeros in S and G.
So what I am now trying to do is to gsub the IDs data removing the - so I will have data which looks like the following (taking example 6) - 00010943900008 then from here remove the leading zeros so the data becomes 10943900008. Which is much simpler than what I had previously thought.
EDIT 4:
When I run my version
I get the following console output:
> df_panel$COLUPC <- gsub("-","",df_panel$UPC)
> df_panel$COLUPC <- sub("^[0]+", "", df_panel$COLUPC)
> beer_PANEL_GR$COLUPCmatch <- beer_PANEL_GR$COLUPC %in% df_panel$COLUPC
> sum(beer_PANEL_GR$COLUPCmatch == FALSE)
[1] 896
> sum(beer_PANEL_GR$COLUPCmatch == TRUE)
[1] 19119
>
> beer_PANEL_GR$COLUPC <- as.character(beer_PANEL_GR$COLUPC)
> df <- full_join(df_panel, beer_PANEL_GR, by = "COLUPC") #Joining with UPC causes us to lose a lot of observations
> dim(df)
[1] 5293488 40
When I run your version I get the following console output:
> # remove 0s at the beginning of the string, or preceded by "-"
> df_panel$COLUPC <- gsub("(?<=^|-)0","", df_panel$UPC, perl = TRUE)
>
> # remove dashes
> df_panel$COLUPC <- gsub("-", "", df_panel$COLUPC)
> # remove leading zeros
> df_panel$COLUPC <- gsub("^0+", "", df_panel$COLUPC)
>
> beer_PANEL_GR$COLUPCmatch <- beer_PANEL_GR$COLUPC %in% df_panel$COLUPC
> sum(beer_PANEL_GR$COLUPCmatch == FALSE)
[1] 7382
> sum(beer_PANEL_GR$COLUPCmatch == TRUE)
[1] 12633
>
> df2 <- full_join(df_panel, beer_PANEL_GR, by = "COLUPC")
> dim(df2)
[1] 3564132 40
Addressing your edit, how about:
library(dplyr)
# IDs look like S-G-V-I. Only the S and G segments lose their leading zeros;
# V and I keep theirs, so 00-01-62067-05155 collapses to 16206705155.
# strip leading zeros from the S and G segments and drop their dashes
sub("^0*([0-9]*)-0*([0-9]*)-", "\\1\\2", IDs) %>%
  # remove the remaining dash between V and I
  gsub("-", "", ., fixed = TRUE)
# first six results:
# [1] "14182700712" "15230001540" "11820000987" "18382007131" "13410001222"
# [6] "13410050602"
I have a column in a data frame (factor type) that I want to convert to POSIXlt using strptime(). What I'm doing is:
file[, 2] <- strptime(file[, 2], format="%H:%M:%S")
Examples of values in the column are: 17:24:00 17:25:00 17:26:00
However, the function returns all 0s. Is the format specified incorrectly? Any help would be appreciated.
Here's the output of dput(head(file[,2])):
structure(1045:1050, .Label = c("00:00:00", "00:01:00", "00:02:00",
"00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00", "00:08:00",
"00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00", "00:14:00",
"00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00", "00:20:00",
"00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00", "00:26:00",
"00:27:00", "00:28:00", "00:29:00", "00:30:00", "00:31:00", "00:32:00",
"00:33:00", "00:34:00", "00:35:00", "00:36:00", "00:37:00", "00:38:00",
"00:39:00", "00:40:00", "00:41:00", "00:42:00", "00:43:00", "00:44:00",
"00:45:00", "00:46:00", "00:47:00", "00:48:00", "00:49:00", "00:50:00",
"00:51:00", "00:52:00", "00:53:00", "00:54:00", "00:55:00", "00:56:00",
"00:57:00", "00:58:00", "00:59:00", "01:00:00", "01:01:00", "01:02:00",
"01:03:00", "01:04:00", "01:05:00", "01:06:00", "01:07:00", "01:08:00",
"01:09:00", "01:10:00", "01:11:00", "01:12:00", "01:13:00", "01:14:00",
"01:15:00", "01:16:00", "01:17:00", "01:18:00", "01:19:00", "01:20:00",
"01:21:00", "01:22:00", "01:23:00", "01:24:00", "01:25:00", "01:26:00",
"01:27:00", "01:28:00", "01:29:00", "01:30:00", "01:31:00", "01:32:00",
"01:33:00", "01:34:00", "01:35:00", "01:36:00", "01:37:00", "01:38:00",
"01:39:00", "01:40:00", "01:41:00", "01:42:00", "01:43:00", "01:44:00",
"01:45:00", "01:46:00", "01:47:00", "01:48:00", "01:49:00", "01:50:00",
"01:51:00", "01:52:00", "01:53:00", "01:54:00", "01:55:00", "01:56:00",
"01:57:00", "01:58:00", "01:59:00", "02:00:00", "02:01:00", "02:02:00",
"02:03:00", "02:04:00", "02:05:00", "02:06:00", "02:07:00", "02:08:00",
"02:09:00", "02:10:00", "02:11:00", "02:12:00", "02:13:00", "02:14:00",
"02:15:00", "02:16:00", "02:17:00", "02:18:00", "02:19:00", "02:20:00",
"02:21:00", "02:22:00", "02:23:00", "02:24:00", "02:25:00", "02:26:00",
"02:27:00", "02:28:00", "02:29:00", "02:30:00", "02:31:00", "02:32:00",
"02:33:00", "02:34:00", "02:35:00", "02:36:00", "02:37:00", "02:38:00",
"02:39:00", "02:40:00", "02:41:00", "02:42:00", "02:43:00", "02:44:00",
"02:45:00", "02:46:00", "02:47:00", "02:48:00", "02:49:00", "02:50:00",
"02:51:00", "02:52:00", "02:53:00", "02:54:00", "02:55:00", "02:56:00",
"02:57:00", "02:58:00", "02:59:00", "03:00:00", "03:01:00", "03:02:00",
"03:03:00", "03:04:00", "03:05:00", "03:06:00", "03:07:00", "03:08:00",
"03:09:00", "03:10:00", "03:11:00", "03:12:00", "03:13:00", "03:14:00",
"03:15:00", "03:16:00", "03:17:00", "03:18:00", "03:19:00", "03:20:00",
"03:21:00", "03:22:00", "03:23:00", "03:24:00", "03:25:00", "03:26:00",
"03:27:00", "03:28:00", "03:29:00", "03:30:00", "03:31:00", "03:32:00",
"03:33:00", "03:34:00", "03:35:00", "03:36:00", "03:37:00", "03:38:00",
"03:39:00", "03:40:00", "03:41:00", "03:42:00", "03:43:00", "03:44:00",
"03:45:00", "03:46:00", "03:47:00", "03:48:00", "03:49:00", "03:50:00",
"03:51:00", "03:52:00", "03:53:00", "03:54:00", "03:55:00", "03:56:00",
"03:57:00", "03:58:00", "03:59:00", "04:00:00", "04:01:00", "04:02:00",
"04:03:00", "04:04:00", "04:05:00", "04:06:00", "04:07:00", "04:08:00",
"04:09:00", "04:10:00", "04:11:00", "04:12:00", "04:13:00", "04:14:00",
"04:15:00", "04:16:00", "04:17:00", "04:18:00", "04:19:00", "04:20:00",
"04:21:00", "04:22:00", "04:23:00", "04:24:00", "04:25:00", "04:26:00",
"04:27:00", "04:28:00", "04:29:00", "04:30:00", "04:31:00", "04:32:00",
"04:33:00", "04:34:00", "04:35:00", "04:36:00", "04:37:00", "04:38:00",
"04:39:00", "04:40:00", "04:41:00", "04:42:00", "04:43:00", "04:44:00",
"04:45:00", "04:46:00", "04:47:00", "04:48:00", "04:49:00", "04:50:00",
"04:51:00", "04:52:00", "04:53:00", "04:54:00", "04:55:00", "04:56:00",
"04:57:00", "04:58:00", "04:59:00", "05:00:00", "05:01:00", "05:02:00",
"05:03:00", "05:04:00", "05:05:00", "05:06:00", "05:07:00", "05:08:00",
"05:09:00", "05:10:00", "05:11:00", "05:12:00", "05:13:00", "05:14:00",
"05:15:00", "05:16:00", "05:17:00", "05:18:00", "05:19:00", "05:20:00",
"05:21:00", "05:22:00", "05:23:00", "05:24:00", "05:25:00", "05:26:00",
"05:27:00", "05:28:00", "05:29:00", "05:30:00", "05:31:00", "05:32:00",
"05:33:00", "05:34:00", "05:35:00", "05:36:00", "05:37:00", "05:38:00",
"05:39:00", "05:40:00", "05:41:00", "05:42:00", "05:43:00", "05:44:00",
"05:45:00", "05:46:00", "05:47:00", "05:48:00", "05:49:00", "05:50:00",
"05:51:00", "05:52:00", "05:53:00", "05:54:00", "05:55:00", "05:56:00",
"05:57:00", "05:58:00", "05:59:00", "06:00:00", "06:01:00", "06:02:00",
"06:03:00", "06:04:00", "06:05:00", "06:06:00", "06:07:00", "06:08:00",
"06:09:00", "06:10:00", "06:11:00", "06:12:00", "06:13:00", "06:14:00",
"06:15:00", "06:16:00", "06:17:00", "06:18:00", "06:19:00", "06:20:00",
"06:21:00", "06:22:00", "06:23:00", "06:24:00", "06:25:00", "06:26:00",
"06:27:00", "06:28:00", "06:29:00", "06:30:00", "06:31:00", "06:32:00",
"06:33:00", "06:34:00", "06:35:00", "06:36:00", "06:37:00", "06:38:00",
"06:39:00", "06:40:00", "06:41:00", "06:42:00", "06:43:00", "06:44:00",
"06:45:00", "06:46:00", "06:47:00", "06:48:00", "06:49:00", "06:50:00",
"06:51:00", "06:52:00", "06:53:00", "06:54:00", "06:55:00", "06:56:00",
"06:57:00", "06:58:00", "06:59:00", "07:00:00", "07:01:00", "07:02:00",
"07:03:00", "07:04:00", "07:05:00", "07:06:00", "07:07:00", "07:08:00",
"07:09:00", "07:10:00", "07:11:00", "07:12:00", "07:13:00", "07:14:00",
"07:15:00", "07:16:00", "07:17:00", "07:18:00", "07:19:00", "07:20:00",
"07:21:00", "07:22:00", "07:23:00", "07:24:00", "07:25:00", "07:26:00",
"07:27:00", "07:28:00", "07:29:00", "07:30:00", "07:31:00", "07:32:00",
"07:33:00", "07:34:00", "07:35:00", "07:36:00", "07:37:00", "07:38:00",
"07:39:00", "07:40:00", "07:41:00", "07:42:00", "07:43:00", "07:44:00",
"07:45:00", "07:46:00", "07:47:00", "07:48:00", "07:49:00", "07:50:00",
"07:51:00", "07:52:00", "07:53:00", "07:54:00", "07:55:00", "07:56:00",
"07:57:00", "07:58:00", "07:59:00", "08:00:00", "08:01:00", "08:02:00",
"08:03:00", "08:04:00", "08:05:00", "08:06:00", "08:07:00", "08:08:00",
"08:09:00", "08:10:00", "08:11:00", "08:12:00", "08:13:00", "08:14:00",
"08:15:00", "08:16:00", "08:17:00", "08:18:00", "08:19:00", "08:20:00",
"08:21:00", "08:22:00", "08:23:00", "08:24:00", "08:25:00", "08:26:00",
"08:27:00", "08:28:00", "08:29:00", "08:30:00", "08:31:00", "08:32:00",
"08:33:00", "08:34:00", "08:35:00", "08:36:00", "08:37:00", "08:38:00",
"08:39:00", "08:40:00", "08:41:00", "08:42:00", "08:43:00", "08:44:00",
"08:45:00", "08:46:00", "08:47:00", "08:48:00", "08:49:00", "08:50:00",
"08:51:00", "08:52:00", "08:53:00", "08:54:00", "08:55:00", "08:56:00",
"08:57:00", "08:58:00", "08:59:00", "09:00:00", "09:01:00", "09:02:00",
"09:03:00", "09:04:00", "09:05:00", "09:06:00", "09:07:00", "09:08:00",
"09:09:00", "09:10:00", "09:11:00", "09:12:00", "09:13:00", "09:14:00",
"09:15:00", "09:16:00", "09:17:00", "09:18:00", "09:19:00", "09:20:00",
"09:21:00", "09:22:00", "09:23:00", "09:24:00", "09:25:00", "09:26:00",
"09:27:00", "09:28:00", "09:29:00", "09:30:00", "09:31:00", "09:32:00",
"09:33:00", "09:34:00", "09:35:00", "09:36:00", "09:37:00", "09:38:00",
"09:39:00", "09:40:00", "09:41:00", "09:42:00", "09:43:00", "09:44:00",
"09:45:00", "09:46:00", "09:47:00", "09:48:00", "09:49:00", "09:50:00",
"09:51:00", "09:52:00", "09:53:00", "09:54:00", "09:55:00", "09:56:00",
"09:57:00", "09:58:00", "09:59:00", "10:00:00", "10:01:00", "10:02:00",
"10:03:00", "10:04:00", "10:05:00", "10:06:00", "10:07:00", "10:08:00",
"10:09:00", "10:10:00", "10:11:00", "10:12:00", "10:13:00", "10:14:00",
"10:15:00", "10:16:00", "10:17:00", "10:18:00", "10:19:00", "10:20:00",
"10:21:00", "10:22:00", "10:23:00", "10:24:00", "10:25:00", "10:26:00",
"10:27:00", "10:28:00", "10:29:00", "10:30:00", "10:31:00", "10:32:00",
"10:33:00", "10:34:00", "10:35:00", "10:36:00", "10:37:00", "10:38:00",
"10:39:00", "10:40:00", "10:41:00", "10:42:00", "10:43:00", "10:44:00",
"10:45:00", "10:46:00", "10:47:00", "10:48:00", "10:49:00", "10:50:00",
"10:51:00", "10:52:00", "10:53:00", "10:54:00", "10:55:00", "10:56:00",
"10:57:00", "10:58:00", "10:59:00", "11:00:00", "11:01:00", "11:02:00",
"11:03:00", "11:04:00", "11:05:00", "11:06:00", "11:07:00", "11:08:00",
"11:09:00", "11:10:00", "11:11:00", "11:12:00", "11:13:00", "11:14:00",
"11:15:00", "11:16:00", "11:17:00", "11:18:00", "11:19:00", "11:20:00",
"11:21:00", "11:22:00", "11:23:00", "11:24:00", "11:25:00", "11:26:00",
"11:27:00", "11:28:00", "11:29:00", "11:30:00", "11:31:00", "11:32:00",
"11:33:00", "11:34:00", "11:35:00", "11:36:00", "11:37:00", "11:38:00",
"11:39:00", "11:40:00", "11:41:00", "11:42:00", "11:43:00", "11:44:00",
"11:45:00", "11:46:00", "11:47:00", "11:48:00", "11:49:00", "11:50:00",
"11:51:00", "11:52:00", "11:53:00", "11:54:00", "11:55:00", "11:56:00",
"11:57:00", "11:58:00", "11:59:00", "12:00:00", "12:01:00", "12:02:00",
"12:03:00", "12:04:00", "12:05:00", "12:06:00", "12:07:00", "12:08:00",
"12:09:00", "12:10:00", "12:11:00", "12:12:00", "12:13:00", "12:14:00",
"12:15:00", "12:16:00", "12:17:00", "12:18:00", "12:19:00", "12:20:00",
"12:21:00", "12:22:00", "12:23:00", "12:24:00", "12:25:00", "12:26:00",
"12:27:00", "12:28:00", "12:29:00", "12:30:00", "12:31:00", "12:32:00",
"12:33:00", "12:34:00", "12:35:00", "12:36:00", "12:37:00", "12:38:00",
"12:39:00", "12:40:00", "12:41:00", "12:42:00", "12:43:00", "12:44:00",
"12:45:00", "12:46:00", "12:47:00", "12:48:00", "12:49:00", "12:50:00",
"12:51:00", "12:52:00", "12:53:00", "12:54:00", "12:55:00", "12:56:00",
"12:57:00", "12:58:00", "12:59:00", "13:00:00", "13:01:00", "13:02:00",
"13:03:00", "13:04:00", "13:05:00", "13:06:00", "13:07:00", "13:08:00",
"13:09:00", "13:10:00", "13:11:00", "13:12:00", "13:13:00", "13:14:00",
"13:15:00", "13:16:00", "13:17:00", "13:18:00", "13:19:00", "13:20:00",
"13:21:00", "13:22:00", "13:23:00", "13:24:00", "13:25:00", "13:26:00",
"13:27:00", "13:28:00", "13:29:00", "13:30:00", "13:31:00", "13:32:00",
"13:33:00", "13:34:00", "13:35:00", "13:36:00", "13:37:00", "13:38:00",
"13:39:00", "13:40:00", "13:41:00", "13:42:00", "13:43:00", "13:44:00",
"13:45:00", "13:46:00", "13:47:00", "13:48:00", "13:49:00", "13:50:00",
"13:51:00", "13:52:00", "13:53:00", "13:54:00", "13:55:00", "13:56:00",
"13:57:00", "13:58:00", "13:59:00", "14:00:00", "14:01:00", "14:02:00",
"14:03:00", "14:04:00", "14:05:00", "14:06:00", "14:07:00", "14:08:00",
"14:09:00", "14:10:00", "14:11:00", "14:12:00", "14:13:00", "14:14:00",
"14:15:00", "14:16:00", "14:17:00", "14:18:00", "14:19:00", "14:20:00",
"14:21:00", "14:22:00", "14:23:00", "14:24:00", "14:25:00", "14:26:00",
"14:27:00", "14:28:00", "14:29:00", "14:30:00", "14:31:00", "14:32:00",
"14:33:00", "14:34:00", "14:35:00", "14:36:00", "14:37:00", "14:38:00",
"14:39:00", "14:40:00", "14:41:00", "14:42:00", "14:43:00", "14:44:00",
"14:45:00", "14:46:00", "14:47:00", "14:48:00", "14:49:00", "14:50:00",
"14:51:00", "14:52:00", "14:53:00", "14:54:00", "14:55:00", "14:56:00",
"14:57:00", "14:58:00", "14:59:00", "15:00:00", "15:01:00", "15:02:00",
"15:03:00", "15:04:00", "15:05:00", "15:06:00", "15:07:00", "15:08:00",
"15:09:00", "15:10:00", "15:11:00", "15:12:00", "15:13:00", "15:14:00",
"15:15:00", "15:16:00", "15:17:00", "15:18:00", "15:19:00", "15:20:00",
"15:21:00", "15:22:00", "15:23:00", "15:24:00", "15:25:00", "15:26:00",
"15:27:00", "15:28:00", "15:29:00", "15:30:00", "15:31:00", "15:32:00",
"15:33:00", "15:34:00", "15:35:00", "15:36:00", "15:37:00", "15:38:00",
"15:39:00", "15:40:00", "15:41:00", "15:42:00", "15:43:00", "15:44:00",
"15:45:00", "15:46:00", "15:47:00", "15:48:00", "15:49:00", "15:50:00",
"15:51:00", "15:52:00", "15:53:00", "15:54:00", "15:55:00", "15:56:00",
"15:57:00", "15:58:00", "15:59:00", "16:00:00", "16:01:00", "16:02:00",
"16:03:00", "16:04:00", "16:05:00", "16:06:00", "16:07:00", "16:08:00",
"16:09:00", "16:10:00", "16:11:00", "16:12:00", "16:13:00", "16:14:00",
"16:15:00", "16:16:00", "16:17:00", "16:18:00", "16:19:00", "16:20:00",
"16:21:00", "16:22:00", "16:23:00", "16:24:00", "16:25:00", "16:26:00",
"16:27:00", "16:28:00", "16:29:00", "16:30:00", "16:31:00", "16:32:00",
"16:33:00", "16:34:00", "16:35:00", "16:36:00", "16:37:00", "16:38:00",
"16:39:00", "16:40:00", "16:41:00", "16:42:00", "16:43:00", "16:44:00",
"16:45:00", "16:46:00", "16:47:00", "16:48:00", "16:49:00", "16:50:00",
"16:51:00", "16:52:00", "16:53:00", "16:54:00", "16:55:00", "16:56:00",
"16:57:00", "16:58:00", "16:59:00", "17:00:00", "17:01:00", "17:02:00",
"17:03:00", "17:04:00", "17:05:00", "17:06:00", "17:07:00", "17:08:00",
"17:09:00", "17:10:00", "17:11:00", "17:12:00", "17:13:00", "17:14:00",
"17:15:00", "17:16:00", "17:17:00", "17:18:00", "17:19:00", "17:20:00",
"17:21:00", "17:22:00", "17:23:00", "17:24:00", "17:25:00", "17:26:00",
"17:27:00", "17:28:00", "17:29:00", "17:30:00", "17:31:00", "17:32:00",
"17:33:00", "17:34:00", "17:35:00", "17:36:00", "17:37:00", "17:38:00",
"17:39:00", "17:40:00", "17:41:00", "17:42:00", "17:43:00", "17:44:00",
"17:45:00", "17:46:00", "17:47:00", "17:48:00", "17:49:00", "17:50:00",
"17:51:00", "17:52:00", "17:53:00", "17:54:00", "17:55:00", "17:56:00",
"17:57:00", "17:58:00", "17:59:00", "18:00:00", "18:01:00", "18:02:00",
"18:03:00", "18:04:00", "18:05:00", "18:06:00", "18:07:00", "18:08:00",
"18:09:00", "18:10:00", "18:11:00", "18:12:00", "18:13:00", "18:14:00",
"18:15:00", "18:16:00", "18:17:00", "18:18:00", "18:19:00", "18:20:00",
"18:21:00", "18:22:00", "18:23:00", "18:24:00", "18:25:00", "18:26:00",
"18:27:00", "18:28:00", "18:29:00", "18:30:00", "18:31:00", "18:32:00",
"18:33:00", "18:34:00", "18:35:00", "18:36:00", "18:37:00", "18:38:00",
"18:39:00", "18:40:00", "18:41:00", "18:42:00", "18:43:00", "18:44:00",
"18:45:00", "18:46:00", "18:47:00", "18:48:00", "18:49:00", "18:50:00",
"18:51:00", "18:52:00", "18:53:00", "18:54:00", "18:55:00", "18:56:00",
"18:57:00", "18:58:00", "18:59:00", "19:00:00", "19:01:00", "19:02:00",
"19:03:00", "19:04:00", "19:05:00", "19:06:00", "19:07:00", "19:08:00",
"19:09:00", "19:10:00", "19:11:00", "19:12:00", "19:13:00", "19:14:00",
"19:15:00", "19:16:00", "19:17:00", "19:18:00", "19:19:00", "19:20:00",
"19:21:00", "19:22:00", "19:23:00", "19:24:00", "19:25:00", "19:26:00",
"19:27:00", "19:28:00", "19:29:00", "19:30:00", "19:31:00", "19:32:00",
"19:33:00", "19:34:00", "19:35:00", "19:36:00", "19:37:00", "19:38:00",
"19:39:00", "19:40:00", "19:41:00", "19:42:00", "19:43:00", "19:44:00",
"19:45:00", "19:46:00", "19:47:00", "19:48:00", "19:49:00", "19:50:00",
"19:51:00", "19:52:00", "19:53:00", "19:54:00", "19:55:00", "19:56:00",
"19:57:00", "19:58:00", "19:59:00", "20:00:00", "20:01:00", "20:02:00",
"20:03:00", "20:04:00", "20:05:00", "20:06:00", "20:07:00", "20:08:00",
"20:09:00", "20:10:00", "20:11:00", "20:12:00", "20:13:00", "20:14:00",
"20:15:00", "20:16:00", "20:17:00", "20:18:00", "20:19:00", "20:20:00",
"20:21:00", "20:22:00", "20:23:00", "20:24:00", "20:25:00", "20:26:00",
"20:27:00", "20:28:00", "20:29:00", "20:30:00", "20:31:00", "20:32:00",
"20:33:00", "20:34:00", "20:35:00", "20:36:00", "20:37:00", "20:38:00",
"20:39:00", "20:40:00", "20:41:00", "20:42:00", "20:43:00", "20:44:00",
"20:45:00", "20:46:00", "20:47:00", "20:48:00", "20:49:00", "20:50:00",
"20:51:00", "20:52:00", "20:53:00", "20:54:00", "20:55:00", "20:56:00",
"20:57:00", "20:58:00", "20:59:00", "21:00:00", "21:01:00", "21:02:00",
"21:03:00", "21:04:00", "21:05:00", "21:06:00", "21:07:00", "21:08:00",
"21:09:00", "21:10:00", "21:11:00", "21:12:00", "21:13:00", "21:14:00",
"21:15:00", "21:16:00", "21:17:00", "21:18:00", "21:19:00", "21:20:00",
"21:21:00", "21:22:00", "21:23:00", "21:24:00", "21:25:00", "21:26:00",
"21:27:00", "21:28:00", "21:29:00", "21:30:00", "21:31:00", "21:32:00",
"21:33:00", "21:34:00", "21:35:00", "21:36:00", "21:37:00", "21:38:00",
"21:39:00", "21:40:00", "21:41:00", "21:42:00", "21:43:00", "21:44:00",
"21:45:00", "21:46:00", "21:47:00", "21:48:00", "21:49:00", "21:50:00",
"21:51:00", "21:52:00", "21:53:00", "21:54:00", "21:55:00", "21:56:00",
"21:57:00", "21:58:00", "21:59:00", "22:00:00", "22:01:00", "22:02:00",
"22:03:00", "22:04:00", "22:05:00", "22:06:00", "22:07:00", "22:08:00",
"22:09:00", "22:10:00", "22:11:00", "22:12:00", "22:13:00", "22:14:00",
"22:15:00", "22:16:00", "22:17:00", "22:18:00", "22:19:00", "22:20:00",
"22:21:00", "22:22:00", "22:23:00", "22:24:00", "22:25:00", "22:26:00",
"22:27:00", "22:28:00", "22:29:00", "22:30:00", "22:31:00", "22:32:00",
"22:33:00", "22:34:00", "22:35:00", "22:36:00", "22:37:00", "22:38:00",
"22:39:00", "22:40:00", "22:41:00", "22:42:00", "22:43:00", "22:44:00",
"22:45:00", "22:46:00", "22:47:00", "22:48:00", "22:49:00", "22:50:00",
"22:51:00", "22:52:00", "22:53:00", "22:54:00", "22:55:00", "22:56:00",
"22:57:00", "22:58:00", "22:59:00", "23:00:00", "23:01:00", "23:02:00",
"23:03:00", "23:04:00", "23:05:00", "23:06:00", "23:07:00", "23:08:00",
"23:09:00", "23:10:00", "23:11:00", "23:12:00", "23:13:00", "23:14:00",
"23:15:00", "23:16:00", "23:17:00", "23:18:00", "23:19:00", "23:20:00",
"23:21:00", "23:22:00", "23:23:00", "23:24:00", "23:25:00", "23:26:00",
"23:27:00", "23:28:00", "23:29:00", "23:30:00", "23:31:00", "23:32:00",
"23:33:00", "23:34:00", "23:35:00", "23:36:00", "23:37:00", "23:38:00",
"23:39:00", "23:40:00", "23:41:00", "23:42:00", "23:43:00", "23:44:00",
"23:45:00", "23:46:00", "23:47:00", "23:48:00", "23:49:00", "23:50:00",
"23:51:00", "23:52:00", "23:53:00", "23:54:00", "23:55:00", "23:56:00",
"23:57:00", "23:58:00", "23:59:00"), class = "factor")
Use [[ instead of [ and it will work:
# `[[` extracts the column itself as a vector, which strptime() can parse
# (assuming `file` is a data frame, `[` would pass a one-column data frame).
# as.POSIXct() converts the POSIXlt result to the date-time class that is
# meant to be stored in data frame columns.
file[[2]] <- as.POSIXct(strptime(file[[2]], "%X"))
The result:
[1] "2014-06-04 17:24:00" "2014-06-04 17:25:00"
[3] "2014-06-04 17:26:00" "2014-06-04 17:27:00"
[5] "2014-06-04 17:28:00" "2014-06-04 17:29:00"
By the way, when parsing input, "%X" is equivalent to "%H:%M:%S" (on output it is locale-specific).
I have a list of text sections that need to be split into sentences as follows:
> textList <- list(sections=sections[(length(sections)-2):length(sections)])
> textList$sentences <- sapply(textList$sections, function(x) strsplit(as.character(x), "(?<=und/KON)\\s(?!\\S+/V)|(?<=oder/KON)\\s|(?<=/\\$[[:punct:]])\\s(?!dass/KOUS)(?!dann/ADV)(?!weil/KOUS)", perl=TRUE))
> sent <- textList$sentences
The final goal is to add IDs to all sentences and arrange them into a list of data frames, with one data frame corresponding to each section.
> sent.list <- lapply(seq_along(sent), function(i)
+ data.frame(ID=paste(sprintf("%02d", i), sprintf("%03d", seq_along(sent[[i]])), sep = ""),
+ Sentence=sent[[i]]))
Error in data.frame(ID = paste(sprintf("%02d", i), sprintf("%03d", seq_along(sent[[i]])), :
arguments imply differing number of rows: 1, 0
ISSUE: However I vary the split in the first step, I always seem to get a list with exactly one character(0) element (the last one). This prevents the second step -- creating the list of data frames -- from running and produces the error above.
Please note that the structure of the list seems somehow corrupted. In the R console output pasted below, the first two sections begin with $... (marked #*), which, by the way, I cannot interpret meaningfully. However, the third section (marked #**) starts with [[3]].
> sent
$... #*
[1] "Das/ART Spiel/NN besteht/VVFIN aus/APPR mehreren/PIAT Früchten/NN -LRB-/TRUNC rote/ADJA Kirschen/NN ,/$,"
.
.
.
[51] "-RRB-/TRUNC sie/PPER bleiben/VVFIN die/ART ganze/ADJA Zeit/NN über/APPR konzetriert/ADJD bei/APPR der/ART Sache/NN ./$."
[52] "Das/ART Spiel/NN ist/VAFIN eine/ART absolue/ADJA Kaufempfehlung/NN !!!!/CARD "
$... #*
[1] "Obstgarten/NN ist/VAFIN DAS/NE Einsteigerspiel/NN für/APPR Kinder/NN ab/APPR zwei/CARD Jahren/NN ./$."
.
.
.
[36] "hochgelobten/ADJA Klassiker/NN werden/VAFIN lassen/VVINF kann/VMFIN ./$."
[[3]] #**
character(0)
I tried hard to reproduce the error on artificial data, without much success. So please excuse the complicated code.
The smallest version of textList for which I could reproduce the error when executed in the R console:
> textList
$sections
[1] "Obstgarten/NN ist/VAFIN DAS/NE Einsteigerspiel/NN für/APPR Kinder/NN ab/APPR zwei/CARD Jahren/NN ./$. Preis/NN führt/VVFIN ,/$, aus/APPR einem/ART einfachen/ADJA Spiel/NN schnell/ADJD einen/ART hochwertigen/ADJA und/KON hochgelobten/ADJA Klassiker/NN werden/VAFIN lassen/VVINF kann/VMFIN ./$. "
[2] ""
Below is the dput output of the smallest version of textList that reproduces the error.
structure(list(sections = c("Obstgarten/NN ist/VAFIN DAS/NE Einsteigerspiel/NN für/APPR Kinder/NN ab/APPR zwei/CARD Jahren/NN ./$. Die/ART Spielidee/NN ist/VAFIN wie/KOKOM bei/APPR allen/PIDAT Spielen/NN mit/APPR dieser/PDAT Zielaltersklasse/NN außerordentlich/ADJD einfach/ADJD ./$. Hier/ADV geht/VVFIN es/PPER darum/PROAV ,/$, reihum/ADV zu/PTKZU würfeln/VVINF ./$. Der/ART Würfel/NN zeigt/VVFIN keine/PIAT Zahlen/NN ,/$, sondern/KON vier/CARD Farben/NN ,/$, einen/ART Raben/NN und/KON einen/ART Obstkorb/NN ./$. Bei/APPR einer/ART Farbe/NN darf/VMFIN man/PIS ein/ART Stück/NN Obst/NN von/APPR einem/ART der/ART vier/CARD Obstbäume/NN im/APPRART Obstgarten/NN pflücken/VVFIN ,/$, bei/APPR einem/ART Raben/NN muss/APPR eines/ART von/APPR neun/CARD Rabenpuzzleteilen/NN gelegt/VVPP werden/VAINF ,/$, bei/APPR einem/ART Obstkorb/NN darf/VMFIN man/PIS zwei/CARD Obststücke/NN nach/APPR Wahl/NN abräumen/VVINF ./$. Entweder/KON es/PPER gewinnen/VVFIN alle/PIS ,/$, weil/KOUS alles/PIS Obst/NN abgeerntet/VVPP ist/VAFIN ,/$, bevor/KOUS der/ART Rabe/NN fertig/ADJD gepuzzlet/VVPP wurde/VAFIN oder/KON es/PPER verlieren/VVFIN alle/PIDAT gegen/APPR den/ART fertigen/ADJA Raben/NN ./$. Die/ART Idee/NN eines/ART ``/CARD kooperativen/ADJA ''/ADJA Spiels/NN hat/VAFIN viele/PIDAT Freunde/NN ,/$, macht/VVFIN das/ART Spiel/NN aber/ADV noch/ADV langweiliger/ADJD ,/$, als/KOUS es/PPER unbedingt/ADV nötig/ADJD wäre/VAFIN ./$. Unser/PPOSAT vierjähriger/ADJA Sohn/NN versucht/VVFIN schon/ADV so/ADV zu/PTKZU mogeln/VVINF ,/$, dass/KOUS der/ART Rabe/NN gewinnt/VVFIN -/$( einfach/ADV um/APPR mehr/PIAT Pepp/NN in/APPR das/ART Spiel/NN zu/PTKZU bringen/VVINF ./$. 
Selbst/ADV unsere/PPOSAT zweijährige/ADJA Tochter/NN wagt/VVFIN sich/PRF schon/ADV an/APPR die/ART Regeln/NN ,/$, wenn/KOUS sie/PPER sich/PRF spielerisch/ADJD dem/ART Diktat/NN des/ART Würfels/NN verweigert/VVFIN und/KON erklärt/VVFIN ,/$, jedes/PIDAT Obst/NN zu/PTKZU pflücken/VVINF ,/$, aber/KON bei/APPR einem/ART roten/ADJA Würfel/NN keine/PIAT rote/ADJA Kirsche/NN ./$. Das/ART Spiel/NN besticht/VVFIN vor/APPR allem/PIS durch/APPR die/ART Qualität/NN seiner/PPOSAT Verarbeitung/NN ./$. Die/ART Obstsorten/NN sind/VAFIN gut/ADJD gestaltete/ADJA und/KON lackierte/ADJA Holzstücke/NN ./$. Die/ART Kirschen/NN hängen/VVFIN paarweise/ADV am/APPRART Baum/NN und/KON auch/ADV die/ART Obstkörbe/NN sind/VAFIN liebevoll/ADJD geflochten/VVPP ./$. Solch/PIDAT ein/ART Spiel/NN packt/VVFIN man/PIS immer/ADV wieder/ADV gerne/ADV aus/PTKVZ ./$. Besonders/ADV schön/ADJD ist/VAFIN die/ART Sonderedition/NN im/APPRART Blechkasten/NN statt/APPR im/APPRART Pappkarton/NN ./$. Warum/PWAV Spielehersteller/NN sich/PRF immer/ADV wieder/ADV vor/APPR den/ART Kosten/NN einer/ART hochwertigen/ADJA Herstellung/NN drücken/VVINF bleibt/VVFIN ein/ART ungeklärtes/ADJA Geheimnis/NN ,/$, zumal/KOUS so/ADV schöne/ADJA Spiele/NN wie/KOKOM Obstgarten/NN beweisen/VVFIN ,/$, dass/KOUS eine/ART hochwertige/ADJA und/KON liebevolle/ADJA Gestaltung/NN ,/$, die/PRELS selbstverständlich/ADJD zu/APPR einem/ART etwas/ADV höheren/ADJA Preis/NN führt/VVFIN ,/$, aus/APPR einem/ART einfachen/ADJA Spiel/NN schnell/ADJD einen/ART hochwertigen/ADJA und/KON hochgelobten/ADJA Klassiker/NN werden/VAFIN lassen/VVINF kann/VMFIN ./$. ",
"")), .Names = "sections")
Just remove the elements with length equal to 0:
# Drop the empty (zero-length) sections. `sent` is already a list holding one
# character vector of sentences per section, so it must not be flattened with
# unlist(): that would merge all sentences into a single vector and lose the
# per-section grouping needed to build one data frame per section.
# vapply() returns a plain integer vector, so the comparison is type-stable.
sent <- sent[vapply(sent, length, integer(1L)) > 0L]
EDIT: The OP seems to have trouble reproducing the error, so here is how to reproduce it:
Using this as sent for example:
## Minimal reproduction: the second list element is character(0),
## i.e. a zero-length character vector.
sent = list("a",character(0)) ## the character(0) element triggers the error
## Build one data frame per element of `sent`. For i = 2 the ID column still
## has length 1 (paste() treats the zero-length sprintf() result as ""),
## while the Sentence column has length 0, so data.frame() stops with
## "arguments imply differing number of rows: 1, 0".
lapply(seq_along(sent),
function(i)
data.frame(ID=paste(sprintf("%02d", i),
sprintf("%03d", seq_along(sent[[i]])), sep = ""),
Sentence=sent[[i]]))
This reproduces the error:
Error in data.frame(ID = paste(sprintf("%02d", i), sprintf("%03d", seq_along(sent[[i]])), :
arguments imply differing number of rows: 1, 0