How to retain only samples with same rowlengths in R? - r
I want to remove the rows of exp.normal whose row names end with the ".1" substring.
Then, I want to retain only the rows whose row names in exp.normal also occur in another data frame, exp.kirp, so that I can combine the two data frames column-wise.
My code returns two data frames with different numbers of rows.
# Drop rows whose row sum is infinite. drop = FALSE keeps the two-dimensional
# shape even if only a single row survives a filter.
exp.normal <- exp.normal[!is.infinite(rowSums(exp.normal)), , drop = FALSE]
# Drop rows that contain any NA.
exp.normal <- na.omit(exp.normal)
# Remove rows whose name ends with a literal ".1". The dot must be escaped:
# in a regular expression an unescaped "." matches any character, so '.1$'
# would also discard names such as "ABCA1" or "ABCF1".
has_dot1_suffix <- grepl("\\.1$", rownames(exp.normal))
exp.normal <- exp.normal[!has_dot1_suffix, , drop = FALSE]
# Keep only the row names present in both objects and index both by the same
# name vector, so the two results have the same rows in the same order and
# can be combined with cbind(exp.norm, exp.kirp.samp).
# NOTE(review): intersect() discards duplicated names and name indexing picks
# the first match, so this assumes row names are unique in both objects.
common_rows <- intersect(rownames(exp.normal), rownames(exp.kirp))
exp.kirp.samp <- exp.kirp[common_rows, , drop = FALSE]
exp.norm <- exp.normal[common_rows, , drop = FALSE]
Output:
> dim(exp.normal)
[1] 19947 32
> dim(exp.kirp)
[1] 12097 202
Traceback:
Error in cbind(...) : number of rows of matrices must match (see arg 2)
Example data:
dput(exp.norm)
structure(c(45.7005, 14525.5304, 2691.0051, 3648.1196, 3785.6462,
508.7428, 3386.262, 1189.0624, 375.0458, 1767.0259, 27.3361,
17196.2434, 2821.7784, 3730.9721, 8095.7046, 955.9156, 2899.3971,
1115.2977, 457.7995, 1821.7784, 45.3806, 19112.246, 3016.1901,
4261.1092, 9791.2504, 967.2683, 2105.4082, 795.0396, 419.566,
1941.4399, 50.9688, 14891.6723, 3558.9722, 3323.9259, 4598.989,
451.9671, 1407.3294, 410.278, 435.5518, 1647.4305, 108.6162,
9145.2729, 2705.0238, 2702.0338, 991.7092, 820.278, 3857.5328,
2015.6448, 332.5631, 1643.2139, 57.4382, 16482.7822, 2320.9426,
2881.0338, 5242.4173, 434.1923, 2427.6701, 985.4808, 369.4413,
1952.1095, 17.378, 16061.5305, 2530.1829, 4110.9756, 16517.6829,
1559.0915, 1917.0732, 607.9268, 558.2317, 1896.3415, 20.626,
16527.3632, 2925.5966, 4156.8309, 10196.4714, 635.9169, 2173.8045,
781.4243, 469.7868, 1987.3547, 9.4012, 21390.7875, 2698.0142,
4198.1283, 15663.0906, 1081.0287, 1256.9429, 591.6457, 585.8632,
1796.5457, 70.6982, 36833.3219, 2334.0517, 3118.3636, 3214.0421,
1119.1551, 2489.5293, 1626.5344, 372.9573, 1927.1034, 27.0724,
41553.7462, 2221.6031, 3961.8517, 7251.1787, 1841.8174, 1867.5525,
1033.0047, 366.9096, 2048.0069, 93.6277, 20812.3777, 3167.1196,
3068.6141, 1033.9674, 238.3764, 3262.2283, 927.9891, 368.2065,
1723.5054, 61.5543, 29531.5934, 3283.48, 3599.218, 531.7693,
224.8289, 2014.6628, 1123.1672, 321.6031, 1888.563, 28.7045,
14833.5053, 2878.5984, 3181.6764, 840.2164, 340.3734, 2266.8694,
1306.6222, 430.3965, 1614.8444, 32.3228, 20450.3576, 2638.1234,
3181.1024, 7459.6457, 791.6043, 2367.126, 982.2835, 444.2257,
1665.3543, 9.6095, 16474.3224, 2779.0965, 4393.5681, 14546.3247,
1160.0268, 1175.3446, 641.2711, 529.4793, 1929.9387, 33.5985,
13559.3697, 2940.1548, 3312.0163, 741.4452, 493.1579, 4346.1672,
1557.4905, 370.7226, 1643.4799, 26.9466, 26391.888, 2614.1732,
3922.3097, 11886.2642, 1467.3491, 2908.8364, 740.1575, 431.4961,
1638.1452, 51.9065, 20112.0953, 2840.2768, 3421.635, 1817.6237,
567.9274, 2244.2088, 867.8801, 385.6534, 1525.9852, 91.0713,
21614.0244, 2868.2575, 2628.6353, 408.8988, 486.2043, 1470.0472,
2422.3217, 404.9217, 1386.5275, 28.0177, 16412.3814, 2963.8349,
4044.6946, 7816.1037, 625.0426, 3087.001, 940.9758, 393.0399,
1762.8796, 45.7438, 30459.0464, 2345.8452, 3155.2153, 9420.2711,
1107.0516, 1614.3134, 818.9615, 484.3678, 1613.9445, 17.0691,
14674.209, 3037.4688, 3741.8818, 5809.3256, 688.5928, 4134.4713,
828.4763, 341.3822, 1690.2581, 44.1648, 12330.3918, 2558.2269,
3040.4316, 2229.2957, 686.3165, 4200.9857, 959.8567, 420.3641,
1570.6747, 17.6861, 19647.681, 2496.6604, 4041.9243, 6503.5269,
749.796, 2689.1762, 1000.3566, 423.1116, 1590.8995, 41.1018,
40847.6969, 2306.5671, 3025.5213, 3386.5546, 861.7896, 1909.7417,
1051.6651, 394.6467, 1680.361, 53.3787, 12392.7638, 2879.6926,
3503.1038, 7530.2986, 559.5507, 2631.9834, 796.9258, 437.1859,
1609.8138, 20.4024, 15847.5751, 2082.0815, 2966.7382, 1372.3176,
962.441, 1013.412, 810.0858, 478.0043, 1715.6652, 44.1964, 27787.4338,
2517.8366, 3247.7944, 3787.8788, 680.8477, 2439.9693, 955.1208,
431.5305, 2040.2762, 38.2464, 11097.4617, 3136.9164, 3515.104,
1173.7936, 411.0357, 3917.6148, 1790.5061, 351.9027, 1664.5743,
41.0752, 12219.0158, 3045.3933, 3753.4768, 371.6285, 612.8901,
2541.1433, 1990.7641, 445.4353, 1827.0053, 29.0631, 22662.2977,
2744.6112, 4235.6337, 13412.0641, 1127.8605, 1697.7296, 904.7316,
513.0433, 1923.9901), dim = c(10L, 32L), dimnames = list(c("A1BG",
"A2M", "AAMP", "AARS", "ABAT", "ABCA1", "ABCA2", "ABCA3", "ABCB7",
"ABCF1"), c("TCGA-BQ-5887-11A-01R-1965-07", "TCGA-DZ-6133-11A-01R-1965-07",
"TCGA-BQ-5884-11A-01R-1592-07", "TCGA-BQ-7044-11A-01R-1965-07",
"TCGA-BQ-5888-11A-01R-1592-07", "TCGA-BQ-7051-11A-02R-1965-07",
"TCGA-BQ-5879-11A-01R-1592-07", "TCGA-BQ-5882-11A-01R-1592-07",
"TCGA-BQ-5894-11A-01R-1592-07", "TCGA-DZ-6132-11A-01R-1965-07",
"TCGA-BQ-7045-11A-01R-1965-07", "TCGA-GL-A59R-11A-11R-A26U-07",
"TCGA-P4-A5E8-11A-12R-A28H-07", "TCGA-P4-A5ED-11A-11R-A28H-07",
"TCGA-BQ-7059-11A-01R-1965-07", "TCGA-BQ-5877-11A-01R-1592-07",
"TCGA-A4-A4ZT-11A-11R-A26U-07", "TCGA-BQ-5891-11A-01R-1592-07",
"TCGA-A4-A57E-11A-11R-A26U-07", "TCGA-BQ-7061-11A-01R-1965-07",
"TCGA-GL-6846-11A-01R-1965-07", "TCGA-BQ-5890-11A-01R-1592-07",
"TCGA-DZ-6131-11A-01R-1965-07", "TCGA-BQ-7055-11A-01R-1965-07",
"TCGA-B9-4115-11A-01R-1758-07", "TCGA-BQ-5875-11A-01R-1592-07",
"TCGA-BQ-7046-11A-01R-1965-07", "TCGA-GL-7966-11A-01R-2204-07",
"TCGA-DZ-6134-11A-01R-1965-07", "TCGA-GL-A9DE-11A-11R-A37K-07",
"TCGA-Y8-A8RY-11A-11R-A37K-07", "TCGA-BQ-5878-11A-01R-1592-07"
)))
> dput(exp.kirp)
structure(c(7.65342121905285, 14.3511850042327, 10.3737643425674,
10.0819596419255, 9.44832324553207, 5.36085937172008, 9.78880184184623,
10.3776687573505, 11.16757118884, 9.53872845925388, 9.59256168492636,
11.6467250199966, 9.64995723240483, 7.72893066783674, 7.8938008505495,
10.2355345297148, 4.7113572383413, 10.307474405626, 5.1591164988591,
6.82029258613417, 11.8163747078537, 10.6949102196217, 11.2422803547626,
8.59352109955772, 3.36085586100493, 9.85271363828624, 8.10771088195776,
11.3942834720292, 7.22380769346318, 10.1931428909004, 8.38100686984167,
10.16821542222, 7.89538927254103, 9.15267449482738, 10.3931817074315,
12.7214881212179, 10.1435311751243, 11.5928015984057, 9.61257956084687,
6.32433538607651, 9.96819071167712, 10.0874192149778, 10.14614948871,
9.68818857795196, 10.4090748876599, 10.6687443412299, 10.8848143975244,
11.5872726722286, 6.84628770347992, 10.2373774088459, 5.09389393824392,
12.4136523086721, 11.1918237390263, 10.1912122382252, 9.9623840324273,
6.22565668754941, 10.3398477765017, 10.3103072842012, 11.1287210937383,
9.12552428380532, 10.0220514952103, 11.5514020529076, 9.33154847897224,
7.15215907641367, 8.30013562492319, 10.2772713571793, 3.07471093384149,
9.90686083971644, 5.84486014570242, 7.85453119568792, 10.1727165187711,
11.3218051787965, 11.6207019570471, 8.14964399360302, 2.57221059331231,
9.770250762185, 8.2978654585063, 11.084297043801, 5.60196387752462,
8.88994408176988, 8.11137409958519, 10.6640911447013, 7.63002607924622,
8.98301980527478, 9.95231536364664, 9.50590581220822, 12.3983662709548,
9.74297479204394, 9.19670588373063, 5.03164662255944, 9.10229620506499,
10.4765987884097, 10.4203162794891, 9.17212793704954, 10.6963363394643,
10.517658858193, 10.6383042190661, 9.89639568127639, 4.96453263672961,
10.153415608879, 4.70168212029528, 10.1689338100564, 9.96839262629172,
9.87305770150294, 9.75535162798677, 6.170389794965, 10.238532641469,
9.94050095178643, 11.0690397931313, 9.18120494347434, 10.1026705963882,
12.2134644534149, 9.76558563096394, 7.17400985460863, 8.51390402420206,
10.6841464369114, 4.46450490913191, 10.2220147915752, 6.26732229936107,
8.34111937214434, 9.4563488702199, 11.3165579369515, 11.3692227748971,
7.56443152168505, 3.28865386360517, 9.3642795739989, 7.97047579449743,
4.39174710202512, 1.99320334863612, 9.09364811306327, 7.99728901996415,
11.0296194917623, 7.76961109298212, 9.33304596974955, 9.89296486273821,
13.2154969925887, 10.5733689401427, 10.0743768517653, 8.12622125684463,
4.20618996652279, 9.22288882589791, 10.7973270736212, 10.0356016423526,
9.9017250769121, 10.2083981309069, 11.6540022938525, 11.3739522748552,
8.46892945061225, 3.85569076543445, 10.0627068729804, 7.99645936536463,
13.6832285748428, 10.3714563849361, 10.4176870383992, 10.0652444551968,
5.98935248863472, 10.1079093507719, 11.2050505161752, 11.6645692817891,
9.14131578384568, 10.4504026669882, 11.7286541625055, 9.0418201112886,
7.18088382351455, 8.02888128245278, 10.4406656190814, 4.29335576875801,
9.41768714387146, 5.17869870850833, 6.88790782664042, 10.6188892562694,
10.3842123858415, 11.7658466970944, 7.8638220194434, 4.34994552290862,
9.22504206592907, 7.63249652719248, 10.3723102883537, 5.34994198553451,
9.48564780343169, 8.04610739496541, 10.4463535299365, 7.68066614055097,
9.04610766787879, 9.95556447653129, 11.8525448023587, 10.4640851987958,
10.4308625115247, 9.91159766583895, 5.37742225475561, 9.4487843137183,
9.99933682779041, 10.2770366214296, 9.0418201112886, 10.5578953924552,
10.4808099887997, 10.9764939558956, 10.4242897631097, 4.93490330712903,
10.9389453617464, 5.13719199914349, 12.0367193976262, 10.8202555636581,
10.3262700402849, 9.91216810777653, 5.52284042813955, 10.0653680664815,
10.5954686012028, 11.2355920880251, 9.10909645558853, 9.94472260124272,
11.5783590097537, 9.29504775099388, 7.53455423776441, 9.73144632203062,
10.3357853667922, 2.69972929338667, 11.0827581319988, 5.12711734972685,
7.20385010562547, 10.2823150418909, 9.98084618229907, 10.5982618153731,
8.87118417188149, 4.52284042813955, 8.94910562068286, 8.00490358291707,
11.0657044429112, 7.06536766254031, 8.7728136979997, 8.38298319671723,
10.0327018868117, 7.20326283373569, 9.47196961605789, 9.90466577812603,
9.49413435666035, 10.19403569598, 10.4724772111563, 11.1448925980171,
7.28004970633421, 9.06939981937328, 10.8754120177161, 10.2352930230415,
9.35902847180107, 10.7267739242029, 10.5529081501417, 11.2956215011293,
8.85725977829128, 4.55387039485638, 11.0056049014623, 6.95117512427229,
10.9656928148969, 10.5991523452113, 10.4556415168452, 9.46537845450025,
6.72337288854289, 10.0139441477751, 9.28408724134641, 11.4833270722276,
9.49617650025352, 10.4425557194467, 11.449658606754, 8.69606512512137,
6.7730033396692, 9.79804905625499, 10.6006257403548, 6.41043269493023,
9.33218159403931, 6.79787807595744, 3.66448284036468, 9.36947738932311,
12.1844424384667, 11.1649886100581, 8.30833493415398, 1.07949758402178,
8.50996853808744, 8.25444192773356, 11.0514188710826, 7.25444192773355,
10.2092692396731, 8.12117551878819, 10.7242737250263, 7.07511328163257,
9.24190737941017, 9.38899238984847, 11.5242722006977, 10.8138023505891,
9.39692881604932, 7.33690372047785, 5.99837831118633, 9.20104960820531,
10.271192322104, 10.3421982587672, 9.9583334671237, 10.8104108086575,
11.3588364978417, 10.3011032299907, 9.79976033549459, 4.03371371384131,
9.4502035528435, 3.61712213221935, 11.9975977242282, 9.91379851626213,
9.95999222715916, 9.90779350021794, 7.85831302551685, 10.3997246047534,
11.7402171909708, 11.7246448152361, 9.75167259046219, 9.68598603763708,
11.889881014998, 10.3276744540453, 8.04315308658688, 9.09516358798289,
10.1351204181247, 2.58736499093646, 10.6921133041207, 6.09727578619146,
10.2388193351428, 11.1387141417635, 9.60973686632844, 9.91029869099744,
7.0969238361025, 1.22478119439559, 7.85207251858217, 8.9132990393591,
11.4041748741886, 7.92325557174226, 11.9555213595621, 8.66299657133128,
9.99675988081849, 6.52046939465285, 9.19469924394784, 11.4453499696318,
13.7764385227361, 10.9582985177207, 9.73202730526663, 11.8793070895146,
5.674830656291, 9.30694800222568, 11.6556994451055, 10.4213132544326,
10.4922544904181, 10.5534738814693, 10.6996637140919, 10.2332275555211,
9.52531486289245, 6.37164075239763, 11.3566558699168, 3.15639661659767,
10.9506421354115, 10.8015070949819, 9.51027580369917, 10.0114476969219,
5.9232585434528, 10.1410206692489, 10.9093204661635, 11.1601119970792,
9.35889972298825, 9.89445931817658, 11.7097666903094, 10.3085136675886,
7.97175793055237, 9.39274501543653, 10.3787624907905, 6.94163806584425,
11.8483258820978, 5.28766436585954, 6.24271872469928, 10.0904144275087,
10.7658517928209, 10.6194247493219, 8.72532024089938, 6.12970957941129,
7.37932464409351, 8.97175821789909, 13.5632882613554, 6.31028194625214,
8.64822705939532, 9.18732138776524, 10.4159346076083, 6.46711478729057,
8.74638197005123, 9.80945126361765, 11.0458332956411, 12.6596471469673,
7.83425486655716, 10.9194011668402, 4.66760264911311, 8.44306371542639,
11.6735729859861, 10.592053792505, 10.0394481089966, 10.738081785684,
10.6804287993454, 10.4484290791818, 8.73413344315171, 6.43659995275074,
10.6685246199734, 8.20162111992077, 12.7954861179578, 10.0094938620897,
9.93400880935778, 9.75330735360058, 7.0082672553098, 9.74081003982032,
10.7382235152475, 11.6720072357516, 8.44494819471576, 9.99223068991282,
11.0703963722836, 9.47126368282115, 7.81421970385892, 9.50816775056871,
11.4983748936735, 6.23485957395859, 11.5903647810385, 6.26655044400704,
5.21798676951157, 10.0355914707199, 10.2338111446746, 11.8068330986116,
7.98850462265062, 2.26801511865652, 8.75403900144463, 8.99470954056443,
13.8050246222504, 9.39637273263616, 9.25174607188694, 8.59323012958596,
9.64227593422842, 7.73613603148148, 8.71848835304267, 11.3433845026852,
11.2390478373181, 10.8940677306605, 10.8800576375586, 10.6070670903737,
5.6912382257558, 9.46054395023954, 9.9164661944151, 9.35162700413489,
9.86387775885121, 10.0058120312308, 10.7880204671486, 10.3793997040699,
9.6038232649825, 4.26803009485916, 10.2782371039023, 6.26475409489153,
12.8198023802381, 11.7916439373724, 9.53606689182685, 10.3591574288036,
5.71406413888836, 10.0630462305102, 10.2195932783632, 11.455724780085,
8.42239448735832, 9.24745049099849, 12.1236985893816, 9.92518871297862,
7.89993146943836, 8.72944026793844, 10.4251666934438, 3.90670784266208,
9.20041190448898, 6.65613736826174, 10.2389705995139, 9.53818563335118,
11.7495320050765, 12.0646887131882, 8.44896824561834, 3.71406963555001,
9.8415805435622, 8.31270283399943, 5.24299494486063, 6.49167354977479,
9.98614168480194, 8.4582261368972, 9.59531026428392, 8.57655917226128,
8.95437954428976, 10.6495249285462, 14.9290954211312, 10.5422470205379,
9.17443338491354, 3.94233621005718, 5.67224428652735, 8.7869060376421,
10.8632316688763, 9.68413003969761, 10.0472843996015, 10.3973606067328,
10.2873877943623, 10.1185985217259, 9.54984491532867, 3.90670784266208,
8.67224499381939, 5.31084177712261, 13.1978705809921, 9.83862580682156,
9.54417855835282, 10.3479785104067, 6.95803180123955, 10.5275135616911,
10.7557094705532, 11.5723066760841, 9.32468663898682, 9.95090752477796,
11.895394953164, 10.1730449599892, 7.96865297090106, 8.25269807010003,
10.3946717952085, 3.87765591897716, 9.72523791794449, 6.55356786048423,
6.95803180123955, 10.5069909263905, 10.2877565830483, 10.5166977513248,
8.4726051481255, 4.38016145559557, 8.85875002019905, 8.67423888569003,
8.68286505123176, 5.31506259026344, 10.3333282396821, 9.31228692654063,
10.4142296315944, 7.85981553329836, 8.44751431678991, 11.0858929239232,
9.28987549215634, 11.1939401219005, 9.81036549777261, 8.34254452326977,
5.73563860631975, 9.00522312099486, 10.3688436329726, 9.94464471906402,
10.1522308353126, 10.7920437057173, 10.4456146711682, 10.5376544440661,
9.5106538108757, 6.00694202058687, 10.5239172935582, 6.07015744755466,
13.1209957412573, 10.9576837919022, 9.8945869439465, 9.23740931858995,
4.16326524153571, 10.523017510386, 10.532865308348, 10.619622367405,
9.45558963540716, 11.5136546699209, 11.209027561542, 8.50311772962086,
6.94462773667725, 9.43239470040379, 10.9762474117231, 3.74822505823549,
8.22396403818439, 5.28179467379438, 5.07015744755466, 9.40882053452885,
9.15195265459219, 11.7917134419519, 8.19669113197608, 4.74823042627325,
10.2400835226029, 7.76318049103673, 10.5252116829711, 5.07015744755466,
9.41356636990191, 8.69464935705204, 10.5448108847847, 6.4670479262821,
8.25602512546126, 10.0954826060933, 9.24807628369884, 10.2848014332547,
9.6103510748559, 7.88436713970017, 6.04591033802914, 7.98344684046261,
9.83392420760322, 10.426537121557, 8.77797787921888, 11.1932452952465,
10.889486091222, 10.329430966151, 9.26655583156462, 3.62269612816767,
10.5609425545211, 5.59521719360418, 13.9452588877727, 11.136365018286,
10.0338699199786, 9.01178744914112, 7.04236745871339, 9.77739489226773,
10.1944306108943, 11.0829417734315, 9.2483306049673, 10.2526052400654,
11.2392043296817, 9.78826123015354, 7.37009277188134, 9.28978490856306,
10.770835486621, 4.07112766139071, 8.79617819177588, 6.50274907789938,
4.95248267443048, 10.0124053945822, 10.8791588020481, 10.1400457232732,
9.64018220112663, 7.9855897990336, 8.44341851204054, 8.1851425389976,
11.3009567982918, 7.87032217794857, 9.12571179465448, 8.10396912418597,
10.0952450966319, 7.11418480239357, 9.3240403215499, 9.55790821677093,
12.5506935970456, 11.2890199649612, 9.74578287914873, 8.83302700041379,
6.80619256918401, 8.91595554395471, 10.4010993451482, 10.426361210739,
9.65687923171916, 10.3180541086121, 10.2515587448026, 10.4679560946132,
9.3710569683502, 5.63055399711973, 9.66161458306801, 3.49639843291562,
12.046947416625, 7.48250063297666, 10.3197777792499, 9.47365522397164,
6.95629018836077, 9.81146637438849, 9.67304984423569, 11.9690923038785,
9.99940969863312, 10.4101699932371, 12.2230772873117, 9.90523370112278,
7.10116469832952, 8.84409058546722, 11.2100883041446, 2.89400283481279,
12.1723192712031, 7.02813039834545, 7.76654547216671, 9.33005660517673,
9.71268261052322, 10.1695833227844, 7.83721098924632, 1.12849137851042,
7.14639353071401, 7.90194088386521, 10.2858186675831, 4.09194519835092,
9.82683604431664, 9.28935150377015, 10.3990006824976, 7.03632419457704,
9.25259305937555, 9.96009564626238, 13.6495813760682, 11.2087562804992,
8.62991100391626, 10.8093586506308, 4.36112967113519, 8.66452479510135,
10.680372183305, 10.4092425218598, 10.166842081886, 11.1148826551454,
11.5218621620828, 10.8830252212276, 8.50698349076537, 6.57472823565416,
10.546746035965, 7.53012147669141, 15.4681548425702, 9.79693513247838,
9.38793395115019, 8.98389796929759, 5.4990039364996, 9.68780798972713,
10.2221443965538, 11.2238327266469, 8.86631579791574, 9.0839648421511,
11.7688762222962, 9.43339244300495, 7.75707059749995, 7.30848010698361,
10.3788698682696, 5.10854118358023, 9.397938764198, 5.14397700761083,
5.54294517859488, 11.3477092783888, 10.1821779329165, 10.5068822633094,
7.95165772727702, 6.07899829224459, 9.17521394310747, 8.44018606106649,
10.2131062858423, 5.98655201209625, 9.76585079853419, 8.60644889080884,
9.71847037220014, 7.04446188071599, 9.27521498813589, 10.9901943954469,
11.5209125420426, 9.65147026322989, 10.6316810922306, 5.74458092333422,
8.26977703134258, 10.0141332026215, 9.03736135461269, 9.96719613631738,
9.93388860627356, 10.7248434525252, 10.2215812006449, 10.3621030533842,
11.4668080776894, 7.99184662847717, 9.33580121861008, 5.66820140316967,
11.9947030005322, 9.64794224178718, 10.1925126545798, 10.2674620603105,
7.5327081297796, 9.69101099541322, 11.2548391298699, 11.7689577047578,
9.19050300947907, 9.56006949444913, 11.2747943605218, 10.0095406093405,
8.00727063981421, 9.97974599674609, 10.3602156709497, 3.94458674388554,
12.5277668843542, 5.61301589155362, 10.2348465241763, 11.2408106474539,
10.7817980294952, 11.3929731417988, 8.56239088440517, 2.39709102017243,
8.66820104853111, 9.02757458085238, 12.6567500587125, 6.6095466615844,
11.4664830513311, 8.87780988126623, 10.3650994493445, 8.35292135920633,
7.93983129663749, 9.4812924824758, 12.99012924981, 10.4898544816469,
8.58540087815148, 10.8618786972288, 7.71067834148438, 8.93864071660805,
11.4031536223651, 10.7979767770093, 9.84830839236378, 9.47801724034487,
10.6468489299639, 10.3611048811716, 8.96812071185483, 5.42457950821431,
10.7176375280321, 5.99997294921438, 11.249476392449, 10.9207261360998,
10.3485991467441, 9.01994649367039, 6.53868559158395, 9.41478373733666,
10.432374028565, 11.0171355028297, 9.38017184771789, 10.5614675309011,
11.5465057188577, 9.17787519818794, 7.04065904752138, 11.0984136358699,
10.7076152049804, 2.37928824997193, 12.014117542008, 6.92057199739813,
4.98912992235826, 10.120606346187, 10.2084736088023, 10.4873799256012,
9.27923169328214, 1.52972101613862, 9.22178985762886, 7.95596194920281,
12.8045414410067, 7.92201464494711, 9.47902214483086, 7.4885399127133,
10.1807490909157, 6.70138409679205, 9.69962242189537, 10.1778751981879,
9.210584414225, 10.547619289895, 10.7420721677751, 11.6561438053443,
5.89426967756534, 9.09193991076902, 10.4509432609399, 10.2168981211661,
9.50697734395297, 10.1421973186731, 10.289252816822, 11.0579561962947,
8.44378357961471, 4.8801957289431, 11.4458754454719, 4.65344988200111,
12.0454913320053, 10.7230307474822, 9.54770815725829, 9.91346136214138,
6.41298301444827, 10.2791008470934, 11.2856294138809, 11.2550839389669,
9.39663175119314, 10.3644682533465, 12.3720956896476, 9.96448281969001,
7.26859346154833, 9.52846036210416, 10.6382351536235, 4.31017286300409,
11.0690011128305, 5.82641628715103, 6.80493638177069, 9.89786451034248,
9.94481833785949, 11.6774877172589, 9.01061202172265, 6.17266952106643,
9.5752544613585, 8.43194131360949, 11.0206656082034, 7.72110534236959,
10.5081439758347, 8.68994452434197, 10.8685929396885, 6.5444359272107,
8.68679113793189, 10.8525006396094, 11.1533795693109, 11.5873539730162,
8.99794551516942, 9.87102611145061, 3.93820109105871, 9.06141196830279,
10.3168170965086, 10.0968873453464, 9.78516864210074, 10.6089638880433,
10.6772891469749, 10.6027030400136, 10.5354892373923, 5.69623034672501,
10.4314703380043, 3.77147362287765, 11.9414284466395, 8.62911943026167,
9.47885474304884, 9.32603460205856, 7.52941181265447, 10.1897935554257,
10.6869529617348, 10.9824945325978, 9.54925041577186, 9.24348510543789,
12.6197561225204, 10.4229927556855, 7.78641143939821, 10.3518043181888,
10.2176900900617, 2.72462844562171, 13.9420477990178, 6.05669840185354,
8.84668667779751, 10.976770882955, 11.2310420929738, 11.9128532309879,
8.82897202130395, 2.92626515369416, 9.02841610083176, 9.70477480883974,
13.4396963975546, 6.34522248637767, 10.2353559766448, 8.78641143939821,
10.5578417013582, 6.64287771079513, 9.06181129266044, 10.6360273009065,
12.9150155941383, 11.9719694740862, 8.95056581144122, 10.0640399701312,
7.2355968744756, 9.21728961537032, 11.5561009223631, 9.95201054312933,
10.0229270264038, 10.7583335783419, 10.7564054571028, 10.3137246220883,
8.76465102394471, 5.79502400957859, 9.85443348382268, 5.2076547248541,
12.2695992772649, 10.7969615679355, 9.78033864553782, 9.39835742542764,
6.02727350425616, 9.5278442796299, 11.038246992221, 11.1457431696807,
9.00232447215102, 9.03646624386732, 12.062072892785, 10.0526651781329,
7.29576250712518, 7.86933843816656, 9.61223637364334, 3.32380238041871,
8.70355167900769, 5.73415681989236, 8.69454663545621, 11.3998442086989,
11.3703586767908, 12.3235644563292, 9.05367152133834, 10.196743500022,
9.91601721982026, 8.03544796058228, 12.1704251930224, 5.87162190579696,
8.07993112355535, 8.03136652207373, 9.91767640083016, 5.98915036355025,
9.10990689967612, 11.1609999554059, 11.726925238744, 12.2657872278982,
9.73588750953117, 11.4370505772681, 5.57906848944612, 9.27950936832482,
9.65861638042757, 9.17054127127384, 9.78579998093652, 10.5632561528303,
10.254342831578, 10.148100924125, 10.2726105411389, 4.18254929202166,
10.6197309312023), dim = c(50L, 20L), dimnames = list(c("A1BG",
"A2M", "A4GALT", "AAAS", "AACS", "AADAT", "AAGAB", "AAK1", "AAMP",
"AARS2", "AARSD1", "AARS", "AASDHPPT", "AASDH", "AASS", "AATF",
"AATK", "ABAT", "ABCA11P", "ABCA12", "ABCA1", "ABCA2", "ABCA3",
"ABCA5", "ABCA6", "ABCA7", "ABCB10", "ABCB1", "ABCB4", "ABCB6",
"ABCB7", "ABCB8", "ABCB9", "ABCC10", "ABCC1", "ABCC3", "ABCC4",
"ABCC5", "ABCC6", "ABCC9", "ABCD1", "ABCD3", "ABCD4", "ABCE1",
"ABCF1", "ABCF2", "ABCF3", "ABCG1", "ABCG2", "ABHD10"), c("TCGA.2K.A9WE.01A",
"TCGA.2Z.A9J1.01A", "TCGA.2Z.A9J3.01A", "TCGA.2Z.A9J5.01A", "TCGA.2Z.A9J6.01A",
"TCGA.2Z.A9J7.01A", "TCGA.2Z.A9J8.01A", "TCGA.2Z.A9JD.01A", "TCGA.2Z.A9JI.01A",
"TCGA.2Z.A9JJ.01A", "TCGA.2Z.A9JO.01A", "TCGA.2Z.A9JQ.01A", "TCGA.4A.A93W.01A",
"TCGA.4A.A93X.01A", "TCGA.4A.A93Y.01A", "TCGA.5P.A9JU.01A", "TCGA.5P.A9JY.01A",
"TCGA.5P.A9KE.01A", "TCGA.A4.7288.01A", "TCGA.A4.7583.01A")))
We may use intersect
# Row names present in both objects (duplicates are discarded by intersect).
nm1 <- intersect(row.names(exp.normal), row.names(exp.kirp))
# Index both objects by the same name vector so their rows line up.
# drop = FALSE keeps a matrix even when only one row name is shared;
# without it the result collapses to a plain vector and dim() returns NULL.
exp.kirp.samp <- exp.kirp[nm1, , drop = FALSE]
exp.norm <- exp.normal[nm1, , drop = FALSE]
dim(exp.norm)
#[1] 8 32
dim(exp.kirp.samp)
#[1] 8 20
In the sample data shown, there are no duplicate row names. It may be better to check for duplicates with a frequency count on the row names:
> table(row.names(exp.normal))
A1BG A2M AAMP AARS ABAT ABCA2 ABCA3 ABCB7
1 1 1 1 1 1 1 1
> table(row.names(exp.kirp))
A1BG A2M A4GALT AAAS AACS AADAT AAGAB AAK1 AAMP AARS AARS2 AARSD1 AASDH AASDHPPT
1 1 1 1 1 1 1 1 1 1 1 1 1 1
AASS AATF AATK ABAT ABCA1 ABCA11P ABCA12 ABCA2 ABCA3 ABCA5 ABCA6 ABCA7 ABCB1 ABCB10
1 1 1 1 1 1 1 1 1 1 1 1 1 1
ABCB4 ABCB6 ABCB7 ABCB8 ABCB9 ABCC1 ABCC10 ABCC3 ABCC4 ABCC5 ABCC6 ABCC9 ABCD1 ABCD3
1 1 1 1 1 1 1 1 1 1 1 1 1 1
ABCD4 ABCE1 ABCF1 ABCF2 ABCF3 ABCG1 ABCG2 ABHD10
1 1 1 1 1 1 1 1
If there are duplicates, it is not clear from the post how they should be handled.
Related
I have 2 graphs on R. They have different x axis, but similar trend profile. how do I overlay them on r?
I have 2 datasets (First and Second) shown below on their respective raw datasets. They have different x-axis, but similar trend profile. How do I align and overlay them to occur on a single plot on R using ggplot2? My codes for their plots on R are: For First: First <- ggplot(data = First, aes(x, y)) + geom_line(pch = 1) For Second: Second <- ggplot(data = Second, aes(x, y)) + geom_line(pch = 1) Raw dataset for First: x y 129.46 532.87 129.44 533.97 129.43 534.48 129.42 524.14 129.40 525.10 129.39 517.73 129.37 517.06 129.36 517.98 129.35 511.68 129.33 506.21 129.32 503.39 129.31 492.87 129.29 484.60 129.28 481.26 129.26 473.19 129.25 469.08 129.24 464.39 129.22 456.28 129.21 452.46 129.19 447.01 129.18 439.83 129.17 434.11 129.15 426.85 129.14 421.21 129.12 414.52 129.11 409.71 129.10 404.59 129.08 399.91 129.07 393.89 129.05 388.65 129.04 383.33 129.03 379.13 129.01 375.56 129.00 370.54 128.98 366.30 128.97 362.54 128.96 356.00 128.94 351.95 128.93 347.81 128.91 343.64 128.90 339.57 128.89 335.33 128.87 331.19 128.86 328.30 128.84 325.86 128.83 323.46 128.82 321.77 128.80 319.47 128.79 316.96 128.77 314.35 128.76 311.30 128.75 308.95 128.73 307.41 128.72 304.59 128.70 302.33 128.69 299.55 128.68 297.95 128.66 296.19 128.65 294.39 128.63 292.42 128.62 289.79 128.61 287.52 128.59 285.54 128.58 283.74 128.57 281.68 128.55 279.89 128.54 278.65 128.52 277.48 128.51 275.45 128.50 273.93 128.48 272.46 128.47 271.14 128.45 269.65 128.44 267.75 128.43 266.05 128.41 264.15 128.40 262.82 128.38 261.77 128.37 261.36 128.36 260.28 128.34 259.67 128.33 258.81 128.31 258.05 128.30 258.05 128.29 257.27 128.27 256.64 128.26 256.02 128.24 254.40 128.23 253.57 128.22 252.97 128.20 252.69 128.19 252.08 128.17 251.61 128.16 250.88 128.15 250.67 128.13 250.52 128.12 249.97 128.10 249.84 128.09 248.82 128.08 249.06 128.06 248.00 128.05 247.06 128.03 246.84 128.02 247.20 128.01 248.07 127.99 247.46 127.98 246.58 127.96 246.86 127.95 247.03 127.94 246.67 127.92 247.20 127.91 247.80 
127.90 247.61 127.88 247.87 127.87 247.77 127.85 247.42 127.84 248.48 127.83 248.90 127.81 249.92 127.80 251.29 127.78 252.16 127.77 253.10 127.76 254.39 127.74 255.47 127.73 256.43 127.71 257.68 127.70 258.32 127.69 259.63 127.67 261.89 127.66 263.23 127.64 265.47 127.63 267.10 127.62 269.05 127.60 271.09 127.59 272.48 127.57 274.91 127.56 276.54 127.55 278.50 127.53 279.27 127.52 280.13 127.50 280.96 127.49 281.58 127.48 281.73 127.46 282.27 127.45 282.77 127.43 282.81 127.42 282.59 127.41 282.14 127.39 281.05 127.38 280.53 127.36 279.07 127.35 277.24 127.34 276.30 127.32 274.52 127.31 272.61 127.29 271.43 127.28 270.06 127.27 268.06 127.25 267.17 127.24 265.80 127.23 264.93 127.21 264.38 127.20 263.39 127.18 263.05 127.17 262.48 127.16 261.55 127.14 261.36 127.13 260.32 127.11 259.54 127.10 260.12 127.09 260.55 127.07 260.92 127.06 261.55 127.04 262.40 127.03 262.71 127.02 263.56 127.00 264.18 126.99 264.76 126.97 264.76 126.96 264.48 126.95 265.54 126.93 267.23 126.92 268.28 126.90 269.27 126.89 270.39 126.88 271.40 126.86 272.81 126.85 273.91 126.83 275.63 126.82 277.38 126.81 277.79 126.79 279.41 126.78 279.75 126.76 280.53 126.75 282.72 126.74 284.13 126.72 286.31 126.71 288.78 126.69 290.37 126.68 292.47 126.67 294.45 126.65 296.41 126.64 299.01 126.62 300.27 126.61 300.60 126.60 302.39 126.58 304.41 126.57 306.27 126.56 309.08 126.54 311.47 126.53 314.92 126.51 317.62 126.50 320.79 126.49 324.88 126.47 327.88 126.46 331.98 126.44 334.43 126.43 336.38 126.42 339.31 126.40 342.30 126.39 345.26 126.37 349.00 126.36 353.23 126.35 355.80 126.33 359.43 126.32 362.46 126.30 365.44 126.29 368.90 126.28 371.33 126.26 373.43 126.25 375.84 126.23 376.66 126.22 377.24 126.21 378.86 126.19 380.56 126.18 382.81 126.16 384.93 126.15 386.63 126.14 389.33 126.12 392.04 126.11 393.12 126.09 395.23 126.08 397.14 126.07 397.97 126.05 398.70 126.04 400.18 126.02 402.96 126.01 406.16 126.00 410.46 125.98 414.02 125.97 419.10 125.95 423.51 125.94 429.04 125.93 433.63 125.91 
439.10 125.90 445.74 125.88 448.74 125.87 454.18 125.86 458.68 125.84 464.89 125.83 471.47 125.82 479.85 125.80 487.35 125.79 495.42 125.77 505.03 125.76 514.95 125.75 525.05 125.73 536.33 125.72 545.53 125.70 555.22 125.69 566.94 125.68 578.38 125.66 592.60 125.65 610.46 125.63 627.96 125.62 644.92 125.61 667.07 125.59 690.26 125.58 716.45 125.56 743.96 125.55 772.56 125.54 802.98 125.52 834.70 125.51 861.03 125.49 893.29 125.48 928.74 125.47 959.44 125.45 986.00 125.44 1007.16 125.42 1025.04 125.41 1037.34 125.40 1045.97 125.38 1047.54 125.37 1046.52 125.35 1040.06 125.34 1033.93 125.33 1028.62 125.31 1019.46 125.30 1009.75 125.28 998.56 125.27 985.23 125.26 969.51 125.24 954.00 125.23 937.87 125.21 921.84 125.20 904.31 125.19 886.50 125.17 869.52 125.16 855.01 125.15 841.79 125.13 826.35 125.12 812.49 125.10 798.08 125.09 783.09 125.08 768.02 125.06 751.49 125.05 735.61 125.03 720.00 125.02 705.38 125.01 690.72 124.99 676.87 124.98 663.52 124.96 652.62 124.95 642.21 124.94 631.57 124.92 620.73 124.91 609.34 124.89 599.22 124.88 589.48 124.87 578.93 124.85 569.27 124.84 557.89 124.82 548.03 124.81 539.04 124.80 529.46 124.78 520.41 124.77 512.79 124.75 504.41 124.74 494.50 124.73 484.16 124.71 474.33 124.70 463.87 124.68 453.91 124.67 442.96 124.66 432.59 124.64 421.67 124.63 412.34 124.61 402.25 124.60 391.99 124.59 384.48 124.57 375.79 124.56 366.30 124.54 357.78 124.53 349.52 124.52 340.83 124.50 333.56 124.49 324.78 124.48 316.03 124.46 308.79 124.45 301.12 124.43 294.10 124.42 287.40 124.41 280.85 124.39 275.99 124.38 269.42 124.36 264.00 124.35 258.31 124.34 252.82 124.32 248.27 124.31 243.83 124.29 239.23 124.28 234.31 124.27 230.57 124.25 226.70 124.24 222.75 124.22 219.43 124.21 215.93 124.20 212.76 124.18 209.68 124.17 206.41 124.15 203.55 124.14 200.64 124.13 198.50 124.11 196.15 124.10 193.52 124.08 191.50 124.07 189.29 124.06 187.49 124.04 185.83 124.03 184.40 124.01 182.50 124.00 181.13 123.99 179.58 123.97 178.32 123.96 177.52 123.94 176.60 123.93 
175.97 123.92 175.14 123.90 174.42 123.89 173.82 123.87 173.33 123.86 172.90 123.85 172.59 123.83 172.14 123.82 171.81 123.80 171.40 123.79 171.32 123.78 171.27 123.76 171.26 123.75 171.29 123.74 171.30 123.72 171.15 123.71 171.20 123.69 171.05 123.68 170.71 123.67 170.44 123.65 170.09 123.64 169.57 123.62 168.99 123.61 168.32 123.60 167.77 123.58 167.32 123.57 166.69 123.55 166.03 123.54 165.45 123.53 164.87 123.51 164.10 123.50 163.33 123.48 162.63 123.47 162.00 123.46 161.37 123.44 160.56 123.43 159.85 123.41 159.23 123.40 158.66 123.39 158.23 123.37 157.77 123.36 157.43 123.34 157.03 123.33 156.67 123.32 156.09 123.30 155.41 123.29 155.02 123.27 154.63 123.26 154.00 123.25 153.36 123.23 152.90 123.22 152.52 123.20 152.22 123.19 151.95 123.18 151.78 123.16 151.64 123.15 151.54 123.13 151.23 123.12 150.99 123.11 150.81 123.09 150.59 123.08 150.38 123.07 150.11 123.05 149.89 123.04 149.75 123.02 149.66 123.01 149.63 123.00 149.83 122.98 150.07 122.97 150.18 122.95 150.38 122.94 150.48 122.93 150.76 122.91 151.21 122.90 151.06 122.88 151.18 122.87 151.47 122.86 151.93 122.84 152.12 122.83 152.41 122.81 152.93 122.80 153.56 122.79 154.44 122.77 155.16 122.76 155.76 122.74 156.56 122.73 157.35 122.72 158.24 122.70 159.00 122.69 159.72 122.67 160.70 122.66 161.41 122.65 162.03 122.63 162.70 122.62 163.31 122.60 163.98 122.59 164.61 122.58 165.13 122.56 165.54 122.55 165.72 122.53 165.78 122.52 165.61 122.51 165.27 122.49 164.97 122.48 164.62 122.46 164.08 122.45 163.49 122.44 162.59 122.42 161.87 122.41 161.26 122.40 160.59 122.38 160.01 122.37 159.52 122.35 158.90 122.34 158.05 122.33 157.02 122.31 156.18 122.30 155.43 122.28 154.64 122.27 153.81 122.26 153.00 122.24 152.30 122.23 151.48 122.21 150.83 122.20 150.15 122.19 149.72 122.17 149.32 122.16 148.91 122.14 148.41 122.13 148.05 122.12 147.78 122.10 147.31 122.09 146.96 122.07 146.90 122.06 146.74 122.05 146.55 122.03 146.53 122.02 147.33 122.00 146.93 121.99 146.75 121.98 146.76 121.96 146.89 121.95 147.08 
121.93 147.47 121.92 147.95 121.91 148.47 121.89 148.91 121.88 149.44 121.86 150.03 121.85 150.46 121.84 150.94 121.82 151.46 121.81 152.04 121.79 152.43 121.78 152.67 121.77 152.92 121.75 153.19 121.74 153.50 121.72 153.58 121.71 153.69 121.70 153.81 121.68 153.71 121.67 153.58 121.66 153.20 121.64 152.85 121.63 152.70 121.61 152.24 121.60 151.67 121.59 150.90 121.57 150.41 121.56 149.84 121.54 149.28 121.53 148.58 121.52 148.05 121.50 147.70 121.49 147.15 121.47 146.79 121.46 146.48 121.45 146.24 121.43 145.94 121.42 145.52 121.40 145.30 121.39 145.38 121.38 145.36 121.36 145.28 121.35 145.65 121.33 145.55 121.32 145.75 121.31 146.25 121.29 146.42 121.28 146.81 121.26 147.12 121.25 147.17 121.24 147.47 121.22 147.71 121.21 147.78 121.19 147.95 121.18 148.34 121.17 148.32 121.15 148.54 121.14 148.44 121.12 148.52 121.11 148.70 121.10 148.77 121.08 148.92 121.07 148.95 121.05 148.73 121.04 148.28 121.03 148.15 121.01 147.66 121.00 147.44 120.99 147.17 120.97 146.65 120.96 146.66 120.94 146.30 120.93 146.32 120.92 146.36 120.90 146.05 120.89 146.16 120.87 145.92 120.86 145.57 120.85 145.71 120.83 145.05 120.82 145.49 120.80 145.59 120.79 145.24 120.78 145.48 120.76 146.02 120.75 145.67 120.73 146.44 120.72 147.36 120.71 147.80 120.69 148.87 120.68 147.89 120.66 148.12 120.65 148.79 120.64 147.28 120.62 148.47 120.61 149.10 120.59 149.42 120.58 149.45 120.57 149.90 120.55 150.28 120.54 150.52 120.52 150.43 120.51 150.94 120.50 150.73 120.48 151.13 120.47 151.24 120.45 151.32 120.44 150.96 120.43 150.80 120.41 150.61 120.40 150.41 120.38 150.48 120.37 150.96 120.36 151.60 120.34 152.14 120.33 152.05 120.32 152.51 120.30 152.53 120.29 152.56 120.27 152.63 120.26 152.53 120.25 152.28 120.23 151.96 120.22 150.96 120.20 149.81 120.19 149.15 120.18 148.75 120.16 148.42 120.15 147.90 120.13 147.60 120.12 147.37 120.11 146.73 120.09 146.94 120.08 146.99 120.06 146.53 120.05 146.26 120.04 147.40 120.02 149.56 120.01 148.57 119.99 150.23 119.98 148.50 119.97 149.44 119.95 
153.75 119.94 154.59 119.92 158.31 119.91 163.60 119.90 170.53 119.88 176.49 119.87 183.77 119.85 195.72 119.84 199.95 119.83 203.86 119.81 196.98 119.80 186.12 119.78 181.83 The second is: x y 142.06 483.07 142.05 481.22 142.03 480.65 142.02 477.31 142.01 469.69 141.99 461.74 141.98 455.80 141.96 450.03 141.95 440.94 141.94 436.92 141.92 439.83 141.91 448.89 141.89 451.64 141.88 445.06 141.87 436.29 141.85 436.91 141.84 439.85 141.82 438.04 141.81 437.54 141.80 440.88 141.78 440.12 141.77 441.93 141.75 441.75 141.74 443.65 141.73 437.05 141.71 435.76 141.70 438.81 141.68 442.95 141.67 445.62 141.66 445.92 141.64 445.68 141.63 441.25 141.62 440.84 141.60 435.75 141.59 429.87 141.57 429.70 141.56 435.20 141.55 434.71 141.53 433.26 141.52 433.86 141.50 435.97 141.49 436.62 141.48 438.29 141.46 436.82 141.45 436.19 141.43 430.53 141.42 425.53 141.41 423.40 141.39 422.70 141.38 427.22 141.36 429.55 141.35 430.31 141.34 433.64 141.32 437.53 141.31 436.35 141.29 436.65 141.28 439.47 141.27 437.66 141.25 436.88 141.24 428.98 141.22 426.74 141.21 431.80 141.20 434.16 141.18 436.85 141.17 439.57 141.15 441.25 141.14 446.21 141.13 445.51 141.11 446.65 141.10 448.60 141.08 445.50 141.07 442.42 141.06 439.73 141.04 437.68 141.03 439.24 141.01 445.00 141.00 446.63 140.99 451.07 140.97 452.34 140.96 453.97 140.94 458.24 140.93 459.39 140.92 462.71 140.90 464.21 140.89 462.70 140.87 462.00 140.86 460.58 140.85 460.49 140.83 464.55 140.82 471.15 140.80 470.22 140.79 472.05 140.78 472.89 140.76 475.38 140.75 478.31 140.73 479.60 140.72 483.60 140.71 486.64 140.69 490.09 140.68 490.27 140.67 490.00 140.65 493.38 140.64 499.44 140.62 499.82 140.61 501.45 140.60 502.86 140.58 503.88 140.57 505.28 140.55 506.91 140.54 511.23 140.53 515.51 140.51 517.53 140.50 517.70 140.48 517.27 140.47 517.27 140.46 514.41 140.44 513.87 140.43 513.18 140.41 510.40 140.40 502.88 140.39 499.08 140.37 494.34 140.36 493.15 140.34 497.87 140.33 499.36 140.32 498.40 140.30 495.46 140.29 490.72 140.27 485.64 
140.26 479.75 140.25 474.79 140.23 470.13 140.22 461.47 140.20 459.50 140.19 457.55 140.18 455.43 140.16 461.16 140.15 469.09 140.13 471.04 140.12 469.66 140.11 462.89 140.09 454.46 140.08 448.36 140.06 440.22 140.05 432.27 140.04 424.39 140.02 418.62 140.01 416.53 139.99 414.79 139.98 418.52 139.97 429.46 139.95 439.80 139.94 446.26 139.92 443.80 139.91 438.85 139.90 432.84 139.88 431.29 139.87 427.68 139.85 422.87 139.84 419.23 139.83 414.42 139.81 411.25 139.80 413.78 139.79 419.72 139.77 424.95 139.76 429.25 139.74 427.59 139.73 422.81 139.72 417.27 139.70 416.84 139.69 417.09 139.67 414.80 139.66 412.47 139.65 413.25 139.63 412.05 139.62 416.88 139.60 421.99 139.59 425.06 139.58 434.19 139.56 436.34 139.55 435.10 139.53 430.10 139.52 431.28 139.51 433.26 139.49 434.26 139.48 431.66 139.46 433.82 139.45 436.17 139.44 438.31 139.42 445.14 139.41 452.12 139.39 460.34 139.38 468.53 139.37 469.48 139.35 467.94 139.34 471.17 139.32 475.65 139.31 478.09 139.30 477.27 139.28 478.26 139.27 477.40 139.25 480.09 139.24 485.09 139.23 491.05 139.21 496.55 139.20 500.31 139.18 502.52 139.17 498.99 139.16 497.95 139.14 498.37 139.13 500.68 139.11 503.28 139.10 505.85 139.09 506.35 139.07 507.11 139.06 513.07 139.04 520.05 139.03 527.38 139.02 532.70 139.00 536.39 138.99 541.80 138.97 544.73 138.96 547.06 138.95 551.20 138.93 554.44 138.92 558.82 138.90 564.68 138.89 569.71 138.88 580.95 138.86 593.55 138.85 606.50 138.84 621.86 138.82 632.23 138.81 639.43 138.79 649.10 138.78 661.02 138.77 672.71 138.75 683.65 138.74 697.95 138.72 711.85 138.71 721.70 138.70 742.52 138.68 764.57 138.67 786.43 138.65 812.39 138.64 838.32 138.63 862.37 138.61 882.57 138.60 908.42 138.58 937.86 138.57 962.48 138.56 986.73 138.54 1015.64 138.53 1040.43 138.51 1068.36 138.50 1104.88 138.49 1143.82 138.47 1190.99 138.46 1232.34 138.44 1273.42 138.43 1296.43 138.42 1323.50 138.40 1347.81 138.39 1363.65 138.37 1369.67 138.36 1382.39 138.35 1388.82 138.33 1389.04 138.32 1391.43 138.30 1393.68 138.29 
1398.80 138.28 1394.21 138.26 1384.65 138.25 1364.55 138.23 1337.52 138.22 1326.20 138.21 1306.90 138.19 1283.38 138.18 1270.16 138.16 1249.03 138.15 1230.29 138.14 1223.17 138.12 1213.08 138.11 1211.40 138.09 1212.51 138.08 1200.52 138.07 1185.42 138.05 1161.96 138.04 1143.77 138.02 1123.02 138.01 1093.99 138.00 1077.22 137.98 1059.70 137.97 1035.84 137.96 1027.20 137.94 1025.29 137.93 1015.19 137.91 1012.58 137.90 1006.32 137.89 984.20 137.87 964.25 137.86 941.66 137.84 922.75 137.83 906.69 137.82 882.85 137.80 871.76 137.79 857.74 137.77 848.72 137.76 846.38 137.75 839.06 137.73 833.21 137.72 822.04 137.70 804.83 137.69 783.16 137.68 774.40 137.66 758.48 137.65 744.32 137.63 732.52 137.62 722.43 137.61 712.14 137.59 704.13 137.58 699.86 137.56 697.26 137.55 692.86 137.54 684.29 137.52 669.33 137.51 650.79 137.49 639.53 137.48 630.92 137.47 619.08 137.45 607.80 137.44 599.49 137.42 587.80 137.41 579.81 137.40 571.73 137.38 564.87 137.37 559.58 137.35 549.88 137.34 538.16 137.33 525.07 137.31 514.06 137.30 505.49 137.28 497.80 137.27 487.99 137.26 479.18 137.24 470.91 137.23 460.88 137.21 455.19 137.20 448.80 137.19 440.92 137.17 434.03 137.16 424.79 137.14 416.53 137.13 408.00 137.12 401.20 137.10 394.36 137.09 387.62 137.07 380.90 137.06 374.20 137.05 367.24 137.03 360.99 137.02 354.70 137.01 348.64 136.99 342.50 136.98 335.56 136.96 329.23 136.95 322.95 136.94 317.64 136.92 312.24 136.91 308.07 136.89 303.21 136.88 298.65 136.87 293.95 136.85 288.35 136.84 283.98 136.82 280.04 136.81 275.83 136.80 272.23 136.78 268.40 136.77 264.82 136.75 262.04 136.74 259.04 136.73 256.31 136.71 253.72 136.70 250.91 136.68 248.53 136.67 246.17 136.66 243.85 136.64 241.94 136.63 239.81 136.61 238.02 136.60 235.93 136.59 233.98 136.57 232.39 136.56 230.67 136.54 229.24 136.53 227.66 136.52 226.07 136.50 224.55 136.49 222.98 136.47 221.41 136.46 219.70 136.45 218.23 136.43 216.48 136.42 214.75 136.40 213.16 136.39 211.33 136.38 209.93 136.36 208.55 136.35 206.95 136.33 205.56 
136.32 204.10 136.31 202.87 136.29 201.66 136.28 200.54 136.26 199.10 136.25 197.71 136.24 196.47 136.22 195.42 136.21 194.51 136.19 193.55 136.18 192.66 136.17 191.81 136.15 191.09 136.14 190.37 136.13 189.78 136.11 189.06 136.10 188.53 136.08 187.81 136.07 187.02 136.06 186.32 136.04 185.86 136.03 185.72 136.01 185.46 136.00 185.06 135.99 184.91 135.97 184.74 135.96 184.66 135.94 184.70 135.93 184.75 135.92 184.67 135.90 184.74 135.89 185.58 135.87 184.94 135.86 184.83 135.85 185.37 135.83 185.96 135.82 186.52 135.80 187.16 135.79 187.97 135.78 188.76 135.76 189.75 135.75 190.56 135.73 191.43 135.72 192.48 135.71 193.43 135.69 194.49 135.68 195.61 135.66 196.96 135.65 198.34 135.64 199.56 135.62 200.90 135.61 202.40 135.59 203.76 135.58 205.23 135.57 206.56 135.55 207.97 135.54 209.31 135.52 210.44 135.51 211.36 135.50 212.20 135.48 212.95 135.47 213.47 135.45 213.92 135.44 214.11 135.43 214.10 135.41 213.94 135.40 213.64 135.38 213.19 135.37 212.59 135.36 211.82 135.34 210.75 135.33 209.66 135.31 208.46 135.30 207.14 135.29 205.82 135.27 204.44 135.26 203.15 135.24 201.80 135.23 200.48 135.22 199.35 135.20 198.28 135.19 197.23 135.18 196.15 135.16 195.07 135.15 194.03 135.13 192.96 135.12 192.21 135.11 191.53 135.09 190.87 135.08 190.32 135.06 190.02 135.05 189.82 135.04 189.84 135.02 189.89 135.01 189.82 134.99 190.02 134.98 189.88 134.97 190.09 134.95 190.45 134.94 190.82 134.92 191.60 134.91 192.45 134.90 193.26 134.88 194.27 134.87 195.37 134.85 196.61 134.84 197.86 134.83 199.11 134.81 200.35 134.80 201.58 134.78 202.68 134.77 203.60 134.76 204.22 134.74 205.07 134.73 206.51 134.71 209.37 134.70 206.97 134.69 207.18 134.67 207.52 134.66 207.90 134.64 208.21 134.63 208.21 134.62 208.27 134.60 207.19 134.59 206.58 134.57 205.72 134.56 204.81 134.55 204.11 134.53 203.64 134.52 202.92 134.50 202.02 134.49 201.22 134.48 200.36 134.46 199.68 134.45 198.92 134.43 198.29 134.42 197.56 134.41 196.73 134.39 196.08 134.38 195.75 134.36 195.52 134.35 195.63 134.34 
195.81 134.32 196.02 134.31 196.20 134.30 196.80 134.28 196.90 134.27 197.19 134.25 197.74 134.24 198.08 134.23 198.31 134.21 198.68 134.20 199.22 134.18 199.70 134.17 200.18 134.16 200.93 134.14 201.64 134.13 202.24 134.11 202.68 134.10 203.27 134.09 203.68 134.07 204.09 134.06 204.19 134.04 204.23 134.03 204.12 134.02 204.37 134.00 203.50 133.99 202.88 133.97 202.47 133.96 202.08 133.95 201.85 133.93 201.56 133.92 201.16 133.90 201.05 133.89 200.73 133.88 200.97 133.86 202.35 133.85 201.84 133.83 198.75 133.82 197.11 133.81 196.25 133.79 195.58 133.78 195.22 133.76 195.54 133.75 195.44 133.74 195.13 133.72 195.43 133.71 195.90 133.69 196.28 133.68 196.45 133.67 197.47 133.65 197.88 133.64 199.96 133.62 205.28 133.61 198.80 133.60 196.61 133.58 194.43 133.57 193.35 133.55 191.96 133.54 190.94 133.53 189.94 133.51 188.91 133.50 187.44 133.48 187.05 133.47 200.13 133.46 194.78 133.44 183.44 133.43 183.11 133.41 182.48 133.40 181.97 133.39 184.17 133.37 181.21 133.36 184.86 133.35 183.46 133.33 181.41 133.32 181.87 133.30 182.53 133.29 182.31 133.28 181.29 133.26 181.50 133.25 181.17 133.23 184.41 133.22 183.61 133.21 186.67 133.19 182.59 133.18 181.21 133.16 180.85 133.15 184.65 133.14 184.11 133.12 182.34 133.11 189.83 133.09 190.95 133.08 199.73 133.07 214.60 133.05 223.41 133.04 220.76 133.02 248.98 133.01 296.96 133.00 308.09 132.98 263.16 enter code here
These data look like some kind of spectra, so I understand the desire to plot them on top of each other to compare shape. The following code aligns the peaks on each set, but you will have an arbitrary x-axis (so I removed the labels). first$match <- first$x second$match <- second$x - second$x[second$y == max(second$y)] + first$x[first$y == max(first$y)] first$series = "first" second$series = "second" all_data = rbind(first, second) ggplot(all_data) + geom_line(aes(x = match, y, color = series)) + scale_x_continuous(name = "X, arbitrary units") + theme(axis.text.x = element_blank())
Calling par(mfrow=c(1,2)) and then plotting First and Second should draw the two plots next to each other, but not on top of each other.
Depending on how you want to visualize this you should combine the dataframes into a single dataframe with the source as a column. Then either have each on the same plot with a different colour etc., or use facet_wrap. Example: library(tidyverse) first <- tibble(x = 1:1000, y = x + runif(1000)) second <- tibble(x = 1001:2000, y = x + runif(1000)) combo <- first %>% mutate(source = "first") %>% bind_rows( second %>% mutate(source = "second") ) combo %>% ggplot(aes(x,y, colour = source))+ geom_line() #or combo %>% ggplot(aes(x,y))+ geom_line()+ facet_wrap(~source)
SMOTE function 'subscript out of bounds'
I'm trying to implement a logistic regression as follows: However I can't get good predictions because my class output 1 is under-represented in my data. Therefore I'm trying to apply SMOTE algorithm to my trainset in order to get better results. However I get the message error: Error in T[i, ] : subscript out of bounds There is my code: set.seed(157) split <- createDataPartition(df_statique$Y, p = .50,list = FALSE,times = 1) trainSplit <- df_statique[ split,] testSplit <- df_statique[-split,] trainSplit <- SMOTE(Y ~ insolvency + efficiency + DebtToAssetsRatio + taille + CashAssetRatio + current + netWorth + REA, trainSplit, perc.over = 300, perc.under=100) There is a part of my dataframe df_statique: index countryIsoCode insolvency efficiency CashAssetRatio DebtToAssetsRatio netWorth REA taille Y 41807 IT 0.00360 0.5193711 0.8686575 0.49446355 4387182 1.657145e-03 2 1 41808 IT 0.00050 1.5269309 1.6295765 0.36543122 30916838 6.601092e-03 3 0 41809 IT 0.00050 2.2635592 1.3427063 0.15809120 2200087 1.218576e-03 1 0 41810 IT 0.00280 1.3989753 0.9345793 0.69642554 2940473 3.852093e-04 2 0 41811 IT 0.00140 2.1440221 3.5781748 0.07951644 28418622 8.845920e-04 2 0 41812 IT 0.00040 1.0068491 1.7238305 0.47561418 22486133 2.703242e-04 2 0 41813 IT 0.00130 1.5569114 1.4459704 0.57632716 9769040 9.741611e-04 2 0 41814 IT 0.00510 5.0143711 0.1035034 0.71267895 3610152 2.391447e-03 2 0 41815 IT 0.00090 3.3280521 0.5160867 0.34998732 218965703 2.550272e-04 3 0 41816 IT 0.00040 1.7217051 2.2758391 0.29638050 29868519 1.136387e-04 3 0 41817 IT 0.00360 1.7261580 0.8490392 0.41231551 106020226 2.304773e-06 3 0 41818 IT 0.00040 1.3600893 1.6298656 0.57789518 55408765 4.841743e-04 3 1 41819 IT 0.00510 5.5565821 0.1376145 0.19679467 9491245 1.398124e-03 2 0 41820 IT 0.00131 3.8312347 1.1365521 0.73639696 8921497 4.701300e-06 3 0 41821 IT 0.00400 1.8218620 0.9113375 0.62646234 24134486 9.435248e-04 3 0 41822 IT 0.00100 1.8215702 1.0690901 0.82764828 777547 6.335832e-03 2 0 41823 IT 
0.00090 1.8153513 0.9320536 0.80258849 2437903 6.035954e-04 2 0 41824 IT 0.00050 2.1300765 1.7388457 0.31394248 27009000 3.507500e-04 3 0 41825 IT 0.00100 1.8697385 1.4438289 0.56198890 35917 5.765082e-03 1 0 41826 IT 0.00230 6.5298138 1.1726536 0.56654516 2675415 1.038839e-02 2 0 41827 IT 0.00220 9.8201528 0.4794298 0.63618554 488924 1.336866e-05 2 0 Finally, my output Y is a dummy indicating a default or not at horizon 1 year
This error occurs when the target variable you pass to the SMOTE function is of integer type. SMOTE only works with a factor target variable, so convert it before calling SMOTE, e.g. trainSplit$Y <- as.factor(trainSplit$Y).
Sort column names in ascending order - string followed by a number
I would like to sort/order my columns in data frame in ascending order. However, the problem is that number is preceded by a string. structure(list(H = c("P01050.1", "P01080.1", "P01090.1"), Gr_1 = c(0, 1107200, 17096000), Gr_10 = c(0, 37259000, 1104800000), Gr_11 = c(1835800, 53909000, 623960000), Gr_12 = c(0, 19117000, 808600000), Gr_13 = c(2544200, 2461400, 418770000), Gr_14 = c(5120400, 1373700, 117330000 ), Gr_15 = c(6623500, 0, 73336000), Gr_16 = c(0, 0, 31761000), Gr_17 = c(13475000, 0, 29387000), Gr_18 = c(7883300, 0, 27476000), Gr_19 = c(82339000, 3254700, 50825000 ), Gr_2 = c(1584100, 84847000, 5219500000), Gr_20 = c(205860000, 0, 67685000), Gr_21 = c(867120000, 1984400, 2.26e+08 ), Gr_22 = c(1144300000, 11342000, 256440000), Gr_23 = c(1.179e+09, 4391600, 141230000), Gr_24 = c(1408700000, 2830100, 328190000), Gr_25 = c(757020000, 5588500, 113360000 ), Gr_26 = c(456930000, 4694700, 78221000), Gr_27 = c(162310000, 12425000, 29132000), Gr_28 = c(71199000, 13883000, 39425000), Gr_29 = c(220140000, 48225000, 22240000), Gr_3 = c(4206000, 291080000, 1.5496e+10), Gr_30 = c(418310000, 248370000, 2998900), Gr_31 = c(214340000, 342310000, 6181100), Gr_32 = c(203520000, 545960000, 17214000 ), Gr_33 = c(353320000, 1048300000, 31244000), Gr_34 = c(163870000, 1026300000, 23291000), Gr_35 = c(134430000, 773530000, 19527000), Gr_36 = c(159630000, 547270000, 12743000), Gr_37 = c(214110000, 311250000, 7546600 ), Gr_38 = c(199410000, 118250000, 4917500), Gr_39 = c(73076000, 41591000, 2751100), Gr_4 = c(0, 348100000, 3.0977e+10 ), Gr_40 = c(57703000, 28495000, 2336400), Gr_41 = c(91657000, 11727000, 7568200), Gr_5 = c(0, 176770000, 8752700000 ), Gr_6 = c(0, 174870000, 6783500000), Gr_7 = c(0, 138470000, 2386900000), Gr_8 = c(0, 84046000, 1371600000 ), Gr_9 = c(0, 82060000, 567990000)), .Names = c("H", "Gr_1", "Gr_10", "Gr_11", "Gr_12", "Gr_13", "Gr_14", "Gr_15", "Gr_16", "Gr_17", "Gr_18", "Gr_19", "Gr_2", "Gr_20", "Gr_21", "Gr_22", "Gr_23", "Gr_24", "Gr_25", "Gr_26", "Gr_27", 
"Gr_28", "Gr_29", "Gr_3", "Gr_30", "Gr_31", "Gr_32", "Gr_33", "Gr_34", "Gr_35", "Gr_36", "Gr_37", "Gr_38", "Gr_39", "Gr_4", "Gr_40", "Gr_41", "Gr_5", "Gr_6", "Gr_7", "Gr_8", "Gr_9"), row.names = c(NA, 3L), class = "data.frame") I was trying it apply function sort and order but both of them does not change the order. Any ideas how it can be sorted properly ? I would like to start from Gr_1 : Gr_10 and then Gr_11 : "Gr_20", etc.
Another version which is perhaps easier to read: n <- sub("Gr_", "", names(df)) # replace Gr_ with nothing n[1] <- 0 # keep "H" column as first n <- as.numeric(n) # convert to numeric (H becomes zero) df[,order(n)] # use that to sort columns (But I like akrun's suggestion to use gtools::mixedsort / gtools::mixedorder.)
d[,order(as.numeric(sub("\\D*","",colnames(d))))] Gr_1 Gr_2 Gr_3 Gr_4 Gr_5 Gr_6 Gr_7 Gr_8 1 0 1584100 4.2060e+06 0.0000e+00 0 0 0 0 2 1107200 84847000 2.9108e+08 3.4810e+08 176770000 174870000 138470000 84046000 3 17096000 5219500000 1.5496e+10 3.0977e+10 8752700000 6783500000 2386900000 1371600000 Gr_9 Gr_10 Gr_11 Gr_12 Gr_13 Gr_14 Gr_15 Gr_16 Gr_17 1 0 0 1835800 0 2544200 5120400 6623500 0 13475000 2 82060000 37259000 53909000 19117000 2461400 1373700 0 0 0 3 567990000 1104800000 623960000 808600000 418770000 117330000 73336000 31761000 29387000 Gr_18 Gr_19 Gr_20 Gr_21 Gr_22 Gr_23 Gr_24 Gr_25 1 7883300 82339000 205860000 867120000 1144300000 1179000000 1408700000 757020000 2 0 3254700 0 1984400 11342000 4391600 2830100 5588500 3 27476000 50825000 67685000 226000000 256440000 141230000 328190000 113360000 Gr_26 Gr_27 Gr_28 Gr_29 Gr_30 Gr_31 Gr_32 Gr_33 1 456930000 162310000 71199000 220140000 418310000 214340000 203520000 353320000 2 4694700 12425000 13883000 48225000 248370000 342310000 545960000 1048300000 3 78221000 29132000 39425000 22240000 2998900 6181100 17214000 31244000 Gr_34 Gr_35 Gr_36 Gr_37 Gr_38 Gr_39 Gr_40 Gr_41 H 1 163870000 134430000 159630000 214110000 199410000 73076000 57703000 91657000 P01050.1 2 1026300000 773530000 547270000 311250000 118250000 41591000 28495000 11727000 P01080.1 3 23291000 19527000 12743000 7546600 4917500 2751100 2336400 7568200 P01090.1 To put H first: d[,order(as.numeric(sub("\\D*","",colnames(d))),na.last = F)] H Gr_1 Gr_2 Gr_3 Gr_4 Gr_5 Gr_6 Gr_7 1 P01050.1 0 1584100 4.2060e+06 0.0000e+00 0 0 0 2 P01080.1 1107200 84847000 2.9108e+08 3.4810e+08 176770000 174870000 138470000 3 P01090.1 17096000 5219500000 1.5496e+10 3.0977e+10 8752700000 6783500000 2386900000 Gr_8 Gr_9 Gr_10 Gr_11 Gr_12 Gr_13 Gr_14 Gr_15 1 0 0 0 1835800 0 2544200 5120400 6623500 2 84046000 82060000 37259000 53909000 19117000 2461400 1373700 0 3 1371600000 567990000 1104800000 623960000 808600000 418770000 117330000 73336000 Gr_16 Gr_17 Gr_18 Gr_19 Gr_20 
Gr_21 Gr_22 Gr_23 Gr_24 1 0 13475000 7883300 82339000 205860000 867120000 1144300000 1179000000 1408700000 2 0 0 0 3254700 0 1984400 11342000 4391600 2830100 3 31761000 29387000 27476000 50825000 67685000 226000000 256440000 141230000 328190000 Gr_25 Gr_26 Gr_27 Gr_28 Gr_29 Gr_30 Gr_31 Gr_32 1 757020000 456930000 162310000 71199000 220140000 418310000 214340000 203520000 2 5588500 4694700 12425000 13883000 48225000 248370000 342310000 545960000 3 113360000 78221000 29132000 39425000 22240000 2998900 6181100 17214000 Gr_33 Gr_34 Gr_35 Gr_36 Gr_37 Gr_38 Gr_39 Gr_40 Gr_41 1 353320000 163870000 134430000 159630000 214110000 199410000 73076000 57703000 91657000 2 1048300000 1026300000 773530000 547270000 311250000 118250000 41591000 28495000 11727000 3 31244000 23291000 19527000 12743000 7546600 4917500 2751100 2336400 7568200
Irregular gsub in R
I can't get my head around the following odd thing: I have a data.frame that I need to tidy up, and in this step I want to delete all entries that start with [ANONYMOUS] or a year in (). Consider this example, a subset with two entries to illustrate my problem. > print(allstacked2) #mysample- subset values ind 711 [ANONYMOUS], 2010, COMMUNICATION. V1 1588 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. V2 1297 (1986) ACTIVE MEASURES, , U.S. DEPARTMENT OF STATE, AUGUST 1986 V1 3994 (1900) NEW YORK TIMES, P. 17. , 22 APRIL V3 > pat <- "(?:^)\\(.*" > repl <- NA > allstacked2$V3 <- gsub(pat, repl, allstacked2$values, perl=TRUE) > > pat <- "(^)\\[ANONYMOUS]" > repl <- NA > allstacked2$V3 <- gsub(pat, repl, allstacked2$V3, perl=TRUE) > > pat <- "[\\s]+" > repl <- " " > allstacked2$V3 <- gsub(pat, repl, allstacked2$V3, perl=TRUE) > > allstacked2[is.na(allstacked2)] = 'notavailable' > allstacked2$V4 <- ifelse(allstacked2$V3==allstacked2$values,1,0) #to compare what has changed > print(allstacked2) values ind V3 V4 711 [ANONYMOUS], 2010, COMMUNICATION. V1 notavailable 0 1588 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. V2 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. 1 1297 (1986) ACTIVE MEASURES, , U.S. DEPARTMENT OF STATE, AUGUST 1986 V1 notavailable 0 3994 (1900) NEW YORK TIMES, P. 17. , 22 APRIL V3 (1900) NEW YORK TIMES, P. 17. , 22 APRIL 1 > It seems to me that the problem has to do with the index colum, which I need to keep in order to unstack my data later. I just don't get why. Thanks for any help here!! EDIT: Wiktor to the rescue - this is what happens when I use his pattern. It takes the stuff I want to replace, but won't replace the whole pattern. And I still wonder why it would return different results in my own regex. > print(allstacked2) values ind V3 V4 711 [ANONYMOUS], 2010, COMMUNICATION. V1 notavailable 0 1297 (1986) ACTIVE MEASURES, , U.S. DEPARTMENT OF STATE, AUGUST 1986 V1 notavailable 0 1588 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. 
V2 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. 1 3994 (1900) NEW YORK TIMES, P. 17. , 22 APRIL V3 (1900) NEW YORK TIMES, P. 17. , 22 APRIL 1 > allstacked2$V3 <- gsub("^[[:space:]]*(\\(\\d{4}\\)|\\[ANONYMOUS])[[:space:]]*", "", allstacked2$V3) > allstacked2$V3 <- gsub("[[:space:]]+", " ", allstacked2$V3) > > > print(allstacked2) values ind V3 V4 711 [ANONYMOUS], 2010, COMMUNICATION. V1 notavailable 0 1297 (1986) ACTIVE MEASURES, , U.S. DEPARTMENT OF STATE, AUGUST 1986 V1 notavailable 0 1588 [ANONYMOUS], 2008, END UNLAWFUL ATTACKS. V2 , 2008, END UNLAWFUL ATTACKS. 1 3994 (1900) NEW YORK TIMES, P. 17. , 22 APRIL V3 NEW YORK TIMES, P. 17. , 22 APRIL 1 > Edit: Here the dput! > dput(allstacked2) structure(list(values = c("[ANONYMOUS], 2010, COMMUNICATION.", "(1986) ACTIVE MEASURES, , U.S. DEPARTMENT OF STATE, AUGUST 1986", " [ANONYMOUS], 2008, END UNLAWFUL ATTACKS.", " (1900) NEW YORK TIMES, P. 17. , 22 APRIL" ), ind = structure(c(1L, 1L, 2L, 3L), .Label = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31", "V32", "V33", "V34", "V35", "V36", "V37", "V38", "V39", "V40", "V41", "V42", "V43", "V44", "V45", "V46", "V47", "V48", "V49", "V50", "V51", "V52", "V53", "V54", "V55", "V56", "V57", "V58", "V59", "V60", "V61", "V62", "V63", "V64", "V65", "V66", "V67", "V68", "V69", "V70", "V71", "V72", "V73", "V74", "V75", "V76", "V77", "V78", "V79", "V80", "V81", "V82", "V83", "V84", "V85", "V86", "V87", "V88", "V89", "V90", "V91", "V92", "V93", "V94", "V95", "V96", "V97", "V98", "V99", "V100", "V101", "V102", "V103", "V104", "V105", "V106", "V107", "V108", "V109", "V110", "V111", "V112", "V113", "V114", "V115", "V116", "V117", "V118", "V119", "V120", "V121", "V122", "V123", "V124", "V125", "V126", "V127", "V128", "V129", "V130", "V131", "V132", "V133", "V134", "V135", "V136", "V137", "V138", "V139", "V140", "V141", "V142", "V143", 
"V144", "V145", "V146", "V147", "V148", "V149", "V150", "V151", "V152", "V153", "V154", "V155", "V156", "V157", "V158", "V159", "V160", "V161", "V162", "V163", "V164", "V165", "V166", "V167", "V168", "V169", "V170", "V171", "V172", "V173", "V174", "V175", "V176", "V177", "V178", "V179", "V180", "V181", "V182", "V183", "V184", "V185", "V186", "V187", "V188", "V189", "V190", "V191", "V192", "V193", "V194", "V195", "V196", "V197", "V198", "V199", "V200", "V201", "V202", "V203", "V204", "V205", "V206", "V207", "V208", "V209", "V210", "V211", "V212", "V213", "V214", "V215", "V216", "V217", "V218", "V219", "V220", "V221", "V222", "V223", "V224", "V225", "V226", "V227", "V228", "V229", "V230", "V231", "V232", "V233", "V234", "V235", "V236", "V237", "V238", "V239", "V240", "V241", "V242", "V243", "V244", "V245", "V246", "V247", "V248", "V249", "V250", "V251", "V252", "V253", "V254", "V255", "V256", "V257", "V258", "V259", "V260", "V261", "V262", "V263", "V264", "V265", "V266", "V267", "V268", "V269", "V270", "V271", "V272", "V273", "V274", "V275", "V276", "V277", "V278", "V279", "V280", "V281", "V282", "V283", "V284", "V285", "V286", "V287", "V288", "V289", "V290", "V291", "V292", "V293", "V294", "V295", "V296", "V297", "V298", "V299", "V300", "V301", "V302", "V303", "V304", "V305", "V306", "V307", "V308", "V309", "V310", "V311", "V312", "V313", "V314", "V315", "V316", "V317", "V318", "V319", "V320", "V321", "V322", "V323", "V324", "V325", "V326", "V327", "V328", "V329", "V330", "V331", "V332", "V333", "V334", "V335", "V336", "V337", "V338", "V339", "V340", "V341", "V342", "V343", "V344", "V345", "V346", "V347", "V348", "V349", "V350", "V351", "V352", "V353", "V354", "V355", "V356", "V357", "V358", "V359", "V360", "V361", "V362", "V363", "V364", "V365", "V366", "V367", "V368", "V369", "V370", "V371", "V372", "V373", "V374", "V375", "V376", "V377", "V378", "V379", "V380", "V381", "V382", "V383", "V384", "V385", "V386", "V387", "V388", "V389", "V390", "V391", "V392", "V393", 
"V394", "V395", "V396", "V397", "V398", "V399", "V400", "V401", "V402", "V403", "V404", "V405", "V406", "V407", "V408", "V409", "V410", "V411", "V412", "V413", "V414", "V415", "V416", "V417", "V418", "V419", "V420", "V421", "V422", "V423", "V424", "V425", "V426", "V427", "V428", "V429", "V430", "V431", "V432", "V433", "V434", "V435", "V436", "V437", "V438", "V439", "V440", "V441", "V442", "V443", "V444", "V445", "V446", "V447", "V448", "V449", "V450", "V451", "V452", "V453", "V454", "V455", "V456", "V457", "V458", "V459", "V460", "V461", "V462", "V463", "V464", "V465", "V466", "V467", "V468", "V469", "V470", "V471", "V472", "V473", "V474", "V475", "V476", "V477", "V478", "V479", "V480", "V481", "V482", "V483", "V484", "V485", "V486", "V487", "V488", "V489", "V490", "V491", "V492", "V493", "V494", "V495", "V496", "V497", "V498", "V499", "V500", "V501", "V502", "V503", "V504", "V505", "V506", "V507", "V508", "V509", "V510", "V511", "V512", "V513", "V514", "V515", "V516", "V517", "V518", "V519", "V520", "V521", "V522", "V523", "V524", "V525", "V526", "V527", "V528", "V529", "V530", "V531", "V532", "V533", "V534", "V535", "V536", "V537", "V538", "V539", "V540", "V541", "V542", "V543", "V544", "V545", "V546", "V547", "V548", "V549", "V550", "V551", "V552", "V553", "V554", "V555", "V556", "V557", "V558", "V559", "V560", "V561", "V562", "V563", "V564", "V565", "V566", "V567", "V568", "V569", "V570", "V571", "V572", "V573", "V574", "V575", "V576", "V577", "V578", "V579", "V580", "V581", "V582", "V583", "V584", "V585", "V586", "V587", "V588", "V589", "V590", "V591", "V592", "V593", "V594", "V595", "V596", "V597", "V598", "V599", "V600", "V601", "V602", "V603", "V604", "V605", "V606", "V607", "V608", "V609", "V610", "V611", "V612", "V613", "V614", "V615", "V616", "V617", "V618", "V619", "V620", "V621", "V622", "V623", "V624", "V625", "V626", "V627", "V628", "V629", "V630", "V631", "V632", "V633", "V634", "V635", "V636", "V637", "V638", "V639", "V640", "V641", "V642", "V643", 
"V644", "V645", "V646", "V647", "V648", "V649", "V650", "V651", "V652", "V653", "V654", "V655", "V656", "V657", "V658", "V659", "V660", "V661", "V662", "V663", "V664", "V665", "V666", "V667", "V668", "V669", "V670", "V671", "V672", "V673", "V674", "V675", "V676", "V677", "V678", "V679", "V680", "V681", "V682", "V683", "V684", "V685", "V686", "V687", "V688", "V689", "V690", "V691", "V692", "V693", "V694", "V695", "V696", "V697", "V698", "V699", "V700", "V701", "V702", "V703", "V704", "V705", "V706", "V707", "V708", "V709", "V710", "V711", "V712", "V713", "V714", "V715", "V716", "V717", "V718", "V719", "V720", "V721", "V722", "V723", "V724", "V725", "V726", "V727", "V728", "V729", "V730", "V731", "V732", "V733", "V734", "V735", "V736", "V737", "V738", "V739", "V740", "V741", "V742", "V743", "V744", "V745", "V746", "V747", "V748", "V749", "V750", "V751", "V752", "V753", "V754", "V755", "V756", "V757", "V758", "V759", "V760", "V761", "V762", "V763", "V764", "V765", "V766", "V767", "V768", "V769", "V770", "V771", "V772", "V773", "V774", "V775", "V776", "V777", "V778", "V779", "V780", "V781", "V782", "V783", "V784", "V785", "V786", "V787", "V788", "V789", "V790", "V791", "V792", "V793", "V794", "V795", "V796", "V797", "V798", "V799", "V800", "V801", "V802", "V803", "V804", "V805", "V806", "V807", "V808", "V809", "V810", "V811", "V812", "V813", "V814", "V815", "V816", "V817", "V818", "V819", "V820", "V821", "V822", "V823", "V824", "V825", "V826", "V827", "V828", "V829", "V830", "V831", "V832", "V833", "V834", "V835", "V836", "V837", "V838", "V839", "V840", "V841", "V842", "V843", "V844", "V845", "V846", "V847", "V848", "V849", "V850", "V851", "V852", "V853", "V854", "V855", "V856", "V857", "V858", "V859", "V860", "V861", "V862", "V863", "V864", "V865", "V866", "V867", "V868", "V869", "V870", "V871", "V872", "V873", "V874", "V875", "V876", "V877", "V878", "V879", "V880", "V881", "V882", "V883", "V884", "V885", "V886", "V887", "V888", "V889", "V890", "V891", "V892", "V893", 
"V894", "V895", "V896", "V897", "V898", "V899", "V900", "V901", "V902", "V903", "V904", "V905", "V906", "V907", "V908", "V909", "V910", "V911", "V912", "V913", "V914", "V915", "V916", "V917", "V918", "V919", "V920", "V921", "V922", "V923", "V924", "V925", "V926", "V927", "V928", "V929", "V930", "V931", "V932", "V933", "V934", "V935", "V936", "V937", "V938", "V939", "V940", "V941", "V942", "V943", "V944", "V945", "V946", "V947", "V948", "V949", "V950", "V951", "V952", "V953", "V954", "V955", "V956", "V957", "V958", "V959", "V960", "V961", "V962", "V963", "V964", "V965", "V966", "V967", "V968", "V969", "V970", "V971", "V972", "V973", "V974", "V975", "V976", "V977", "V978", "V979", "V980", "V981", "V982", "V983", "V984", "V985", "V986", "V987", "V988", "V989", "V990", "V991", "V992", "V993", "V994", "V995", "V996", "V997", "V998", "V999", "V1000", "V1001", "V1002", "V1003", "V1004", "V1005", "V1006", "V1007", "V1008", "V1009", "V1010", "V1011", "V1012", "V1013", "V1014", "V1015", "V1016", "V1017", "V1018", "V1019", "V1020", "V1021", "V1022", "V1023", "V1024", "V1025", "V1026", "V1027", "V1028", "V1029", "V1030", "V1031", "V1032", "V1033", "V1034", "V1035", "V1036", "V1037", "V1038", "V1039", "V1040", "V1041", "V1042", "V1043", "V1044", "V1045", "V1046", "V1047", "V1048", "V1049", "V1050", "V1051", "V1052", "V1053", "V1054", "V1055", "V1056", "V1057", "V1058", "V1059", "V1060", "V1061", "V1062", "V1063", "V1064", "V1065", "V1066", "V1067", "V1068", "V1069", "V1070", "V1071", "V1072", "V1073", "V1074", "V1075", "V1076", "V1077", "V1078", "V1079", "V1080", "V1081", "V1082", "V1083", "V1084", "V1085", "V1086", "V1087", "V1088", "V1089", "V1090", "V1091", "V1092", "V1093", "V1094", "V1095", "V1096", "V1097", "V1098", "V1099", "V1100", "V1101", "V1102", "V1103", "V1104", "V1105", "V1106", "V1107", "V1108", "V1109", "V1110", "V1111", "V1112", "V1113", "V1114", "V1115", "V1116", "V1117", "V1118", "V1119", "V1120", "V1121", "V1122", "V1123", "V1124", "V1125", "V1126", "V1127", 
"V1128", "V1129", "V1130", "V1131", "V1132", "V1133", "V1134", "V1135", "V1136", "V1137", "V1138", "V1139", "V1140", "V1141", "V1142", "V1143", "V1144", "V1145", "V1146", "V1147", "V1148", "V1149", "V1150", "V1151", "V1152", "V1153", "V1154", "V1155", "V1156", "V1157", "V1158", "V1159", "V1160", "V1161", "V1162", "V1163", "V1164", "V1165", "V1166", "V1167", "V1168", "V1169", "V1170", "V1171", "V1172", "V1173", "V1174", "V1175", "V1176", "V1177", "V1178", "V1179", "V1180", "V1181", "V1182", "V1183", "V1184", "V1185", "V1186", "V1187", "V1188", "V1189", "V1190", "V1191", "V1192", "V1193", "V1194", "V1195", "V1196", "V1197", "V1198", "V1199", "V1200", "V1201", "V1202", "V1203", "V1204", "V1205", "V1206", "V1207", "V1208", "V1209", "V1210", "V1211", "V1212", "V1213", "V1214", "V1215", "V1216", "V1217", "V1218", "V1219", "V1220", "V1221", "V1222", "V1223", "V1224", "V1225", "V1226", "V1227", "V1228", "V1229", "V1230", "V1231", "V1232", "V1233", "V1234", "V1235", "V1236", "V1237", "V1238", "V1239", "V1240", "V1241", "V1242", "V1243", "V1244", "V1245", "V1246", "V1247", "V1248", "V1249", "V1250", "V1251", "V1252", "V1253", "V1254", "V1255", "V1256", "V1257", "V1258", "V1259", "V1260", "V1261", "V1262", "V1263", "V1264", "V1265", "V1266", "V1267", "V1268", "V1269", "V1270", "V1271", "V1272", "V1273", "V1274", "V1275", "V1276", "V1277", "V1278", "V1279", "V1280", "V1281", "V1282", "V1283", "V1284", "V1285", "V1286", "V1287", "V1288", "V1289", "V1290", "V1291", "V1292", "V1293", "V1294", "V1295", "V1296", "V1297", "V1298", "V1299", "V1300", "index", "index2"), class = "factor")), .Names = c("values", "ind"), row.names = c(711L, 1297L, 1588L, 3994L), class = "data.frame") >
You have some whitespace at the start of the input. You need to make sure you match it with [[:space:]]*: allstacked2$V3 <- gsub("^[[:space:]]*(\\(\\d{4}\\)|\\[ANONYMOUS],)[[:space:]]*", "", allstacked2$V3) allstacked2$V3 <- gsub("[[:space:]]+", " ", allstacked2$V3) See the R demo online. Details ^ - start of string [[:space:]]* - 0+ whitespace characters (\\(\\d{4}\\)|\\[ANONYMOUS],) - one of the two alternatives: \\(\\d{4}\\) - a (, 4 digits, ) | - or \\[ANONYMOUS], - [ANONYMOUS], substring [[:space:]]* - 0+ whitespace characters The [[:space:]] in a TRE regex will match Unicode whitespace characters, so it is preferable to \s.
How to get the KMeans Between/Within accuracy percentage in R?
Data (already scaled): structure(c(0.160485413118994, -0.325277232672307, 1.25345136614942, -0.0823959097766563, -1.05392120135926, 1.6177733504929, 0.403366736014645, -0.446717894120132, -0.932480539911433, -0.0823959097766563, 0.508649308224364, -0.625897284357438, 0.83280547753345, -0.625897284357438, -0.139663030393809, 1.96735207011525, -0.301741115048352, -0.301741115048352, -1.76044387693924, -0.139663030393809, 0.0577843342673918, 0.121189712724219, 1.51610803877442, -0.0056210441894354, -0.766485585671361, 1.51610803877442, -0.57626945030088, -1.02010709949867, -1.27372861332598, 1.0088650111198, 0.270645677706219, -0.834749808144522, 1.19678784152711, -0.296989842054972, 1.07728562684054, 0.0913923556763693, 0.121267909348011, -0.356740949398255, -0.864625361816163, 0.83828119746741, 1.30651856124931, -0.595043606688955, 0.545893694074002, 0.16558126048635, 5.23641370832172, -1.48243928506014, 0.419122882878118, 0.419122882878118, -0.0879603619054184, 0.0388104492904658, 0.311357673194754, 0.0940238518063364, 0.311357673194754, 0.0940238518063364, 9.43937817150828, -0.557977612358916, -0.557977612358916, 0.0940238518063364, -0.340643790970498, 0.528691494583171, -0.205722605421789, -0.596759109816679, 1.14786529440668, -0.747157765353175, -0.596759109816679, 2.59169238755704, -0.476440185387482, -0.95771588310427, -1.40891184971376, 0.425951747831495, 0.136489240515638, 0.520535462720394, -1.14366483350021, 0.648550870121979, 0.520535462720394, -1.91175727790973, 0.136489240515638, 0.90458168492515, 1.67267412933466, -0.631603203893873, -0.224811427137598, -0.610593308189137, 0.932534216017016, -0.610593308189137, -0.610593308189137, 2.86144362127471, -0.610593308189137, -0.996375189240675, -1.38215707029221, 0.546752334965478, 0.757884134731298, -0.344374342514091, 0.688992979903461, -0.344374342514091, -0.275483187686254, 1.44679568300967, -0.344374342514091, -0.137700878030581, -1.44663281975948, -0.137700878030581, -0.40214560409626, 
-0.593259833699538, 1.09491586112942, -0.911783549705002, 1.66825854993926, 2.91050104236056, -0.40214560409626, -0.84807880650391, -1.51697861011538, 0.426016057517946, -0.34473058195366, -0.622048342996663, 1.10411323084244, -0.740898812015093, -0.650346073715337, 2.92648708912503, -0.446602412540886, -0.899366104039667, -1.22761978037628, 0.36837223215692, 0.285643408957403, -0.513213860391233, 1.39175347420936, -1.46569752769153, 0.408544527318731, 2.74366577618398, -0.0830599461265831, -0.32886218284924, -1.92657672154651, -0.697565537933226, -0.328861155501701, -0.061730538550728, 0.0718347699247587, -1.26381831483011, 7.28436142760104, 1.6746184716306, -0.863122389403648, -0.729557080928162, -1.13025300635462, -1.13025300635462, -0.0766656163662548, 0.533294679362756, 1.54989517224444, -0.0766656163662548, 0.533294679362756, -0.279985714942592, 1.3465750736681, -0.686625912095266, -0.889946010671603, -0.483305813518929, 0.380765617904849, 0.156832553777284, -0.403000106541628, -0.291033574477846, -0.179067042414063, -0.403000106541628, -1.29873236305189, -1.07479929892432, 0.156832553777284, 0.156832553777284, -0.313536663149827, 0.0109306359630201, -0.151303013593403, 1.63326713152726, -1.44917221004479, -1.28693856048837, 1.79550078108368, 0.659865234188715, 1.79550078108368, 0.984332533301562, 0.183848573129592, 0.452709234957825, 0.0494182422154748, 1.52815188227076, -1.69817605966804, -1.69817605966804, 1.12486088952841, 0.856000227700176, 1.66258221318488, 1.12486088952841), .Dim = c(10L, 18L), .Dimnames = list( NULL, c("COMPACTNESS", "CIRCULARITY", "DISTANCE.CIRCULARITY", "RADIUS.RATIO", "PR.AXIS.ASPECT.RATIO", "MAX.LENGTH.ASPECT.RATIO", "SCATTER.RATIO", "ELONGATEDNESS", "PR.AXIS.RECTANGULARITY", "MAX.LENGTH.RECTANGULARITY", "SCALED.VARIANCE_MAJOR", "SCALED.VARIANCE_MINOR", "SCALED.RADIUS.OF.GYRATION", "SKEWNESS.ABOUT_MAJOR", "SKEWNESS.ABOUT_MINOR", "KURTOSIS.ABOUT_MAJOR", "KURTOSIS.ABOUT_MINOR", "HOLLOWS.RATIO" ))) I ran a kmeans on this data like 
so: kc<-kmeans(d,4) And printed the results: print(kc) It prints a lot of output, and somewhere in there it reports the between_SS / total_SS ratio: Within cluster sum of squares by cluster: [1] 1245.577 1787.394 1089.873 2126.642 (between_SS / total_SS = 58.9 %) How do I get just the 58.9%? I tried kc$betweenss/kc$tot.withinss but got 1.24..., nothing near 58.9%.
The total sum of squares is in kc$totss, not in kc$tot.withinss, so that you can do: d <- structure(c(0.160485413118994, -0.325277232672307, 1.25345136614942, -0.0823959097766563, -1.05392120135926, 1.6177733504929, 0.403366736014645, -0.446717894120132, -0.932480539911433, -0.0823959097766563, 0.508649308224364, -0.625897284357438, 0.83280547753345, -0.625897284357438, -0.139663030393809, 1.96735207011525, -0.301741115048352, -0.301741115048352, -1.76044387693924, -0.139663030393809, 0.0577843342673918, 0.121189712724219, 1.51610803877442, -0.0056210441894354, -0.766485585671361, 1.51610803877442, -0.57626945030088, -1.02010709949867, -1.27372861332598, 1.0088650111198, 0.270645677706219, -0.834749808144522, 1.19678784152711, -0.296989842054972, 1.07728562684054, 0.0913923556763693, 0.121267909348011, -0.356740949398255, -0.864625361816163, 0.83828119746741, 1.30651856124931, -0.595043606688955, 0.545893694074002, 0.16558126048635, 5.23641370832172, -1.48243928506014, 0.419122882878118, 0.419122882878118, -0.0879603619054184, 0.0388104492904658, 0.311357673194754, 0.0940238518063364, 0.311357673194754, 0.0940238518063364, 9.43937817150828, -0.557977612358916, -0.557977612358916, 0.0940238518063364, -0.340643790970498, 0.528691494583171, -0.205722605421789, -0.596759109816679, 1.14786529440668, -0.747157765353175, -0.596759109816679, 2.59169238755704, -0.476440185387482, -0.95771588310427, -1.40891184971376, 0.425951747831495, 0.136489240515638, 0.520535462720394, -1.14366483350021, 0.648550870121979, 0.520535462720394, -1.91175727790973, 0.136489240515638, 0.90458168492515, 1.67267412933466, -0.631603203893873, -0.224811427137598, -0.610593308189137, 0.932534216017016, -0.610593308189137, -0.610593308189137, 2.86144362127471, -0.610593308189137, -0.996375189240675, -1.38215707029221, 0.546752334965478, 0.757884134731298, -0.344374342514091, 0.688992979903461, -0.344374342514091, -0.275483187686254, 1.44679568300967, -0.344374342514091, -0.137700878030581, 
-1.44663281975948, -0.137700878030581, -0.40214560409626, -0.593259833699538, 1.09491586112942, -0.911783549705002, 1.66825854993926, 2.91050104236056, -0.40214560409626, -0.84807880650391, -1.51697861011538, 0.426016057517946, -0.34473058195366, -0.622048342996663, 1.10411323084244, -0.740898812015093, -0.650346073715337, 2.92648708912503, -0.446602412540886, -0.899366104039667, -1.22761978037628, 0.36837223215692, 0.285643408957403, -0.513213860391233, 1.39175347420936, -1.46569752769153, 0.408544527318731, 2.74366577618398, -0.0830599461265831, -0.32886218284924, -1.92657672154651, -0.697565537933226, -0.328861155501701, -0.061730538550728, 0.0718347699247587, -1.26381831483011, 7.28436142760104, 1.6746184716306, -0.863122389403648, -0.729557080928162, -1.13025300635462, -1.13025300635462, -0.0766656163662548, 0.533294679362756, 1.54989517224444, -0.0766656163662548, 0.533294679362756, -0.279985714942592, 1.3465750736681, -0.686625912095266, -0.889946010671603, -0.483305813518929, 0.380765617904849, 0.156832553777284, -0.403000106541628, -0.291033574477846, -0.179067042414063, -0.403000106541628, -1.29873236305189, -1.07479929892432, 0.156832553777284, 0.156832553777284, -0.313536663149827, 0.0109306359630201, -0.151303013593403, 1.63326713152726, -1.44917221004479, -1.28693856048837, 1.79550078108368, 0.659865234188715, 1.79550078108368, 0.984332533301562, 0.183848573129592, 0.452709234957825, 0.0494182422154748, 1.52815188227076, -1.69817605966804, -1.69817605966804, 1.12486088952841, 0.856000227700176, 1.66258221318488, 1.12486088952841), .Dim = c(10L, 18L), .Dimnames = list( NULL, c("COMPACTNESS", "CIRCULARITY", "DISTANCE.CIRCULARITY", "RADIUS.RATIO", "PR.AXIS.ASPECT.RATIO", "MAX.LENGTH.ASPECT.RATIO", "SCATTER.RATIO", "ELONGATEDNESS", "PR.AXIS.RECTANGULARITY", "MAX.LENGTH.RECTANGULARITY", "SCALED.VARIANCE_MAJOR", "SCALED.VARIANCE_MINOR", "SCALED.RADIUS.OF.GYRATION", "SKEWNESS.ABOUT_MAJOR", "SKEWNESS.ABOUT_MINOR", "KURTOSIS.ABOUT_MAJOR", 
"KURTOSIS.ABOUT_MINOR", "HOLLOWS.RATIO" ))) kc<-kmeans(d,4) print(kc) # ... # Clustering vector: # [1] 4 2 4 2 1 3 2 2 2 4 # # Within cluster sum of squares by cluster: # [1] 0.00000 22.36071 0.00000 16.25689 # (between_SS / total_SS = 88.1 %) # ... kc$betweenss/kc$totss*100 # [1] 88.09212