How do I separate the pattern counts with R? - r
via a program I have received the following pattern count.
Counter({'CCCC': 22115, 'TTTT': 22043, 'AAAA': 22037, 'GGGG': 21930, 'AAAC': 154, 'TTAT': 152, 'CCCA': 152, 'CCTC': 152, 'GGGC': 151, 'TTTG': 150, 'GTGG': 149, 'GCCC': 148, 'CCGC': 145, 'CGGG': 145, 'TGGG': 144, 'AGAA': 144, 'TTGT': 144, 'GAAA': 142, 'CCCG': 142, 'CCCT': 142, 'TCCC': 141, 'CAAA': 139, 'ATTT': 137, 'CGCC': 134, 'GGTG': 133, 'GAGG': 133, 'TTTA': 132, 'CTTT': 131, 'TCTT': 131, 'ACCC': 130, 'AGGG': 130, 'GGAG': 129, 'AACA': 129, 'TAAA': 129, 'TATT': 128, 'TTTC': 128, 'AAGA': 127, 'GGGA': 126, 'ACAA': 126, 'TTCT': 125, 'CTCC': 124, 'GCGG': 124, 'ATAA': 123, 'GGCG': 120, 'CACC': 119, 'AAAT': 118, 'AATA': 117, 'AAAG': 114, 'GTTT': 114, 'TGTT': 112, 'GGGT': 112, 'CCAC': 110, 'CGCG': 45, 'AACC': 43, 'TTAA': 41, 'CTCT': 41, 'GGCC': 41, 'ACTC': 40, 'CTTC': 40, 'GCCG': 39, 'ATTA': 39, 'ACCT': 39, 'TGCG': 39, 'ATAT': 39, 'TCTC': 38, 'ACGG': 38, 'TATA': 37, 'ATCA': 37, 'CGGC': 37, 'CGAG': 36, 'AGAG': 36, 'GACA': 35, 'GTTG': 35, 'TGAG': 35, 'TGGT': 35, 'CCAA': 35, 'TTGG': 34, 'GTGT': 34, 'GCGC': 34, 'CACA': 34, 'GTAA': 34, 'GTAG': 34, 'TCCA': 34, 'TCCT': 34, 'AAGG': 34, 'GAGA': 34, 'GCTT': 34, 'GTGC': 33, 'CTAT': 33, 'TTGC': 33, 'CGGA': 33, 'AGGA': 32, 'GACG': 32, 'AATT': 32, 'CAAC': 32, 'CTGC': 32, 'CTAC': 32, 'ACGA': 32, 'CGAC': 32, 'CCGG': 32, 'TCTG': 32, 'GGAA': 32, 'GGAT': 32, 'TGCT': 32, 'TTAG': 32, 'GCTG': 32, 'GAGT': 31, 'AGGC': 31, 'TTCC': 31, 'ATGA': 31, 'TTCA': 31, 'CCAT': 31, 'AAGT': 31, 'GAGC': 31, 'GTAT': 31, 'CGAA': 31, 'TCAT': 31, 'ATTC': 31, 'TGTG': 30, 'AGTT': 30, 'ATCC': 30, 'AGCA': 30, 'GTCT': 30, 'TGTC': 30, 'TCAC': 30, 'CACT': 30, 'ACTA': 30, 'TAAT': 30, 'CCGT': 30, 'CCTA': 29, 'TCGG': 29, 'GGTA': 29, 'TATG': 29, 'AACG': 29, 'CACG': 29, 'GATT': 29, 'ATCT': 29, 'TGGC': 29, 'AGCC': 29, 'TATC': 29, 'GCTC': 29, 'GGCT': 29, 'TCTA': 29, 'AACT': 28, 'CCTT': 28, 'CTTA': 28, 'TGTA': 28, 'TAGT': 28, 'AGTG': 28, 'CCGA': 27, 'AATG': 27, 'CCTG': 27, 'CTGT': 27, 'AGTC': 27, 'GTCC': 27, 'GGTT': 27, 'ACAC': 26, 'TACC': 26, 'CATC': 26, 'CATA': 26, 'GTGA': 
26, 'TGAA': 26, 'GGTC': 26, 'CTTG': 26, 'GCAC': 26, 'GGCA': 26, 'CGTC': 26, 'CTGG': 26, 'TAAG': 26, 'TCGT': 26, 'TGAT': 25, 'CAGA': 25, 'GAAC': 25, 'ACCA': 25, 'TTAC': 25, 'CATT': 25, 'AGAT': 25, 'CGGT': 25, 'ATTG': 25, 'TTGA': 25, 'GATA': 24, 'GGAC': 24, 'AAGC': 24, 'GTCA': 24, 'CAAT': 24, 'GCAG': 24, 'ACAT': 24, 'TGCC': 24, 'ATAG': 24, 'CGTG': 24, 'CGCA': 24, 'TAGG': 23, 'ACCG': 23, 'TTCG': 23, 'AGCG': 23, 'GTTC': 23, 'ACTT': 23, 'CGTT': 23, 'AGAC': 23, 'GCAT': 22, 'TCCG': 22, 'TAAC': 22, 'ACGC': 22, 'CAGC': 22, 'GACC': 22, 'CATG': 22, 'TCGA': 22, 'TAGA': 22, 'GCAA': 22, 'CTCG': 22, 'TACT': 22, 'AATC': 21, 'CGCT': 21, 'GAAT': 21, 'GCGT': 21, 'AGTA': 21, 'GCCA': 21, 'ATGG': 21, 'TCAA': 21, 'CTCA': 21, 'TGGA': 20, 'GAAG': 20, 'GATC': 20, 'TGCA': 20, 'GCCT': 19, 'GTCG': 19, 'CAAG': 19, 'TCGC': 19, 'CTGA': 19, 'GATG': 19, 'CTAA': 19, 'GCGA': 19, 'ATAC': 18, 'GTTA': 18, 'GCTA': 18, 'AGGT': 18, 'CCAG': 18, 'ACAG': 18, 'CTAG': 17, 'CGTA': 17, 'ACGT': 17, 'TACA': 17, 'AGCT': 16, 'CAGG': 16, 'ATGT': 16, 'ATCG': 16, 'ATGC': 15, 'TGAC': 14, 'TAGC': 14, 'ACTG': 14, 'TCAG': 14, 'CGAT': 14, 'TACG': 13, 'CAGT': 11, 'GTAC': 10, 'GACT': 9})
I now want to convert it into a list in which each pattern, e.g. "AAAA", is a column holding its corresponding value, and likewise for all other combinations. Does anyone have an idea how to program this well?
This is how I read the data into R:
daten <- read.table("/PATTERN.txt", header = FALSE, sep = "\t");
So far I've tried direct reading, but somehow it doesn't really work. It should look like this:
AAAA CCCC
1 22128 22127
Thank you very much!
Assuming the data is in Lines (shown reproducibly in the Note at the end), replace Counter( with [, replace ) with ] and replace ' with ", then read the result in using fromJSON:
library(jsonlite)
fromJSON(gsub("'", '"',
sub("\\)", "]",
sub("Counter.","[", Lines))))
giving:
CCCC TTTT AAAA GGGG AAAC TTAT CCCA CCTC GGGC TTTG GTGG GCCC CCGC CGGG
1 22115 22043 22037 21930 154 152 152 152 151 150 149 148 145 145
TGGG AGAA TTGT GAAA CCCG CCCT TCCC CAAA ATTT CGCC GGTG GAGG TTTA CTTT TCTT
1 144 144 144 142 142 142 141 139 137 134 133 133 132 131 131
ACCC AGGG GGAG AACA TAAA TATT TTTC AAGA GGGA ACAA TTCT CTCC GCGG ATAA GGCG
1 130 130 129 129 129 128 128 127 126 126 125 124 124 123 120
CACC AAAT AATA AAAG GTTT TGTT GGGT CCAC CGCG AACC TTAA CTCT GGCC ACTC CTTC
1 119 118 117 114 114 112 112 110 45 43 41 41 41 40 40
GCCG ATTA ACCT TGCG ATAT TCTC ACGG TATA ATCA CGGC CGAG AGAG GACA GTTG TGAG
1 39 39 39 39 39 38 38 37 37 37 36 36 35 35 35
TGGT CCAA TTGG GTGT GCGC CACA GTAA GTAG TCCA TCCT AAGG GAGA GCTT GTGC CTAT
1 35 35 34 34 34 34 34 34 34 34 34 34 34 33 33
TTGC CGGA AGGA GACG AATT CAAC CTGC CTAC ACGA CGAC CCGG TCTG GGAA GGAT TGCT
1 33 33 32 32 32 32 32 32 32 32 32 32 32 32 32
TTAG GCTG GAGT AGGC TTCC ATGA TTCA CCAT AAGT GAGC GTAT CGAA TCAT ATTC TGTG
1 32 32 31 31 31 31 31 31 31 31 31 31 31 31 30
AGTT ATCC AGCA GTCT TGTC TCAC CACT ACTA TAAT CCGT CCTA TCGG GGTA TATG AACG
1 30 30 30 30 30 30 30 30 30 30 29 29 29 29 29
CACG GATT ATCT TGGC AGCC TATC GCTC GGCT TCTA AACT CCTT CTTA TGTA TAGT AGTG
1 29 29 29 29 29 29 29 29 29 28 28 28 28 28 28
CCGA AATG CCTG CTGT AGTC GTCC GGTT ACAC TACC CATC CATA GTGA TGAA GGTC CTTG
1 27 27 27 27 27 27 27 26 26 26 26 26 26 26 26
GCAC GGCA CGTC CTGG TAAG TCGT TGAT CAGA GAAC ACCA TTAC CATT AGAT CGGT ATTG
1 26 26 26 26 26 26 25 25 25 25 25 25 25 25 25
TTGA GATA GGAC AAGC GTCA CAAT GCAG ACAT TGCC ATAG CGTG CGCA TAGG ACCG TTCG
1 25 24 24 24 24 24 24 24 24 24 24 24 23 23 23
AGCG GTTC ACTT CGTT AGAC GCAT TCCG TAAC ACGC CAGC GACC CATG TCGA TAGA GCAA
1 23 23 23 23 23 22 22 22 22 22 22 22 22 22 22
CTCG TACT AATC CGCT GAAT GCGT AGTA GCCA ATGG TCAA CTCA TGGA GAAG GATC TGCA
1 22 22 21 21 21 21 21 21 21 21 21 20 20 20 20
GCCT GTCG CAAG TCGC CTGA GATG CTAA GCGA ATAC GTTA GCTA AGGT CCAG ACAG CTAG
1 19 19 19 19 19 19 19 19 18 18 18 18 18 18 17
CGTA ACGT TACA AGCT CAGG ATGT ATCG ATGC TGAC TAGC ACTG TCAG CGAT TACG CAGT
1 17 17 17 16 16 16 16 15 14 14 14 14 14 13 11
GTAC GACT
1 10 9
Note
Lines <- "
Counter({'CCCC': 22115, 'TTTT': 22043, 'AAAA': 22037, 'GGGG':21930, 'AAAC': 154, 'TTAT': 152, 'CCCA': 152, 'CCTC': 152, 'GGGC': 151, 'TTTG': 150, 'GTGG': 149, 'GCCC': 148, 'CCGC': 145, 'CGGG': 145, 'TGGG': 144, 'AGAA': 144, 'TTGT': 144, 'GAAA': 142, 'CCCG': 142, 'CCCT': 142, 'TCCC': 141, 'CAAA': 139, 'ATTT': 137, 'CGCC': 134, 'GGTG': 133, 'GAGG': 133, 'TTTA': 132, 'CTTT': 131, 'TCTT': 131, 'ACCC': 130, 'AGGG': 130, 'GGAG': 129, 'AACA': 129, 'TAAA': 129, 'TATT': 128, 'TTTC': 128, 'AAGA': 127, 'GGGA': 126, 'ACAA': 126, 'TTCT': 125, 'CTCC': 124, 'GCGG': 124, 'ATAA': 123, 'GGCG': 120, 'CACC': 119, 'AAAT': 118, 'AATA': 117, 'AAAG': 114, 'GTTT': 114, 'TGTT': 112, 'GGGT': 112, 'CCAC': 110, 'CGCG': 45, 'AACC': 43, 'TTAA': 41, 'CTCT': 41, 'GGCC': 41, 'ACTC': 40, 'CTTC': 40, 'GCCG': 39, 'ATTA': 39, 'ACCT': 39, 'TGCG': 39, 'ATAT': 39, 'TCTC': 38, 'ACGG': 38, 'TATA': 37, 'ATCA': 37, 'CGGC': 37, 'CGAG': 36, 'AGAG': 36, 'GACA': 35, 'GTTG': 35, 'TGAG': 35, 'TGGT': 35, 'CCAA': 35, 'TTGG': 34, 'GTGT': 34, 'GCGC': 34, 'CACA': 34, 'GTAA': 34, 'GTAG': 34, 'TCCA': 34, 'TCCT': 34, 'AAGG': 34, 'GAGA': 34, 'GCTT': 34, 'GTGC': 33, 'CTAT': 33, 'TTGC': 33, 'CGGA': 33, 'AGGA': 32, 'GACG': 32, 'AATT': 32, 'CAAC': 32, 'CTGC': 32, 'CTAC': 32, 'ACGA': 32, 'CGAC': 32, 'CCGG': 32, 'TCTG': 32, 'GGAA': 32, 'GGAT': 32, 'TGCT': 32, 'TTAG': 32, 'GCTG': 32, 'GAGT': 31, 'AGGC': 31, 'TTCC': 31, 'ATGA': 31, 'TTCA': 31, 'CCAT': 31, 'AAGT': 31, 'GAGC': 31, 'GTAT': 31, 'CGAA': 31, 'TCAT': 31, 'ATTC': 31, 'TGTG': 30, 'AGTT': 30, 'ATCC': 30, 'AGCA': 30, 'GTCT': 30, 'TGTC': 30, 'TCAC': 30, 'CACT': 30, 'ACTA': 30, 'TAAT': 30, 'CCGT': 30, 'CCTA': 29, 'TCGG': 29, 'GGTA': 29, 'TATG': 29, 'AACG': 29, 'CACG': 29, 'GATT': 29, 'ATCT': 29, 'TGGC': 29, 'AGCC': 29, 'TATC': 29, 'GCTC': 29, 'GGCT': 29, 'TCTA': 29, 'AACT': 28, 'CCTT': 28, 'CTTA': 28, 'TGTA': 28, 'TAGT': 28, 'AGTG': 28, 'CCGA': 27, 'AATG': 27, 'CCTG': 27, 'CTGT': 27, 'AGTC': 27, 'GTCC': 27, 'GGTT': 27, 'ACAC': 26, 'TACC': 26, 'CATC': 26, 'CATA': 26, 'GTGA': 26, 
'TGAA': 26, 'GGTC': 26, 'CTTG': 26, 'GCAC': 26, 'GGCA': 26, 'CGTC': 26, 'CTGG': 26, 'TAAG': 26, 'TCGT': 26, 'TGAT': 25, 'CAGA': 25, 'GAAC': 25, 'ACCA': 25, 'TTAC': 25, 'CATT': 25, 'AGAT': 25, 'CGGT': 25, 'ATTG': 25, 'TTGA': 25, 'GATA': 24, 'GGAC': 24, 'AAGC': 24, 'GTCA': 24, 'CAAT': 24, 'GCAG': 24, 'ACAT': 24, 'TGCC': 24, 'ATAG': 24, 'CGTG': 24, 'CGCA': 24, 'TAGG': 23, 'ACCG': 23, 'TTCG': 23, 'AGCG': 23, 'GTTC': 23, 'ACTT': 23, 'CGTT': 23, 'AGAC': 23, 'GCAT': 22, 'TCCG': 22, 'TAAC': 22, 'ACGC': 22, 'CAGC': 22, 'GACC': 22, 'CATG': 22, 'TCGA': 22, 'TAGA': 22, 'GCAA': 22, 'CTCG': 22, 'TACT': 22, 'AATC': 21, 'CGCT': 21, 'GAAT': 21, 'GCGT': 21, 'AGTA': 21, 'GCCA': 21, 'ATGG': 21, 'TCAA': 21, 'CTCA': 21, 'TGGA': 20, 'GAAG': 20, 'GATC': 20, 'TGCA': 20, 'GCCT': 19, 'GTCG': 19, 'CAAG': 19, 'TCGC': 19, 'CTGA': 19, 'GATG': 19, 'CTAA': 19, 'GCGA': 19, 'ATAC': 18, 'GTTA': 18, 'GCTA': 18, 'AGGT': 18, 'CCAG': 18, 'ACAG': 18, 'CTAG': 17, 'CGTA': 17, 'ACGT': 17, 'TACA': 17, 'AGCT': 16, 'CAGG': 16, 'ATGT': 16, 'ATCG': 16, 'ATGC': 15, 'TGAC': 14, 'TAGC': 14, 'ACTG': 14, 'TCAG': 14, 'CGAT': 14, 'TACG': 13, 'CAGT': 11, 'GTAC': 10, 'GACT': 9})"
This answer may help you in this particular case, but you should insist that whoever produced that result export it in a format that can be easily imported by any programming language. What you have here is a string representation of a Python object, which is definitely not a good way of exchanging data.
However, you can try this:
#place here the correct path to the file
fn <- "pattern.txt"
#here we read the content of the file as is
filecontent <- readChar(fn,file.info(fn)$size)
#we manipulate the string a bit to have an R list
res <- eval(parse(text = gsub("[\\{\\}\n]", "",
gsub(":", "=", sub("Counter", "list", filecontent)))))
Related
Fill new column with different numbers depending on how often values appear in other column, R
I have a dataframe with different company IDs appearing from once to over 30 times in different rows. I want to add a new column "di_Flex" and fill it with specific values depending on how often the same company ID appears in a column: If it appears twice in the column, add the value 6 to the new column "di_Flex", if it appears 3x, add "8", if it appears 4x add "10", if it appears 5x add "12.8", if it appears 6x add "14.67", if it appears 7 or more times add "16". Here is the dataframe: c(0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 15, 16, 17, 17, 18, 18, 19, 20, 21, 22, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 29, 30, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 36, 36, 37, 38, 38, 38, 38, 38, 38, 39, 40, 41, 41, 41, 42, 42, 42, 43, 43, 43, 44, 45, 45, 46, 46, 46, 47, 48, 49, 50, 50, 51, 53, 54, 54, 54, 54, 55, 57, 57, 57, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 66, 66, 66, 66, 66, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA) Thank you for your help!
Assuming your data is called df with a column value: library(tidyverse) left_join(df, df %>% group_by(value) %>% tally()) %>% mutate(di_Flex = case_when(n == 2 ~ 6, n == 3 ~ 8, n == 4 ~ 10, n == 5 ~ 12.8, n == 6 ~ 14.67, n >= 7 ~ 16)) %>% select(-n) This gives us: 1 0 12.8 2 0 12.8 3 0 12.8 4 0 12.8 5 0 12.8 6 1 NA 7 2 NA 8 3 NA 9 4 NA 10 5 8.0 11 5 8.0 12 5 8.0 13 6 16.0 14 6 16.0 15 6 16.0 16 6 16.0 17 6 16.0 18 6 16.0 19 6 16.0 20 6 16.0 Data: df <- data.frame(value = c(0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 15, 16, 17, 17, 18, 18, 19, 20, 21, 22, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 29, 30, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 36, 36, 37, 38, 38, 38, 38, 38, 38, 39, 40, 41, 41, 41, 42, 42, 42, 43, 43, 43, 44, 45, 45, 46, 46, 46, 47, 48, 49, 50, 50, 51, 53, 54, 54, 54, 54, 55, 57, 57, 57, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 66, 66, 66, 66, 66, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA))
tidyverse and dplyr: Conditional replacement of values in a column based on other column [duplicate]
This question already has answers here: Can dplyr package be used for conditional mutating? (5 answers) Closed 2 years ago. I want to mutate a column A4 by A3 but reducing value of A3 by 1 if Total == 63. What am I doing wrong here? tb1 %>% mutate(A4 = replace(A3, Total == 63, A3-1)) The complete code with data is here library(tidyverse) tb1 <- structure( list( A1 = c(16, 11, 16, 18, 20, 19, 16, 18, 20, 15, 17, 19, 19, 19, 16, 19, 16, 15, 19, 19, 16, 18, 18, 19, 19, 18, 20, 18, 19, 19, 19, 19, 17, 19, 17, 16, 18, 19, 16, 18, 17, 19, 19, 20, 17, 16, 18, 16, 15, 19, 19, 17, 20, 18, 16, 19, 19, 15, 17, 17, 19, 19, 16, 17, 18, 19, 17, 19, 17, 15, 19, 16, 17 ) , A2 = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ) , A3 = c(33, 34, 38, 36, 36, 34, 41, 36, 40, 38, 38, 41, 38, 34, 33, 36, 41, 40, 41, 38, 41, 33, 40, 38, 40, 38, 41, 41, 40, 41, 40, 38, 34, 40, 36, 41, 40, 40, 33, 38, 36, 41, 40, 40, 28, 41, 40, 41, 33, 41, 36, 36, 40, 34, 41, 41, 38, 38, 41, 38, 41, 41, 36, 40, 38, 38, 40, 41, 38, 22, 36, 34, 38 ) , Total = c(57, 53, 62, 62, 64, 61, 65, 62, 68, 61, 63, 68, 65, 61, 57, 63, 65, 63, 68, 65, 65, 59, 66, 65, 67, 64, 69, 67, 67, 68, 67, 65, 59, 67, 61, 65, 66, 67, 57, 64, 61, 68, 67, 68, 53, 65, 66, 65, 56, 68, 63, 61, 68, 60, 65, 68, 65, 61, 66, 63, 68, 68, 60, 65, 64, 65, 65, 68, 63, 45, 63, 58, 63 ) ) , class = "data.frame" , row.names = c(NA, -73L) ) tb1 %>% filter(Total == 63) #> A1 A2 A3 Total #> 1 17 8 38 63 #> 2 19 8 36 63 #> 3 15 8 40 63 #> 4 19 8 36 63 #> 5 17 8 38 63 #> 6 17 8 38 63 #> 7 19 8 36 63 #> 8 17 8 38 63 tb2 <- tb1 %>% mutate(A4 = replace(A3, Total == 63, A3-1)) %>% mutate(Total = A1 + A2 + A3) #> Warning: Problem with `mutate()` input `A4`. #> x number of items to replace is not a multiple of replacement length #> ℹ Input `A4` is `replace(A3, Total == 63, A3 - 1)`. 
tb2 %>% filter(Total == 62) #> A1 A2 A3 Total #> 1 16 8 38 62 #> 2 18 8 36 62 #> 3 18 8 36 62
You are better off using ifelse here : library(dplyr) tb1 %>% mutate(A4 = ifelse(Total == 63, A3 -1, A3)) As for why replace does not work, check the source code of replace : replace function (x, list, values) { x[list] <- values x } It assigns values to x after subsetting for list. When you use : tb1 %>% mutate(A4 = replace(A3, Total == 63, A3-1)) your values is of length length(tb1$A3) but list selects only sum(tb1$Total == 63) elements. These lengths do not match, hence you get the warning that the number of items to replace is not a multiple of replacement length: R tries to recycle those values, but the lengths are still unequal. If you want to make replace work you can try : tb1 %>% mutate(A4 = replace(A3, Total == 63, A3[Total == 63] -1)) but again, as I mentioned, it is easier to just use ifelse here.
creating named vector from a csv file did not work
Creating named vector where names are associated to GO id from a csv file did not work. > head(read.delim("~/GOmapping.tsv", sep = '\t')) V1 V14 1 sp0000005 GO:0003723 2 sp0000006 GO:0016021 3 sp0000007 GO:0003700,GO:0006355,GO:0043565 4 sp0000016 GO:0046983 5 sp0000017 GO:0004672,GO:0005524,GO:0006468 6 sp0000022 GO:0003677,GO:0046983 > head(read.delim("~/GOmapping.tsv", sep = '\t'))[1] V1 1 sp0000005 2 sp0000006 3 sp0000007 4 sp0000016 5 sp0000017 6 sp0000022 > head(read.delim("~/GOmapping.tsv", sep = '\t'))[2] V14 1 GO:0003723 2 GO:0016021 3 GO:0003700,GO:0006355,GO:0043565 4 GO:0046983 5 GO:0004672,GO:0005524,GO:0006468 6 GO:0003677,GO:0046983 > geneID2GO <- read.delim("~/GOmapping.tsv", sep = '\t'))[2] > geneID2GO <- read.delim("~/GOmapping.tsv", sep = '\t')[2] > names(geneID2GO) <- read.delim("~/GOmapping.tsv", sep = '\t')[1] > head(geneID2GO) c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 57, 58, 59, 60, 6 ... 1 GO:0003723 2 GO:0016021 3 GO:0003700,GO:0006355,GO:0043565 4 GO:0046983 5 GO:0004672,GO:0005524,GO:0006468 6 GO:0003677,GO:0046983 What did I miss? Thank you in advance.
If you want a vector as result, maybe you could try to coerce your values and names (column 1) to character. data <- read.delim("~/GOmapping.tsv", sep = '\t') geneID2GO <- as.character(data[,2]) names(geneID2GO) <- as.character(data[,1]) head(geneID2GO) sp0000005 sp0000006 sp0000007 "GO:0003723" "GO:0016021" "GO:0003700,GO:0006355,GO:0043565" sp0000016 "GO:0046983" Alternatively, you can display the result as follows: cbind(geneID2GO) geneID2GO sp0000005 "GO:0003723" sp0000006 "GO:0016021" sp0000007 "GO:0003700,GO:0006355,GO:0043565" sp0000016 "GO:0046983"
networkx can't calculate algebraic connectivity
I can compute the algebraic connectivity of the complete graph on 20 vertices in fraction of a second using import networkx D = {} for i in range(20): D[i] = [j for j in range(20)] G = networkx.Graph(D) networkx.algebraic_connectivity(G) However, in a process I generate a graph (on 20 nodes) that I ask networkx to compute its algebraic connectivity, and it keeps running for ever with no errors. Here is the graph: import networkx D = {0: [32, 33, 19, 5, 21, 37, 6, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 5: [32, 0, 33, 19, 37, 21, 6, 22, 38, 39, 41, 26, 42, 11, 43, 44, 28, 15, 31], 6: [0, 32, 33, 19, 5, 37, 21, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 11: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 43, 28, 44, 15, 31], 15: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31], 19: [0, 32, 33, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 21: [32, 0, 33, 19, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 22: [32, 33, 19, 5, 21, 37, 6, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 26: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 42, 11, 43, 28, 44, 15, 31], 28: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 43, 44, 15, 31], 31: [32, 0, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15], 32: [0, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31, 15], 33: [0, 32, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 37: [32, 0, 33, 19, 5, 21, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31, 15], 38: [32, 0, 33, 19, 21, 37, 5, 6, 22, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 39: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 41, 26, 42, 11, 43, 28, 44, 15, 31], 41: [32, 0, 33, 19, 21, 37, 5, 38, 6, 22, 39, 26, 42, 11, 43, 28, 44, 15, 31], 42: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 11, 43, 28, 44, 15, 31], 43: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 28, 44, 15, 31], 44: [32, 0, 33, 19, 5, 21, 37, 38, 6, 22, 39, 41, 42, 26, 11, 43, 
28, 15, 31]} G = networkx.Graph(D) networkx.algebraic_connectivity(G) Any reasons why it is so, and how to fix it?
There seems to be a bug in the tracemin method that is the default with networkx.algebraic_connectivity(). Try using networkx.algebraic_connectivity(G, method='lanczos')
Convert list of lists to dataframe
I got a nested list, named mylist which has length 4. Each element of this list is an experiment: exp1.1, exp1.2, exp2.1 and exp2.2. Each experiment contains observations of length (in days) of four plant growth stages: EM-V6 V6-R0 R0-R4 and R4-R9. Each growth stage is organized as a data frame with year and mean. Here is the complete data: mylist=structure(list(exp1.1 = structure(list(`EM-V6` = structure(list( year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 
27, 27, 27, 28, 30, 28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", "R4-R9")), exp1.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 29, 
28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", "R4-R9")), exp2.1 = structure(list(`EM-V6` = structure(list(year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, mean = c(27, 29, 28, 28, 
27, 30, 29, 27, 30, 26, 30, 28, 29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", "R4-R9")), exp2.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame"), `R4-R9` = structure(list(year = 
2011:2100, mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", "R4-R9"))), .Names = c("exp1.1", "exp1.2", "exp2.1", "exp2.2" )) What I need to do is to "unlist" this nested list to a data frame that will look like this: YEAR EXP EM-V6 V6-R0 R0-R4 R4-R9 2011 exp1.1 34 30 31 27 2011 exp1.2 34 30 31 27 2011 exp2.1 34 30 31 27 2011 exp1.1 34 30 31 27 Which means: - first year, first experiment, and growth stages. - first year, second experiment and growth stages. - first year, third experiment and growth stages - first year, fourth experiment and growth stages - second year, first experiment and growth stages and so on. How to perform that data transformation?
An alternative using rbindlist from the data.table-package twice: library(data.table) # bind the dataframes in the 'listed lists' together and include the stage name with the 'id'-parameter # the resulting 'data.table's are returned as a list step1 <- lapply(mylist, rbindlist, id = 'stages') # bind the resulting list together and include the experiment id step2 <- rbindlist(step1, id = 'experiment') # reshape to wide format dcast(step2, year + experiment ~ stages, value.var = 'mean') Or in one go: dcast(rbindlist(lapply(mylist, rbindlist, id = 'stages'), id = 'experiment'), year + experiment ~ stages, value.var = 'mean') which gives: year experiment EM-V6 R0-R4 R4-R9 V6-R0 1: 2011 exp1.1 34 31 27 30 2: 2011 exp1.2 34 31 27 30 3: 2011 exp2.1 34 31 27 30 4: 2011 exp2.2 34 31 27 30 5: 2012 exp1.1 34 32 29 33 --- 356: 2099 exp2.2 30 30 25 29 357: 2100 exp1.1 26 30 24 29 358: 2100 exp1.2 26 30 24 29 359: 2100 exp2.1 26 30 24 29 360: 2100 exp2.2 26 30 24 29
Alternate tidyverse: library(tidyverse) map_df(mylist, ~bind_rows(., .id="id"), .id="EXP") %>% spread(id, mean)
We can use tidyverse with more compact and readable code library(dplyr) library(tidyr) library(purrr) res1 <- mylist %>% #bind the inner datasets and create an id column map(bind_rows, .id = "id") %>% #bind the outer datasets and create an EXP column bind_rows(.id = "EXP") %>% #reshape to wide format spread(id, mean) head(res1, 4) # EXP year EM-V6 R0-R4 R4-R9 V6-R0 #1 exp1.1 2011 34 31 27 30 #2 exp1.1 2012 34 32 29 33 #3 exp1.1 2013 32 32 28 33 #4 exp1.1 2014 28 33 28 32 Or we can approach this by looping through the mylist with lapply, then create a new column 'name' using Map by cbinding the names of the inner list elements, then rbind the list elements with do.call(rbind, ...). Now do a second Map to create a new column based on the names of 'mylist', rbind the list elements and then reshape from base R to convert it to 'wide' res <- do.call(rbind, Map(cbind, lapply(mylist, function(x) do.call(rbind, Map(cbind, x, name = names(x)))), EXP= names(mylist))) res2 <- reshape(res, idvar = c("year", "EXP"), timevar = "name", direction = "wide") row.names(res2) <- NULL head(res2, 4) # year EXP mean.EM-V6 mean.V6-R0 mean.R0-R4 mean.R4-R9 #1 2011 exp1.1 34 30 31 27 #2 2012 exp1.1 34 33 32 29 #3 2013 exp1.1 32 33 32 28 #4 2014 exp1.1 28 32 33 28 NOTE: No external packages used (100% base R) or use dcast from reshape2 to transform to 'wide' format library(reshape2) res2 <- dcast(res, year + EXP~name, value.var = "mean") head(res2, 4) # year EXP EM-V6 V6-R0 R0-R4 R4-R9 #1 2011 exp1.1 34 30 31 27 #2 2011 exp1.2 34 30 31 27 #3 2011 exp2.1 34 30 31 27 #4 2011 exp2.2 34 30 31 27