Diffie Hellman with not prime modulus - math

I've been reading a lot about the Diffie-Hellman backdoor that arises when the modulus is not prime.
But I need help working through an example in order to finally understand it.
So I have the next parameters:
p = 16289129
g = 5
A = 7835875
B = 3410160
I know that p can be factored: 23 · 708223
How can I compute the discrete log with a non-prime modulus? This doesn't work:
p = 16289129
g = 5
A = 7835875
B = 3410160
discrete_log(A, g, p)

The only way I know to do this is brute force, using a for loop over the range of p. With small numbers you can recover A and B quite quickly, but this is obviously impossible for anything much larger than your example. Here is some code that recovers A & B.
def egcd(a, b):
    """Extended Euclidean algorithm.

    Returns a tuple (g, x, y) where g is gcd(a, b) and a*x + b*y == g.
    """
    if a == 0:
        # gcd(0, b) == b == 0*0 + b*1
        return (b, 0, 1)
    g, y, x = egcd(b % a, a)
    # (b % a)*y + a*x == g  and  b % a == b - (b // a)*a
    # =>  a*(x - (b // a)*y) + b*y == g
    return (g, x - (b // a) * y, y)


def modinv(a, m):
    """Return the multiplicative inverse of a modulo m.

    Raises ValueError when gcd(a, m) != 1, i.e. when no inverse exists.
    ValueError is a subclass of Exception, so callers that caught the
    previous bare Exception keep working.
    """
    g, x, _ = egcd(a, m)
    if g != 1:
        raise ValueError('modular inverse does not exist')
    return x % m
# Brute-force search: every integer x in [0, 16289129) is tried as Alice's
# secret exponent; for each x the public values and Bob's shared secret are
# recomputed and a series of XOR comparisons is printed when they match.
# NOTE(review): the indentation of this snippet was lost, so the nesting below
# is not visible. The "inside"/"outside" print labels and the sample output
# suggest the "inside" checks run within the while loop and the "outside"
# checks run after it, all within the for loop -- confirm before running.
for x in range(0, 16289129):
# Variables Used
sharedPrime = 16289129 # p
sharedBase = 5 #1009 # g
#sharedPrime = 1031267
#sharedBase = 111029
aliceSecret = x # a
# NOTE(review): 3410160 is the value the question lists as the public B, yet
# it is used here as Bob's secret exponent b -- confirm this is intended.
bobSecret = 3410160 # b
# Begin
#print( "Publicly Shared Variables:")
#print( " Publicly Shared Prime: " , sharedPrime )
#print( " Publicly Shared Base: " , sharedBase )
# Alice Sends Bob A = g^a mod p
#A = (sharedBase**aliceSecret) % sharedPrime
A = pow(sharedBase,aliceSecret, sharedPrime)
#print( "\n Alice Sends Over Public Chanel: " , A )
# Bob Sends Alice B = g^b mod p
#B = (sharedBase ** bobSecret) % sharedPrime
B = pow(sharedBase,bobSecret, sharedPrime)
#print( "\n Bob Sends Over Public Chanel: ", B )
#print( "\n------------\n" )
#print( "Privately Calculated Shared Secret:" )
# Alice Computes Shared Secret: s = B^a mod p
# aliceSharedSecret = (B ** aliceSecret) % sharedPrime
#aliceSharedSecret = pow(B,aliceSecret,sharedPrime)
#print( " Alice Shared Secret: ", aliceSharedSecret )
# Bob Computes Shared Secret: s = A^b mod p
#bobSharedSecret = (A**bobSecret) % sharedPrime
bobSharedSecret = pow(A,bobSecret,sharedPrime)
#print( " Bob Shared Secret: ", bobSharedSecret )
# `^` is bitwise XOR in Python. `compare` is not read again in this snippet.
compare = bobSecret ^ aliceSecret
# Modular inverse of the base modulo p, recomputed for every x.
mi = modinv(sharedBase,sharedPrime)
c=A
d=A
iterx=0
oldc=A
oldd=A
# Repeatedly multiply c by mi (mod p) while right-shifting mi by one bit per
# pass; oldc/oldd hold the values from before the current update. The loop
# ends once c is 0.
while c !=0:
oldc = c
oldd = d
c=c*mi%sharedPrime
d=(c*mi**2)%sharedPrime
#print(mi)
mi>>=1
iterx+=1
#print(iterx, c)
# Report whenever (previous c) XOR A equals one of the two secret exponents.
dfcompare = oldc ^ A
if dfcompare == aliceSecret: # or dfcompare == A or dfcompare == A or dfcompare == bobSharedSecret:
print("alice Key found inside: ", aliceSecret,bobSecret,aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
if dfcompare == bobSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("bob Key found inside: ", aliceSecret,bobSecret,aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
# Same two checks using (previous d) XOR A.
dfcompare = oldd ^ A
if dfcompare == aliceSecret: # or dfcompare == A or dfcompare == A or dfcompare == bobSharedSecret:
print("alice Key found inside: ", aliceSecret,bobSecret,aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
if dfcompare == bobSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("bob Key found inside: ", aliceSecret,bobSecret,aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
#print(iterx, oldc)
# The same oldc / oldd comparisons again, labelled "outside" in the output.
dfcompare = oldc ^ A
if dfcompare == aliceSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("alice Key found outside: ", aliceSecret,bobSecret, aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
if dfcompare == bobSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("bob Key found outside: ", aliceSecret,bobSecret, aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
#break
dfcompare = oldd ^ A
if dfcompare == aliceSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("alice Key found outside: ", aliceSecret,bobSecret, aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
if dfcompare == bobSecret: # or dfcompare == A or dfcompare == B or dfcompare == bobSharedSecret:
print("bob Key found outside: ", aliceSecret,bobSecret, aliceSecret^bobSecret, mi, c, oldc, A,bobSharedSecret,B, dfcompare == bobSecret, dfcompare == aliceSecret, dfcompare == 266)
output:
alice Key found inside: 0 3410160 3410160 1628913 3257826 1 1 1 7231001 False True False
alice Key found inside: 0 3410160 3410160 1628913 3257826 1 1 1 7231001 False True False
alice Key found inside: 514345 3410160 3396057 24 3399166 5388270 1550861 9501427 7231001 False True False
alice Key found inside: 736913 3410160 4142689 25451 181780 1390738 5482147 13392747 7231001 False True False
bob Key found inside: 805168 3410160 3686848 795 14730017 2027477 12797655 13227860 7231001 True False False
alice Key found inside: 1090405 3410160 2403221 397 5406755 9329510 10418179 11317568 7231001 False True False
alice Key found inside: 1319263 3410160 2107823 397 12968756 10691327 11994016 9408910 7231001 False True False
alice Key found inside: 1520691 3410160 2309315 198 15131181 6151666 4909505 3061289 7231001 False True False
bob Key found inside: 1812398 3410160 3125086 0 9989884 9989884 11298316 15863668 7231001 True False False
bob Key found inside: 1812398 3410160 3125086 0 0 9989884 11298316 15863668 7231001 True False False
bob Key found inside: 1812398 3410160 3125086 0 0 9989884 11298316 15863668 7231001 True False False
bob Key found outside: 1812398 3410160 3125086 0 0 9989884 11298316 15863668 7231001 True False False
bob Key found outside: 1812398 3410160 3125086 0 0 9989884 11298316 15863668 7231001 True False False
bob Key found inside: 1848360 3410160 2637016 6 5892567 12707894 16113862 3527100 7231001 True False False
bob Key found inside: 2076229 3410160 2860725 12 4861935 6311004 5522604 5891481 7231001 True False False
bob Key found inside: 2637508 3410160 1848884 198 14344704 14930223 14146015 4071246 7231001 True False False
bob Key found inside: 3121830 3410160 1813078 3181 11985609 834007 3715367 14209381 7231001 True False False
alice Key found inside: 3129104 3410160 1816544 101807 3083406 16029624 1760214 8757757 7231001 False True False
alice Key found inside: 3789467 3410160 907883 99 6405853 2089061 1155243 11032296 7231001 False True False

Related

how to remove these characters from a string c () \ ][ ""

I have a string like this
st <- "c(c(c(\"E\" >= \"E\", \"B\" <= \"E\" | \"D\" <= \"B\"), \"A\" >= \"C\" | \"A\" >= \"A\"), c(\"E\" >= \"C\", \"D\" <= \"C\" | \"C\" <= \"C\")) []"
I want to remove these characters from it c () \ ][ ""
i would like to get this
"E >= E, B <= E | D <= B, A >= C | A >= A, E >= C, D <= C | C <= C"
Here is one approach that could solve your problem:
gsub('[c()"]| \\[\\]', "", st)
# [1] "E >= E, B <= E | D <= B, A >= C | A >= A, E >= C, D <= C | C <= C"

looping through rows of dataframe in R to create vector based on conditions doesn´t work

[image: enter image description here] I am trying to create a binary vector by looping through my dataframe row by row. If the first non-zero number in the row is a 1, I want a 1 written into the vector; if it is a -1, I want a 0. The dataframe includes 1, 0 and -1 as entries. My code so far looks like this, but it doesn't output the desired vector (only the first entry is correct; every other entry is just a zero).
# Build a binary vector with one entry per row of Temp. The first eight
# columns (Stereo, Mono, MS, HT1RB1, HT0RB0, ITD0.8SS100, ITD1.2SS60,
# ITD1.6SS100) are scanned left to right and the first non-zero entry decides:
# -1 gives 0, any other non-zero value (1 in this data) gives 1. Rows that are
# entirely zero keep the initial 0.
#
# Fix: the loop used to run over seq_along(nrow(Temp)). nrow() returns a single
# number, so seq_along() of it is just 1 and only the first row was ever
# processed -- which is why only the first entry came out right.
# seq_len(nrow(Temp)) visits every row (and none when Temp has no rows), and
# the row index replaces the separate counter variable.
Choice <- numeric(nrow(Temp))
for (row in seq_len(nrow(Temp))) {
  for (col in 1:8) {
    value <- Temp[row, col]
    if (value != 0) {
      Choice[row] <- if (value == -1) 0 else 1
      break
    }
  }
}
I would be so grateful if someone were able to help; I have looked at this for way too long now and have totally lost perspective.
Sorry that the image quality is so shitty :/
Thank you! :)
Reading what you want for your desired output, I think the approach could be changed.
Here, I loop through the elements of the row and break when I find the first non-zero element, which is added to vec.
Temp <- replicate(8, sample(-1:1, size = 10, replace = TRUE))
colnames(Temp) <- c("Stereo",
"Mono",
"MS",
"HT1RB1",
"HT0RB0",
"ITD0.8SS100",
"ITD1.2SS60",
"ITD1.6SS100")
Temp <- as.data.frame(Temp)
Temp
#> Stereo Mono MS HT1RB1 HT0RB0 ITD0.8SS100 ITD1.2SS60 ITD1.6SS100
#> 1 1 -1 0 1 1 0 -1 -1
#> 2 0 -1 1 -1 0 1 1 -1
#> 3 -1 0 -1 1 0 0 1 0
#> 4 -1 0 1 0 0 -1 -1 -1
#> 5 0 1 1 -1 1 0 -1 1
#> 6 0 -1 0 -1 0 0 0 1
#> 7 -1 0 0 0 -1 1 1 -1
#> 8 1 -1 1 -1 1 -1 -1 -1
#> 9 0 -1 1 0 -1 1 -1 -1
#> 10 0 1 -1 0 -1 -1 1 -1
# For each row of Temp, record the sign of the first non-zero entry:
# 1 if it is positive, -1 if it is negative, and 0 (the initial value) when
# the whole row is zero.
# seq_len() is used instead of 1:nrow(Temp) / 1:ncol(Temp) so that an empty
# data frame produces no iterations rather than the indices 1 and 0.
vec <- numeric(nrow(Temp))
for (i in seq_len(nrow(Temp))) {
  for (j in seq_len(ncol(Temp))) {
    if (Temp[i, j] > 0) {
      vec[i] <- 1
      break
    } else if (Temp[i, j] < 0) {
      vec[i] <- -1
      break
    }
  }
}
vec
#> [1] 1 -1 -1 -1 1 -1 -1 1 -1 1
I hope I interpreted the desired outcome correctly. Good luck!
I think this should simplify your life a lot.
Instead of a for loop, use mutate from the dplyr package,
and, instead of that long chain of else if, use case_when.
library(dplyr)
# Add a Choice column to Temp. Conditions are checked top to bottom and the
# first one that holds supplies the value: a column value that is neither 0
# nor -1 gives 1, a value that is neither 1 nor 0 gives 0. Rows where no
# condition holds get NA.
mutate(
  Temp,
  Choice = case_when(
    Stereo != 0 & Stereo != -1 ~ 1,
    Stereo != 1 & Stereo != 0 ~ 0,
    Mono != -1 & Mono != 0 ~ 1,
    Mono != 1 & Mono != 0 ~ 0,
    MS != -1 & MS != 0 ~ 1,
    MS != 1 & MS != 0 ~ 0,
    HT1RB1 != -1 & HT1RB1 != 0 ~ 1,
    HT1RB1 != 1 & HT1RB1 != 0 ~ 0,
    HT0RB0 != -1 & HT0RB0 != 0 ~ 1,
    HT0RB0 != 1 & HT0RB0 != 0 ~ 0,
    ITD0.8SS100 != -1 & ITD0.8SS100 != 0 ~ 1,
    ITD0.8SS100 != 1 & ITD0.8SS100 != 0 ~ 0,
    ITD1.2SS60 != -1 & ITD1.2SS60 != 0 ~ 1,
    ITD1.2SS60 != 1 & ITD1.2SS60 != 0 ~ 0,
    ITD1.6SS100 != -1 & ITD1.6SS100 != 0 ~ 1,
    ITD1.6SS100 != 1 & ITD1.6SS100 != 0 ~ 0
  )
)
For a good reproducible example, there is no need to share private data or to post a screenshot, just create fake data that look believable.
For example, you can use this:
Temp <- replicate(8, sample(-1:1, size = 10, replace = TRUE))
colnames(Temp) <- c("Stereo",
"Mono",
"MS",
"HT1RB1",
"HT0RB0",
"ITD0.8SS100",
"ITD1.2SS60",
"ITD1.6SS100")
Temp <- as.data.frame(Temp)

Creating multiple new columns within a DF depending on the order of logical columns

I am trying to create three new columns with values depending on a particular order of three logical type columns.
eg I have this:
a b c
1 TRUE TRUE TRUE
2 TRUE FALSE TRUE
3 TRUE FALSE TRUE
And depending if going across the row the values are TRUE, TRUE, TRUE as in row 1, then create three new columns with the values 1,1,1 but if the order is TRUE,FALSE,TRUE as in row 2 and 3 then the values would be 2,3,3. Just to note, a value of TRUE does not = 1 but rather a value I define depending on all three logical values (A total of 8 possible combinations each defined by three separate numbers). So I get something like this:
a b c d e f
1 TRUE TRUE TRUE 5 5 2
2 TRUE FALSE TRUE 2 3 3
3 TRUE FALSE TRUE 2 3 3
If someone could point me in the right direction to do this as efficiently as possible it would be greatly appreciated as I am relatively new to R.
If there is no logic in getting values for the columns and you need to add conditions individually for each combination you can use if/else.
# Derive columns d, e and f from the TRUE/FALSE pattern in the first three
# columns of df (a, b, c in the example).
# Only those three columns are handed to apply(): apply() converts its input
# to a matrix, and a non-logical column would turn every value into text.
# The final else gives a same-length NA result for patterns that have no rule
# yet; without it such a row returns NULL, apply() produces a list instead of
# a matrix, and t() fails.
df[c("d", "e", "f")] <- t(apply(df[1:3], 1, function(x) {
  if (x[1] && x[2] && x[3]) {
    c(5, 5, 2)
  } else if (x[1] && !x[2] && x[3]) {
    c(2, 3, 3)
  } else {
    # add more conditions above this fallback
    c(NA_real_, NA_real_, NA_real_)
  }
}))
df
# a b c d e f
#1 TRUE TRUE TRUE 5 5 2
#2 TRUE FALSE TRUE 2 3 3
#3 TRUE FALSE TRUE 2 3 3
Here's a dplyr solution using case_when. On the left side of the ~ you define your conditions, and on the right side of the ~ you assign a value for when those conditions are met. If a condition is not met (i.e. all FALSE values), you will return NA.
# Add d, e and f according to the pattern of the logical columns a, b and c.
# The columns are already logical, so they are used directly instead of being
# compared with TRUE / FALSE (a == TRUE is just a, b == FALSE is !b).
# Patterns without a rule yield NA.
df %>%
  mutate(
    d = case_when(
      a & b & c ~ 5,
      a & !b & c ~ 2
    ),
    e = case_when(
      a & b & c ~ 5,
      a & !b & c ~ 3
    ),
    f = case_when(
      a & b & c ~ 2,
      a & !b & c ~ 3
    )
  )
Which gives you:
a b c d e f
<lgl> <lgl> <lgl> <dbl> <dbl> <dbl>
1 TRUE TRUE TRUE 5 5 2
2 TRUE FALSE TRUE 2 3 3
3 TRUE FALSE TRUE 2 3 3
Data:
df <- tribble(
~a, ~b, ~c,
TRUE, TRUE, TRUE,
TRUE, FALSE, TRUE,
TRUE, FALSE, TRUE
)

Logical operators: AND acting like OR

I'm having a hard time understanding how R treats the AND and OR operators when I'm using filter from dplyr.
Here's an example to illustrate:
library(dplyr)
xy <- data.frame(x=1:6, y=c("a", "b"), z= c(rep("d",3), rep("g",3)))
> xy
x y z
1 1 a d
2 2 b d
3 3 a d
4 4 b g
5 5 a g
6 6 b g
Using filter I want to eliminate all rows where x==1 and z==d. This would lead me to believe I want to use the AND operator: &
> filter(xy, x != 1 & z != "d")
x y z
1 4 b g
2 5 a g
3 6 b g
But this removes all rows that have either x==1 or z==d. What's more confusing, is that when I use the OR operator, | I get the desired result:
> filter(xy, x != 1 | z != "d")
x y z
1 2 b d
2 3 a d
3 4 b g
4 5 a g
5 6 b g
Also, this does work, however not as desirable for if I were stringing together == and != in the same conditional evaluation.
> filter(xy, !(x == 1 & z == "d"))
x y z
1 2 b d
2 3 a d
3 4 b g
4 5 a g
5 6 b g
Can someone explain what I'm missing?
This is a question of boolean algebra (De Morgan's laws). The logical expression !(x == 1 & z == "d") is equivalent to x != 1 | z != "d", just the same as -(x + y) is equivalent to -x - y. When you eliminate the bracket, you change all == to != and all & to | and vice versa. This leads to the fact that
!(x == 1 & z == "d")
is NOT the same as
x != 1 & z != "d"
but rather
x != 1 | z != "d"
A couple tips that won't fit in a comment:
If you're having trouble understanding how something is working in R, I'd highly recommend running each individual piece of the operation. With dplyr, it's easy to keep track on intermediate steps and display them all:
mutate(xy,
A = x != 1,
B = z != 'd',
A_and_B = A & B,
A_or_B = A | B
)
# x y z A B A_and_B A_or_B
# 1 1 a d FALSE FALSE FALSE FALSE
# 2 2 b d TRUE FALSE FALSE TRUE
# 3 3 a d TRUE FALSE FALSE TRUE
# 4 4 b g TRUE TRUE TRUE TRUE
# 5 5 a g TRUE TRUE TRUE TRUE
# 6 6 b g TRUE TRUE TRUE TRUE
I think that if you look at the definition of each column its values will make perfect sense. Then, after going one step at a time, hopefully the results will make sense too.
As others have stated in various ways, you're setting yourself up for a hard time from the start with
Using filter I want to eliminate all rows where x==1 and z==d
Don't think of filter as eliminating rows, think of it as keeping rows. If you mentally invert your goal to "keep all rows where..." you'll set yourself up for a more direct translation of words to code.
The result of filter is the rows where the specified condition is true.
Take for example x != 1 & z != "d". What are the rows where this condition is true? The output you got. The other rows were removed, because the condition was not true for those rows.
In this example, your real intention was to eliminate rows where x == 1 and z == "d".
In other words, you want to keep the rows where the condition x == 1 and z == "d" is false.
Putting that into code becomes filter(xy, !(x == 1 & z == "d")).
It's ironic that this looks much like your intention, and very different from what you actually tried to write.
If you forget this logic of filter,
you can remind yourself with a simpler experiment, filter(xy, TRUE) which will return all rows, and filter(xy, FALSE) which will return none.
# x != 1 & z != "d" evaluates to a single TRUE/FALSE vector which subsets the data
# note how & and | behave in isolation:
TRUE & TRUE # T AND T = T
## [1] TRUE
TRUE & FALSE # T AND F = F
## [1] FALSE
FALSE & FALSE # F AND F = F
## [1] FALSE
TRUE | TRUE # T OR T = T
## [1] TRUE
TRUE | FALSE # T OR F = T
## [1] TRUE
FALSE | FALSE # F OR F = F
## [1] FALSE
# Apply over vectors
(x1 <- xy$x != 1)
## [1] FALSE TRUE TRUE TRUE TRUE TRUE
(z1 <- xy$z != "d")
## [1] FALSE FALSE FALSE TRUE TRUE TRUE
x1 & z1 # you get last 3 rows
## [1] FALSE FALSE FALSE TRUE TRUE TRUE
x1 | z1 # you get all but 1st row (which contains 1 and d)
## [1] FALSE TRUE TRUE TRUE TRUE TRUE

Column values based on another column

I have some data with this structure:
## Column examples generation
bases <- c("A", "T", "C", "G")
ID <- c(1, 2, 3, 4, 5, 6)
SNP <- rep(c("F1", "F3", "F4"), each = length(ID))
Al_1 <- sample(bases, length(SNP), replace = TRUE)
Al_2 <- sample(bases, length(SNP), replace = TRUE)
tipo <- rep(".", length(SNP))
## Data frame generation:
# Every column is stored as character; ID is recycled across the three SNPs.
ArrDat <- data.frame(
  ID = as.character(ID), SNP = SNP, Al_1 = Al_1, Al_2 = Al_2, tipo = tipo,
  stringsAsFactors = FALSE
)
OrderArr <- ArrDat[order(ArrDat$ID), ]
## Column "tipo" values:
# Code for every (Al_1, Al_2) pair: rows index Al_1 and columns index Al_2,
# both in the order A, T, C, G. Identical alleles map to "STHG.<base>".
# A vectorised lookup replaces the row-by-row if/else chain, so it also works
# for zero rows and does not fail on missing alleles.
allele_levels <- c("A", "T", "C", "G")
tipo_codes <- matrix(
  c(
    "STHG.A", "a", "b", "c",
    "d", "STHG.T", "e", "f",
    "g", "h", "STHG.C", "i",
    "j", "k", "l", "STHG.G"
  ),
  nrow = 4, byrow = TRUE,
  dimnames = list(allele_levels, allele_levels)
)
# Indexing a matrix with a two-column (row, column) matrix picks one cell per
# row of the index; a pair containing an unknown allele gives NA.
pair_index <- cbind(
  match(OrderArr$Al_1, allele_levels),
  match(OrderArr$Al_2, allele_levels)
)
tipo_lookup <- tipo_codes[pair_index]
# Any pair outside the table gets the catch-all code "x".
tipo_lookup[is.na(tipo_lookup)] <- "x"
OrderArr$tipo <- tipo_lookup
Here is an example of the data:
ID SNP Al_1 Al_2 tipo
1 1 F1 T A d
7 1 F3 C A g
13 1 F4 G C l
2 2 F1 T T STHG.T
8 2 F3 C C STHG.C
14 2 F4 C C STHG.C
My problem are the OrderArr$tipo values for these Al_1-Al_2 combinations: A-A, T-T, C-C or G-G.
These combinations may have an OrderArr$tipo value equal to other rows' with the same OrderArr$SNP value, so the data I put before should be:
ID SNP Al_1 Al_2 tipo
1 1 F1 T A d
7 1 F3 C A g
13 1 F4 G C l
2 2 F1 T T d
8 2 F3 C C g
14 2 F4 C C l
How can I implement this in the code?
Thanks a lot.
I've created a data frame where each SNP has only one combination of Al_1 and Al_2.
ID SNP Al_1 Al_2 combo tipo
1 1 F1 A T AT a
2 1 F4 G G GG z
3 1 D2 C T CT h
4 1 D4 T C TC e
5 1 HY7 A A AA z
6 1 HY66 T G TG f
7 1 XZD1 C A CA g
8 1 XZD33 G A GA j
9 2 F1 A A AA z
10 2 F4 C G CG i
11 2 D2 C C CC z
12 2 D4 T C TC e
13 2 HY7 A A AA z
14 2 HY66 G G GG z
15 2 XZD1 C A CA g
16 2 XZD33 G A GA j
17 3 F1 T T TT z
18 3 F4 C C CC z
19 3 D2 C T CT h
20 3 D4 T C TC e
21 3 HY7 A C AC b
22 3 HY66 G G GG z
23 3 XZD1 A A AA z
24 3 XZD33 A A AA z
25 4 F1 A T AT a
26 4 F4 C G CG i
27 4 D2 C T CT h
28 4 D4 T T TT z
29 4 HY7 C C CC z
30 4 HY66 T T TT z
31 4 XZD1 C A CA g
32 4 XZD33 A A AA z
33 5 F1 T T TT z
34 5 F4 C G CG i
35 5 D2 T T TT z
36 5 D4 T T TT z
37 5 HY7 A A AA z
38 5 HY66 T G TG f
39 5 XZD1 A A AA z
40 5 XZD33 G G GG z
41 6 F1 A T AT a
42 6 F4 G G GG z
43 6 D2 T T TT z
44 6 D4 C C CC z
45 6 HY7 C C CC z
46 6 HY66 T T TT z
47 6 XZD1 C C CC z
48 6 XZD33 G A GA j
And I think I have an answer to your question.
data$combo <- paste0(data$Al_1, data$Al_2)
snp <- unique(data$SNP)

# Code for each pair of different alleles: rows index Al_1 and columns index
# Al_2, both in the order A, T, C, G. The diagonal stays NA because identical
# alleles are marked "z" below.
allele_levels <- c("A", "T", "C", "G")
pair_codes <- matrix(
  c(
    NA, "a", "b", "c",
    "d", NA, "e", "f",
    "g", "h", NA, "i",
    "j", "k", "l", NA
  ),
  nrow = 4, byrow = TRUE
)
# Vectorised lookup instead of a 1:nrow(data) loop: it handles zero rows and
# does not stop on missing alleles. A pair with an unknown allele gives NA.
new_tipo <- pair_codes[cbind(
  match(data$Al_1, allele_levels),
  match(data$Al_2, allele_levels)
)]
# which() skips comparisons that are NA.
new_tipo[which(data$Al_1 == data$Al_2)] <- "z"

# Rows without a code keep whatever tipo they already had, as before.
if (!"tipo" %in% names(data)) {
  data$tipo <- rep(NA_character_, nrow(data))
}
has_code <- !is.na(new_tipo)
data$tipo[has_code] <- new_tipo[has_code]
# Within each SNP, replace the placeholder "z" (identical alleles) with the
# code carried by the other rows of that SNP.
# The four per-allele branches of the earlier version did exactly the same
# thing, so they are merged into one rule: a "z" row is filled when its Al_1
# is one of A, T, C, G.
ord.data <- data
allele_levels <- c("A", "T", "C", "G")

snp_groups <- split(ord.data, as.factor(ord.data$SNP))
resolved <- lapply(unname(snp_groups), function(temp) {
  known <- unique(temp$tipo[!is.na(temp$tipo) & temp$tipo != "z"])
  to_fill <- which(temp$tipo == "z" & temp$Al_1 %in% allele_levels)
  # A SNP with no non-"z" code has nothing to copy from; its rows stay "z"
  # instead of stopping with "replacement has length zero".
  if (length(to_fill) > 0 && length(known) > 0) {
    if (length(known) > 1) {
      warning(
        "SNP ", temp$SNP[1], " has more than one code; using the first",
        call. = FALSE
      )
    }
    temp$tipo[to_fill] <- known[1]
  }
  temp
})

# Bind once at the end rather than growing the result inside a loop. Groups
# come out in sorted SNP order, as they did with the factor-code loop.
ord.data2 <- if (length(resolved) > 0) do.call(rbind, resolved) else data.frame()

Resources