Manupulate the full Row of a tiddle with 2 loops - r

first, this is a homework question.
It's easy to manipulate the full row with:
testMan[2,] = apply(testMan[2,], 2, function(x) 100)
But we have to do this in a loop and it must be a function with a parameter.
manipulateRow = function(rowNumber){
i = 1;
for(row in testMan){
#print(i)
if(i == rowNumber){
for(price in row){
price = 100
}
break;
}
i = i + 1;
}
}
test = manipulateRow(2);
The goal is to replace the full 2nd line with the value 100.
There are more than 600 columns, so we have to do it with a loop.
We are working with tibble and tiddyverse.

Related

R - nested loop alternatives/optimization

I'm currently trying to implement an algorithm in R that requires to loop through the rows and columns of a matrix and that for every cell it computes a value based on the value of previously computed cells.
Here is the code that does what I said above, it is a part of the Needleman Wunsch algorithm:
globalSequenceAlignment <- function(seq1, seq2, match, mismatch, gap) {
# splitting the sequences in order to use them as rows and columns names
seq1_split <- unlist(strsplit(toString(seq1), ""))
seq2_split <- unlist(strsplit(toString(seq2), ""))
len1 <- length(seq1_split)
len2 <- length(seq2_split)
# creating the alignment matrix
alignment_matrix <- matrix(0, nrow = len2+1, ncol = len1+1)
colnames(alignment_matrix) <- c("-", seq1_split)
rownames(alignment_matrix) <- c("-", seq2_split)
# filling first row and column of the alignment matrix
for (i in 2:ncol(alignment_matrix)) {
alignment_matrix[1,i] <- (alignment_matrix[1,i]+(i-1))*(gap)
}
for (j in 2:nrow(alignment_matrix)) {
alignment_matrix[j,1] <- (alignment_matrix[j,1]+(j-1))*(gap)
}
for (i in 2:ncol(alignment_matrix)) {
for (j in 2:nrow(alignment_matrix)) {
horizontal_score <- alignment_matrix[j,i-1] + gap
vertical_score <- alignment_matrix[j-1,i] + gap
if (colnames(alignment_matrix)[i] == rownames(alignment_matrix)[j]) {
diagonal_score <- alignment_matrix[j-1,i-1] + match
} else {
diagonal_score <- alignment_matrix[j-1,i-1] + mismatch
}
scores <- c(horizontal_score, vertical_score, diagonal_score)
alignment_matrix[j,i] <- max(scores)
}
}
return(alignment_matrix)
}
a <- 'GAATC'
b <- 'CATACG'
globalSequenceAlignment(a, b, 10,-5,-4)
Using this code I get the result that I want.
The problem is that with matrices with dimensions grater than 500x500 the nested loops become way too slow (running this code with a 500x500 matrix takes more or less 2 minutes).
I know that *apply functions could improve this but I couldn't achieve to use them since for computing each cell it requires that the previous ones have been computed yet.
I was wondering if there is a way to achieve the same result using *apply functions or a way to vectorize this type of code so that it's more rapid in R.
If someone would ever need this I wrote my own solution to this problem using the package Rcpp. The runtime, from about 3 minutes for sequences of 500 characters, is now about 0.3s.
I post here the code for the part of the two nested loops that you can see in the text of the question, hope that will be useful for someone.
library(Rcpp)
rcppFunction('IntegerMatrix rcpp_compute_matrices(IntegerMatrix Am, StringMatrix Dm,
StringVector seq1, StringVector seq2,
int gap, int miss, int match) {
int nrow = Am.nrow(), ncol = Am.ncol();
for (int i = 1; i < nrow; i++) {
for (int j = 1; j < ncol; j++) {
int vertical_score = Am(i-1, j) + gap;
int horizontal_score = Am(i, j-1) + gap;
int diagonal_score = 0;
if (seq1[j-1] == seq2[i-1]) {
diagonal_score = Am(i-1, j-1) + match;
}
else {
diagonal_score = Am(i-1, j-1) + miss;
}
IntegerVector score = {vertical_score, horizontal_score, diagonal_score};
int max_score = max(score);
Am(i, j) = max_score;
}
}
return Am;
}')

Double sampling method in R

My initial code for double sampling is the following. I did only one sample.
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i,1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
sum(accept)
Since I generated randomly from Bernoulli, every time you run the code, the results will not be the same.
I want 100 repetitions of this double sample.
My solution:
nm=double(100)
for (j in 1:100){
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i,1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
nm[j]=sum(accept)
}
mean(nm)
What do you think?
If we follow the proposition of #Onyambu, we can embeded one simulation inside a function and call it in a loop like this :
one_double_sampling <- function(){
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i, 1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
return(sum(accept))
}
set.seed(123)
# number of sample
n <- 100
# stock the result
res <- rep(0, n)
for(i in 1:n){
res[i] <- one_double_sampling()
}
# mean
mean(res)
Definitly your code is correct. For people interresting by the double sampling method I advise you to see this.
Edit 1
In one line code based on Onyambu advise :
mean(replicate(n, one_double_sampling()))

Loop for cricket code is not successful

I need to calculate the number of 4's and 6's a batsman has scored in a given cricket database.
The following is my code:
runs_batsman = ballbyball[,c("Batter","RunsBatsman")]
fours = data.frame(bat_name)
numoffours = function(name)
{counter = 0
for(i in nrow(ballbyball))
{
if((identical(toString(name),toString(runs_batsman[i,1]))))
{
if(runs_batsman[i,2]== 4)
{
counter = (counter + 1)
}
}
}
return(counter)
}
summary = function(dataset){
fours[,"numfours"]=0
for (j in nrow(bat_name)){
fours[j,2] = lapply(bat_name[j,1], numoffours)
}
return(fours)
}
I am not getting any values for any of the batsmen. bat_name is the data frame with all batsmen names. Please help me.
I am very new to R so please explain in much detail as possible.

finding similar elements within two arrays

Is there a faster way to do this? N^2 time just seems terrible.
mergeData<-function(p,c) {
for(i in 1:length(p[[1]])) {
for(k in 1:length(c[[1]])) {
if(toString(c[[k,46]]) == toString(p[[i,1]])) {
#Do stuff here with pairs found
print(i)
}
}
}
}
row1 = c[[,46]]
row2 = p[[,1]]
x = data.frame(row = row1, nr1 = c(1:len(row1)))
y = data.frame(row = row2, nr2 = c(1:len(row2)))
same_pairs = merge(x, y)[c("nr1", "nr2")]
In same_pairs you have now indeces of a rows with the same elements.
Complexity : O(len(row1) + len(row2))

How to modify drawdown functions in PerformanceAnalytics package for value

I am calculating the average drawdown, average length, recovery length, etc. in R for a PnL data series rather than return data. This is data frame like this
PNL
2008-11-03 3941434
2008-11-04 4494446
2008-11-05 2829608
2008-11-06 2272070
2008-11-07 -2734941
2008-11-10 -2513580
I used the maxDrawDown function from fTrading package and it worked. How could I get the other drawdown functions? If I directly run AverageDrawdown(quantbook) function, it will give out error message like this
Error in if (thisSign == priorSign) { : missing value where TRUE/FALSE needed
I checked the documentation for AverageDrawdown and it is as below:
findDrawdowns(R, geometric = TRUE, ...)
R an xts, vector, matrix, data frame, timeSeries or zoo object of asset returns
My quantbook is a data frame but doesn't work for this function.
Or do you have anything other packages to get the same funciton, please advise.
I've modified the package's functions. Here is one solution in PnL case (or any other case you want to get the value rather than the return) and hope you find it useful. The parameter x is a dataframe and the row.names for x are dates so you don't bother to convert amongst different data types (which I actually suffer a lot). With the function findPnLDrawdown, you could perform a lot other functions to calculate averageDrawDown, averageLength, recovery, etc.
PnLDrawdown <- function(x) {
ts = as.vector(x[,1])
cumsum = cumsum(c(0, ts))
cmaxx = cumsum - cummax(cumsum)
cmaxx = cmaxx[-1]
cmaxx = as.matrix(cmaxx)
row.names(cmaxx) = row.names(x)
cmaxx = timeSeries(cmaxx)
cmaxx
}
findPnLDrawdown <- function(R) {
drawdowns = PnLDrawdown(R)
draw = c()
begin = c()
end = c()
length = c(0)
trough = c(0)
index = 1
if (drawdowns[1] >= 0) {
priorSign = 1
} else {
priorSign = 0
}
from = 1
sofar = as.numeric(drawdowns[1])
to = 1
dmin = 1
for (i in 1:length(drawdowns)) {
thisSign =ifelse(drawdowns[i] < 0, 0, 1)
if (thisSign == priorSign) {
if (as.numeric(drawdowns[i]) < as.numeric(sofar)) {
sofar = drawdowns[i]
dmin = i
}
to = i+ 1
}
else {
draw[index] = sofar
begin[index] = from
trough[index] = dmin
end[index] = to
from = i
sofar = drawdowns[i]
to = i + 1
dmin = i
index = index + 1
priorSign = thisSign
}
}
draw[index] = sofar
begin[index] = from
trough[index] = dmin
end[index] = to
list(pnl = draw, from = begin, trough = trough, to = end,
length = (end - begin + 1),
peaktotrough = (trough - begin + 1),
recovery = (end - trough))
}

Resources