How to change a particular string index - python-3.6

a=(["id': 'tl_00'}"], ["index': '9',"], ["resp': '1110000000001111',"], ["fors': '1110000000001111'}"])
I want to create a new list so that all the } are removed
a=a=(["id': 'tl_00'}"], ["index': '9',"], ["resp': '1110000000001111',"], ["fors': '1110000000001111'}"])
b=""
for i in range(len(a)):
for j in range(len(a[i])):
for k in range(len(a[i][j])):
b+=a[i][j][k]
if a[i][j][k]=="}":
b[i][j][k]+=""
The error is list index is out of range. Is there a better way to do this?

Use str.strip()
Ex:
a=(["id': 'tl_00'}"], ["index': '9',"], ["resp': '1110000000001111',"], ["fors': '1110000000001111'}"])
b=[[k.strip("}") for k in sublist] for sublist in a]
print(b)
Output:
[["id': 'tl_00'"],
["index': '9',"],
["resp': '1110000000001111',"],
["fors': '1110000000001111'"]]

Thought I'd add an alternative that is closer to your original code:
# Rebuild `a` as a new nested list `b`, copying every character except "}".
a=(["id': 'tl_00'}"], ["index': '9',"], ["resp': '1110000000001111',"], ["fors': '1110000000001111'}"])
b=[]
for i in range(len(a)):
    b+=[[]]  # open an empty sublist in b for a[i]
    for j in range(len(a[i])):
        b[i]+=[""]  # open an empty string in b[i] for a[i][j]
        for k in range(len(a[i][j])):
            # Copy the character across unless it is a closing brace.
            if a[i][j][k]!="}":
                b[i][j] += a[i][j][k]
print(b)

Related

Julia nested loop

I am trying to find the duplicate value that occurs first in a list or array. I wrote the code below, which works in Python, and I am trying to replicate it in Julia, but I am not getting the desired result. Can you please help?
def firstDuplicateValue(array):
    """Return the value whose repeat occurs at the smallest index.

    For every pair of equal elements the later position is considered; the
    element at the smallest such position is returned. Returns -1 when no
    value occurs twice (including for an empty input).
    """
    # Smallest index found so far at which an earlier element is repeated.
    index = float('inf')
    for x, i in enumerate(array):
        for j, k in enumerate(array):
            # `j > x` already guarantees the two positions differ.
            if i == k and j > x:
                if j < index:
                    index = j
    if index == float('inf'):
        return -1
    else:
        return array[index]
I have written the same code in Julia, as below, but I am not getting the desired result:
"""
    firstDuplicateValue(array)

Return the element of `array` whose repeat occurs at the smallest position.

For every pair of equal elements the later position is considered; the element
at the smallest such position is returned. Returns `-1` when no value occurs
twice (including for an empty `array`).
"""
function firstDuplicateValue(array)
    # Smallest position found so far at which an earlier element is repeated.
    # An integer sentinel keeps `index` an `Int` throughout (starting from
    # `Inf` would make it a Float64 first and an Int later).
    index = typemax(Int)
    for (ind_1, value_1) in enumerate(array)
        # A repeat of this element can only sit after `ind_1`, so once
        # `ind_1` reaches the best position no improvement is possible.
        ind_1 >= index && break
        for (ind_2, value_2) in enumerate(array)
            # Positions at or beyond the current best cannot lower it.
            ind_2 >= index && break
            if ind_2 > ind_1 && value_1 == value_2
                index = ind_2
                break
            end
        end
    end
    if index == typemax(Int)
        return -1
    end
    return array[index]
end
I know there are other optimum ways of doing it, but this is just to learn basics of Julia.
added from an answer by the OP: It worked for me, I was providing the wrong input to the function, it was a typo.
It worked for me, I was providing the wrong input to the function, it was a typo.

Julia: type-stability with DataFrames

How can I access the columns of a DataFrame in a type-stable way?
Let's assume I have the following data:
df = DataFrame(x = fill(1.0, 1000000), y = fill(1, 1000000), z = fill("1", 1000000))
And now I want to do some recursive computation (so I cannot use transform)
function foo!(df::DataFrame)
for i in 1:nrow(df)
if (i > 1) df.x[i] += df.x[i-1] end
end
end
This has terrible performance:
julia> @time foo!(df)
0.144921 seconds (6.00 M allocations: 91.529 MiB)
A quick fix in this simplified example would be the following:
function bar!(df::DataFrame)
x::Vector{Float64} = df.x
for i in length(x)
if (i > 1) x[i] += x[i-1] end
end
end
julia> @time bar!(df)
0.000004 seconds
However, I'm looking for a solution that is generalisable, eg when the recursive computation is just specified as a function
function foo2!(df::DataFrame, fn::Function)
for i in 1:nrow(df)
if (i > 1) fn(df, i) end
end
end
function my_fn(df::DataFrame, i::Int64)
x::Vector{Float64} = df.x
x[i] += x[i-1]
end
While this (almost) doesn't allocate, it is still very slow.
julia> @time foo2!(df, my_fn)
0.050465 seconds (1 allocation: 16 bytes)
Is there an approach that is performant and allows this kind of flexibility / generalisability?
EDIT: I should also mention that in practice it is not known a priori which columns the function fn depends on. I.e., I'm looking for an approach that allows performant access to / updating of arbitrary columns inside fn. The needed columns could be specified together with fn as a Vector{Symbol}, for example, if necessary.
EDIT 2: I tried using barrier functions as follows, but it's not performant
function foo3!(df::DataFrame, fn::Function, colnames::Vector{Symbol})
cols = map(cname -> df[!,cname], colnames)
for i in 1:nrow(df)
if (i > 1) fn(cols..., i) end
end
end
function my_fn1(x::Vector{Float64}, i::Int64)
x[i] += x[i-1]
end
function my_fn2(x::Vector{Float64}, y::Vector{Int64}, i::Int64)
x[i] += x[i-1] * y[i-1]
end
@time foo3!(df, my_fn1, [:x])
@time foo3!(df, my_fn2, [:x, :y])
This issue is intended (to avoid excessive compilation for wide data frames) and the ways how to handle it are explained in https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/11_performance.ipynb.
In general you should reduce the number of times you index into a data frame. So in this case do:
julia> function foo3!(x::AbstractVector, fn::Function)
for i in 2:length(x)
fn(x, i)
end
end
foo3! (generic function with 1 method)
julia> function my_fn(x::AbstractVector, i::Int64)
x[i] += x[i-1]
end
my_fn (generic function with 1 method)
julia> @time foo3!(df.x, my_fn)
0.010746 seconds (16.60 k allocations: 926.036 KiB)
julia> @time foo3!(df.x, my_fn)
0.002301 seconds
(I am using the version where you want to have a custom function passed)
My current approach involves wrapping the DataFrame in a struct and overloading getindex / setindex!. Some additional trickery using generated functions is needed to get the ability to access columns by name. While this is performant, it is also quite hacky, and I was hoping there was a more elegant solution using only DataFrames.
For simplicity this assumes all (relevant) columns are of Float64 type.
"""
    DataFrameWrapper{colnames}

Holds a set of columns as plain `Vector{Float64}`s in `cols`. The column names
are carried in the type parameter `colnames`, in the same order as `cols`, so
that a name can be mapped to its position from the type alone.
"""
struct DataFrameWrapper{colnames}
    cols::Vector{Vector{Float64}}
end

"""
    df_to_vectors(df, colnames)

Collect the columns of `df` named in `colnames`, in that order, into a
`Vector{Vector{Float64}}`.
"""
function df_to_vectors(
    df::AbstractDataFrame, colnames::Vector{Symbol}
)::Vector{Vector{Float64}}
    res = Vector{Vector{Float64}}(undef, length(colnames))
    for i in eachindex(colnames)
        res[i] = df[!, colnames[i]]
    end
    return res
end

# Build a wrapper from a data frame; `colnames` (the type parameter) selects
# which columns are extracted.
function DataFrameWrapper{colnames}(df::AbstractDataFrame) where colnames
    return DataFrameWrapper{colnames}(df_to_vectors(df, collect(colnames)))
end

# Recover the column names from the wrapper *type*.
get_colnames(::Type{DataFrameWrapper{colnames}}) where colnames = colnames

# Position of column `col` within the wrapper's `colnames`.
# Inside a `@generated` function `x` is bound to the argument's type, so the
# `findfirst` lookup runs when the method is generated and the resulting index
# is returned as a literal.
# NOTE(review): if `col` is not one of the wrapper's column names, `findfirst`
# yields `nothing`, which cannot satisfy the `::Int64` return annotation.
@generated function get_col_index(x::DataFrameWrapper, ::Val{col})::Int64 where col
    id = findfirst(y -> y == col, get_colnames(x))
    :($id)
end

# Column access: `x[Val(:name)]` returns the stored vector for that column;
# `x[:name]` forwards to the `Val` method.
Base.@propagate_inbounds Base.getindex(x::DataFrameWrapper, col::Val)::Vector{Float64} =
    x.cols[get_col_index(x, col)]
Base.@propagate_inbounds Base.getindex(x::DataFrameWrapper, col::Symbol)::Vector{Float64} =
    getindex(x, Val(col))

# Element assignment: `x[row, Val(:name)] = value` writes into the stored
# column vector; the `Symbol` method forwards to the `Val` method.
Base.@propagate_inbounds Base.setindex!(x::DataFrameWrapper, value::Float64, row::Int64, col::Val) =
    setindex!(x.cols[get_col_index(x, col)], value, row)
Base.@propagate_inbounds Base.setindex!(x::DataFrameWrapper, value::Float64, row::Int64, col::Symbol) =
    setindex!(x, value, row, Val(col))

translate correctly `for(int i = 1; i <= size; i++)` in R (case `size < 1`)

I used to write for loops in R like
for(i in 1:size) { ... }
and I thought that it was equivalent of
for(int i = 1; i <= size; i++) { ... }
but I just realised that if size == 0 R enters the loop anyway because 1:size = [1 0] and so my code behaves totally wrong.
So, what is the real standard way of translating the C for loop I wrote above, such that if size < 1 the program does not enter the loop?
Assuming that size is some size of your object, e.g. length(vec) or nrow(df), you can use seq_along like below:
x = NULL
for (i in seq_along(x)) print(x[i])
If size is originally the length of a vector, the standard way is
for (i in seq_along(myvector)) {...}
which will behave correctly if myvector has length zero. So you could even do
for (i in seq_along(numeric(size))) {...}
but that would certainly not be considered "standard".
As noted in the comments (and in another answer) the better way is
for (i in seq_len(size)) {...}
You can use base::seq_len:
foo <- 2
bar <- 0
for (i in seq_len(foo)) {print(i)}
[1] 1
[1] 2
# No output
for (i in seq_len(bar)) {print(i)}

R - replacing strings using gsub()

I have a lot of unclean data in the form:
abc
abc/def
abc/de
abc/d
abc/def/i j k
abc/def/i
abc/def/i j
This is just the part of the data I would like to change. This is part of much bigger set of data.
I would like to change all the elements to abc/def/i j k.
I have used the gsub() function as follows:
gsub('abc[a-z/]', 'abc/def/i j k', str)
output :
abc/def/i j k
abc/def/i j k/def
abc/def/i j k/de
abc/def/i j k/d
The problem being that it replaces any occurrence of the pattern.
The only solution that gave me decent enough results is one where I hard-code all the possible options, like this:
gsub('abc$|abc/d$|abc/de$|abc/def/i$', 'abc/def/i j k', str)
However, this would not work if there is a variation in any new data.
So I was wondering if it was possible to get the result without hard coding the parameters.
You may use
x <- c("abc", "abc/def","abc/de","abc/d","abc/def/i j k","abc/def/i","abc/def/i j")
sub("^(abc)(?:/[^/]*)?", "\\1/def", x)
## => [1] "abc/def" "abc/def" "abc/def" "abc/def"
## [5] "abc/def/i j k" "abc/def/i" "abc/def/i j"
See R demo
Details:
^ - start of string
(abc) - Group 1: abc
(?:/[^/]*)? - an optional group matching a sequence of:
/ - a /
[^/]* - 0+ chars other than /

Why do i get this error - MATLAB

I have the image and the vector
a = imread('Lena.tiff');
v = [0,2,5,8,10,12,15,20,25];
and this M-file
function y = Funks(I, gama, c)
[m n] = size(I);
for i=1:m
for j=1:n
J(i, j) = (I(i, j) ^ gama) * c;
end
end
y = J;
imshow(y);
when I'm trying to do this:
f = Funks(a,v,2)
I am getting this error:
??? Error using ==> mpower
Integers can only be combined with integers of the same class, or scalar doubles.
Error in ==> Funks at 5
J(i, j) = (I(i, j) ^ gama) * c;
Can anybody help me, with this please?
The error is caused because you're trying to raise a number to a vector power. Translated (i.e. replacing formal arguments with actual arguments in the function call), it would be something like:
J(i, j) = (a(i, j) ^ [0,2,5,8,10,12,15,20,25]) * 2
Element-wise power .^ won't work either, because you'd be trying to "stick" a vector into a scalar container.
Later edit: If you want to apply each gamma to your image, maybe this loop is more intuitive (though not the most efficient):
a = imread('Lena.tiff'); % Pics or GTFO
v = [0,2,5,8,10,12,15,20,25]; % Gamma (ar)ray -- this will burn any picture
f = cell(1, numel(v)); % Prepare container for your results
for k=1:numel(v)
f{k} = Funks(a, v(k), 2); % Save result from your function
end;
% (Afterwards you use cell array f for further processing)
Or you may take a look at the other (more efficient if maybe not clearer) solutions posted here.
Later(er?) edit: If your tiff file is CMYK, then the result of imread is an MxNx4 color matrix, which must be handled differently than usual (because it is 3-dimensional).
There are two ways I would follow:
1) arrayfun
results = arrayfun(@(i) I(:).^gama(i)*c,1:numel(gama),'UniformOutput',false);
J = cellfun(@(x) reshape(x,size(I)),results,'UniformOutput',false);
2) bsxfun
results = bsxfun(@power,I(:),gama)*c;
results = num2cell(results,1);
J = cellfun(@(x) reshape(x,size(I)),results,'UniformOutput',false);
What you're trying to do makes no sense mathematically. You're trying to assign a vector to a number. Your problem is not the MATLAB programming, it's in the definition of what you're trying to do.
If you're trying to produce several images J, each of which corresponds to a certain gamma applied to the image, you should do it as follows:
function J = Funks(I, gama, c)
% FUNKS  Apply several power-law (gamma) transforms to one image.
%   J = Funks(I, gama, c) returns an m-by-n-by-k array, where [m n] = size(I)
%   and k = numel(gama). Page J(:,:,p) holds c * I.^gama(p).
%   I is expected to be a 2-D (single-channel) image.

% Work in double precision: with an integer-class image, raising a pixel to a
% non-scalar double exponent triggers the "Integers can only be combined with
% integers of the same class, or scalar doubles" error.
I = double(I);
[m, n] = size(I);
% get the number of images to produce
k = numel(gama);
% Pre-allocate the output
J = zeros(m, n, k);
% One element-wise power per exponent instead of one per pixel.
for p = 1:k
    J(:, :, p) = (I .^ gama(p)) * c;
end
In the end you will get images J(:,:,1), J(:,:,2), etc.
If this is not what you want to do, then figure out your equations first.

Resources