Preallocating a dict of dicts - julia

When I run @code_warntype on the following function (the expressions shown in bold are the ones likely raising the red flags):
# Question code, reproduced as posted. The ** ... ** pairs are the poster's bold markers
# around the expressions suspected of causing the warnings; they are not Julia syntax.
# Reads "cars.csv" and fills a nested dictionary keyed by the letter prefix of each
# Model string and then by the piece that follows it.
# NOTE(review): `df`, `Emission_val` and `search_cars` are not used in the portion shown.
function cardata(df::DataFrame,Emission_val::Float64,search_cars::Dict{String,Tuple{Int64,Int64}}=Dict("Car1" => (1000,10000), "Car2" => (1000,50000), "Car3" => (1000,6000)),
all_cars::Array{String,1}=["Car1","Car2","Car3","Car4","Car5","Car6"])
# Dict() without type parameters is a Dict{Any,Any}, the type that shows up in the reported warning.
**species = Dict()**
# The data file containing car information of interest
# NOTE(review): `path` is neither a parameter nor a local here; presumably a global defined elsewhere.
car_library = joinpath(path,"cars.csv")
df_car_data=CSV.read(car_library,header=["Model","Velocity","Emission_Value","Mass","Column6"],delim='\t')
#delete unused column
deletecols!(df_car_data, :Column6)
#create a new column with only the car Identifier name
# (the part of each Model string that precedes its first run of digits)
df_car_data[:Identifier_car]=[split(i,r"[0-9]+")[1] for i in df_car_data[:Model]]
#get the properties of all_cars from the cars_data table
for search_models in all_cars
# NOTE(review): `cars` is not defined in the portion shown; the inner loop writes to
# `species[num]`, so this was presumably meant to be `species[search_models]` - confirm.
**cars[search_models] = Dict()**
# NOTE(review): the next three lines use `df_cars_data`, while the table read above is
# named `df_car_data` - presumably the same table.
for i in 1:1:length(df_cars_data[1])
# `num` is the text before the first run of digits (the same split as Identifier_car).
num = split(df_cars_data[:Model][i],r"[0-9]+")[1]
# `alpha` is the second piece when the Model string is split on runs of letters.
alpha = split(df_cars_data[:Model][i],r"[a-zA-Z]+")[2]
if ( num == search_models )
species[num][alpha] = df_car_data[:Velocity][i]
end
end
end
end
I get the following warning highlighted in red:
Body::Tuple{Dict{Any,Any},Union{DataFrame,DataFrameRow{DataFrame,Index}},Any,Any}.
How to preallocate the types for dicts in such a case, assuming that I know the length of data that will populate the dict?

You have not provided a minimal working example.
Have a look at the code below. Note that for efficiency reasons
it is recommended to use Symbol as the key rather than String:
# Outer dictionary: group name => (entry name => value), with concrete key and value types.
species = Dict{Symbol,Dict{Symbol,Float64}}()
# get! returns the inner dictionary stored under the key, inserting a fresh typed one
# first when the key is not present yet.
group = get!(species, Symbol("audi")) do
    Dict{Symbol,Float64}()
end
# Fill the inner dictionary; `group` is the same object that `species` holds.
for (model, value) in (Symbol("a4") => 10.5, Symbol("a6") => 9.5)
    group[model] = value
end
And now printing the output:
julia> println(species)
Dict(:audi=>Dict(:a6=>9.5,:a4=>10.5))

Related

How to use the "6" char code in overloading insertion function on Scilab?

In the Help Documentation of Scilab 6.0.2, I can read the following instruction on the Overloading entry, regarding the last operation code "iext" showed in this entry's table:
"The 6 char code may be used for some complex insertion algorithm like x.b(2) = 33 where b field is not defined in the structure x. The insertion is automatically decomposed into temp = x.b; temp(2) = 33; x.b = temp. The 6 char code is used for the first step of this algorithm. The 6 overloading function is very similar to the e's one."
But I can't find a complete example on how to use this "char 6 code" to overload a function. I'm trying to use it, without success. Does anyone have an example on how to do this?
The code below creates a normal "mlist" as an example, which needs overloading functions:
// Example data: a 5x3 random matrix; each of its columns becomes one field of the mlist.
A = rand(5,3)
// Column names; they are also appended to the field-name list of the mlist below.
names = ["colA" "colB" "colC"]
// One unit label per column.
units = ["ft" "in" "lb"]
// Typed list of type "Mlog" with fields "names", "units" and one field per column name.
M = mlist(["Mlog" "names" "units" names],names,units,A(:,1),A(:,2),A(:,3))
Following are the overload functions:
//define display
// Display overload for "Mlog" typed lists: prints the column names, then the units,
// then the numeric data as one matrix.
//   M : the "Mlog" typed list to display
function %Mlog_p(M)
    // Number of columns, taken from the names field.
    n = size(M.names,"*")
    // One "%10s " per column so that names and units print as two aligned rows.
    formatStr = strcat(repmat("%10s ",1,n)) + "\n"
    mprintf(formatStr,M.names)
    mprintf(formatStr,M.units)
    // Collect every column listed in M.names (not only the first three) before displaying.
    data = []
    for k = 1:n
        data = [data, M(M.names(k))]
    end
    disp(data)
endfunction
//define extraction operation
// Extraction overload for "Mlog" typed lists. The object is the last argument and the
// column index is the one just before it.
//   M(i,j) -> matrix built from rows i of the selected columns j
//   M(j)   -> struct array with fields name, unit and data for the selected columns j
function [Mat]=%Mlog_e(varargin)
M = varargin($)
cols = [1:size(M.names,"*")] // This will also work
cols = cols(varargin($-1)) // when varargin($-1) = 1:1:$
Mat = []
// Three arguments means two indices (rows, columns) followed by M.
if length(varargin)==3 then
// Here i takes each selected column name in turn, not a numeric index.
for i = M.names(cols)
// M(i) reads the field named i; varargin(1) then picks the requested rows of it.
Mat = [Mat M(i)(varargin(1))]
end
else
// Single index: return the metadata and data of each selected column as a struct.
for i=1:size(M.names(cols),"*")
Mat(i).name = M.names(cols(i))
Mat(i).unit = M.units(cols(i))
// M(:,cols(i)) has two indices, so it presumably comes back through the branch above.
Mat(i).data = M(:,cols(i))
end
end
endfunction
//define insertion operations (a regular matrix into a Mlog matrix)
// Insertion overload for M(i,j) = V, where V is a regular matrix and M is an "Mlog".
// Returns a new "Mlog" that keeps the names and units of M and holds the updated data.
function ML=%s_i_Mlog(i,j,V,M)
    // Whole table as a plain matrix (goes through the "Mlog" extraction overload).
    data = M(:,:)
    data(i,j) = V
    // Metadata is carried over unchanged.
    colNames = M.names
    colUnits = M.units
    // Rebuild the typed list column by column.
    ML = mlist(["Mlog" "names" "units" colNames],colNames,colUnits,data(:,1),data(:,2),data(:,3))
endfunction
//insertion operation with structures (the subject of the question)
// "6" overload for "Mlog": supplies the intermediate value temp = M(j) that a nested
// insertion such as M(j).name = value needs as its first step.
//   j : column index, M : the "Mlog" typed list
function temp = %Mlog_6(j,M)
temp = M(j) // uses function %Mlog_e
endfunction
// Insertion overload for M(j) = st, where st is a struct with fields name, unit and data.
// Returns a rebuilt "Mlog" whose column j carries the values from st.
function M = %st_i_Mlog(j,st,M)
// Plain-matrix copy of the numeric data.
A = M(:,:) // uses function %Mlog_e
M.names(j) = st.name // uses function above
M.units(j) = st.unit // uses function above
A(:,j) = st.data // uses function above
names = M.names
units = M.units
// Rebuild the typed list from the updated names, units and data columns.
M = mlist(["Mlog" "names" "units" names],names,units,A(:,1),A(:,2),A(:,3))
endfunction
The first overload (displays mlist) will show the matrix in the form of the following table:
--> M
M =
colA colB colC
ft in lb
0.4720517 0.6719395 0.5628382
0.0623731 0.1360619 0.5531093
0.0854401 0.2119744 0.0768984
0.0134564 0.4015942 0.5360758
0.3543002 0.4036219 0.0900212
The next overloads (extraction and insertion) allow the table to be accessed as a simple matrix M(i,j).
The extraction function also allows M to be accessed by column, which returns a structure, for instance:
--> M(2)
ans =
name: "colB"
unit: "in"
data: [5x1 constant]
The last two functions are the overloads mentioned in the question. They allow the column metadata to be changed in a structure form.
--> M(2).name = "length"
M =
colA length colC
ft in lb
0.4720517 0.6719395 0.5628382
0.0623731 0.1360619 0.5531093
0.0854401 0.2119744 0.0768984
0.0134564 0.4015942 0.5360758
0.3543002 0.4036219 0.0900212

Julia - Iterating over combinations of keys in a dictionary

Is there a nifty way to iterate over combinations of keys in a dictionary?
my dictionary has values like:
[1] => [1,2], [2,3] => [15], [3] => [6,7,8], [4,9,11] => [3], ...
what I need to do is fetch all combinations of keys that are of length 1:n where n might be fx 3
So as in the example above, I would want to iterate over
[[1], [3], [2,3], [[1],[1,2]], [[3],[2,3]], [4,9,11]]
I know I could just collect the keys, but my dictionary is rather large and I am in the middle of redesigning the entire algorithm because it starts swapping insanely when n > 3, reducing efficiency terribly
tl;dr is there a way to create a combinatoric iterator from a dictionary without collect-ing the dictionary?
The following is a straightforward implementation that tries to keep the number of passes over the dictionary small. It uses an OrderedDict so that storing key indices makes sense (a plain Dict does not promise the same key iteration order each time, so positional key indices would not be meaningful).
using Iterators
using DataStructures
od = OrderedDict([1] => [1,2], [2,3] => [15], [3] => [6,7,8], [4,9,11] => [3])
nkeys = length(od)                  # subsets are taken over the key positions 1:nkeys
sv = [length(k) for k in keys(od)]  # store length of keys for quicker calculations
maxmaxlen = sum(sv)                 # maximum total elements in good key
for maxlen = 1:maxmaxlen            # replace maxmaxlen with lower value if too slow
    # gsets holds the good sets of key _indices_: those whose key lengths add up
    # to exactly maxlen.
    gsets = Vector{Vector{Int}}()
    # A subset cannot contain more keys than the dictionary has, so cap curlen.
    for curlen = 1:min(maxlen, nkeys)
        for x in subsets(collect(1:nkeys), curlen)
            if sum(sv[x]) == maxlen
                push!(gsets, x)
            end
        end
    end
    # indmatrix[i, j] is true when key number i belongs to good set j; it lets the
    # next loop run through the keys only once.
    indmatrix = zeros(Bool, nkeys, length(gsets))
    for j = 1:length(gsets)
        for e in gsets[j]
            indmatrix[e, j] = true
        end
    end
    # gkeys is the vector of vectors of keys, i.e. what we wanted to calculate
    gkeys = [Vector{Vector{Int}}() for i = 1:length(gsets)]
    for (i, k) in enumerate(keys(od))
        for j = 1:length(gsets)
            if indmatrix[i, j]
                push!(gkeys[j], k)
            end
        end
    end
    # do something with each set of good keys
    foreach(println, gkeys)
end
Is this more efficient than what you currently have? It would also be better to put the code in a function or turn it into a Julia task which produces the next key set on each iteration.
--- UPDATE ---
Using the answer about iterators from tasks in https://stackoverflow.com/a/41074729/3580870
An improved iterator-ified version is:
"""
    keysubsets(n, d)

Return a `Task` that produces, one at a time, every set of keys of `d` whose key
lengths add up to a total between 1 and `n`, in order of increasing total.

`d` is copied into an `OrderedDict` so that key positions are stable. Each produced
value is a `Vector{Vector{Int}}` holding the selected keys, so the keys of `d` are
expected to be `Vector{Int}`.
"""
function keysubsets(n,d)
    Task() do
        od = OrderedDict(d)
        # Subsets are taken over key positions, so the range is the number of keys,
        # not the size limit n.
        nkeys = length(od)
        sv = [length(k) for k in keys(od)]  # store length of keys for quicker calculations
        maxmaxlen = sum(sv)                 # maximum total elements in good key
        for maxlen = 1:min(n, maxmaxlen)
            # gsets holds the good sets of key _indices_: those whose key lengths
            # add up to exactly maxlen.
            gsets = Vector{Vector{Int}}()
            # A subset cannot contain more keys than the dictionary has, so cap curlen.
            for curlen = 1:min(maxlen, nkeys)
                for x in subsets(collect(1:nkeys), curlen)
                    if sum(sv[x]) == maxlen
                        push!(gsets, x)
                    end
                end
            end
            # indmatrix[i, j] is true when key number i belongs to good set j; it lets
            # the next loop run through the keys only once.
            indmatrix = zeros(Bool, nkeys, length(gsets))
            for j = 1:length(gsets)
                for e in gsets[j]
                    indmatrix[e, j] = true
                end
            end
            # gkeys is the vector of vectors of keys, i.e. what we wanted to calculate
            gkeys = [Vector{Vector{Int}}() for i = 1:length(gsets)]
            for (i, k) in enumerate(keys(od))
                for j = 1:length(gsets)
                    if indmatrix[i, j]
                        push!(gkeys[j], k)
                    end
                end
            end
            # hand each set of good keys to the consumer of the task
            foreach(produce, gkeys)
        end
    end
end
Which now enables iterating over all keysubsets up to combined size 4 in this way (after running the code from the other StackOverflow answer):
julia> nt2 = NewTask(keysubsets(4,od))
julia> collect(nt2)
10-element Array{Array{Array{Int64,1},1},1}:
Array{Int64,1}[[1]]
Array{Int64,1}[[3]]
Array{Int64,1}[[2,3]]
Array{Int64,1}[[1],[3]]
Array{Int64,1}[[4,9,11]]
Array{Int64,1}[[1],[2,3]]
Array{Int64,1}[[2,3],[3]]
Array{Int64,1}[[1],[4,9,11]]
Array{Int64,1}[[3],[4,9,11]]
Array{Int64,1}[[1],[2,3],[3]]
(the definition of NewTask from the linked StackOverflow answer is necessary).

How to use S4 object programming in R

What's wrong with my R script? I'm trying to use a vector of user-defined objects (here a vector of "Page" objects) within another user-defined object (here a "Book" object)
# A single page: its number and its lines of text.
setClass("Page",
         slots = c(PageNo = "numeric",      # scalar
                   Contents = "character")  # vector of strings
)
# A book: a collection of pages plus a title.
setClass("Book",
         slots = c(Pages = "vector",     # Something wrong here? vector of pages ? "Page" or vector" or "list"
                   Title = "character")  # vector of strings
)
# Generic AddPage(aBook, pageNo); methods dispatch on the class of aBook.
setGeneric(name="AddPage", def=function(aBook, pageNo){standardGeneric("AddPage")})
# Method for Book: create a Page numbered pageNo and append it to the Pages slot.
# Slots are read and written with the @ operator.
setMethod(f="AddPage", signature="Book",
          definition=function(aBook, pageNo)
          {
            page1 = new("Page")
            page1@PageNo = pageNo
            aBook@Pages = c(aBook@Pages, page1) # Something wrong here?
          }
)
# Try it out: create a book, set its title, add two pages and inspect the Pages slot.
book1 = new("Book")
book1@Title = "Sample Book"
book1
book1@Pages
AddPage(book1, 1)
AddPage(book1, 2)
book1@Pages
Remember that R does not use reference semantics, so AddPage(book1, 1) creates a copy of book1, and updates that. In the method you don't return the updated object, and book1 remains unchanged.
Update the method so that it returns the modified object
# AddPage method for Book: returns a copy of aBook with a new Page numbered pageNo
# appended to its Pages slot. The argument itself is not modified, so the caller has
# to assign the result, e.g. book1 = AddPage(book1, 1).
setMethod(f="AddPage", signature="Book",
          definition=function(aBook, pageNo)
          {
            page1 = new("Page")
            page1@PageNo = pageNo
            # c() builds a new, longer vector from the existing pages plus the new one.
            aBook@Pages = c(aBook@Pages, page1)
            # Return the modified object so the change is visible to the caller.
            aBook
          }
)
and assign the return value to the old variable
book1 = AddPage(book1, 1)
But this is a very inefficient approach -- the line aBook@Pages = c(aBook@Pages, page1) makes a copy of all existing pages (on the right-hand side, to create a longer vector; this will scale with the square of the number of Pages added to the book) and then copies the entire Book (for the assignment). In addition, creating individual objects is expensive and does not exploit R's 'vectorization'. A first step is to think of the object 'Page' as instead 'Pages', where the object models the columns rather than rows of a data frame. 'Book' then doesn't have a vector of Page objects, but a single Pages object. This also implies a different approach to creating your 'book'.

Convert Dict to DataFrame in Julia

Suppose I have a Dict defined as follows:
x = Dict{AbstractString,Array{Integer,1}}("A" => [1,2,3], "B" => [4,5,6])
I want to convert this to a DataFrame object (from the DataFrames module). Constructing a DataFrame has a similar syntax to constructing a dictionary. For example, the above dictionary could be manually constructed as a data frame as follows:
DataFrame(A = [1,2,3], B = [4,5,6])
I haven't found a direct way to get from a dictionary to a data frame but I figured one could exploit the syntactic similarity and write a macro to do this. The following doesn't work at all but it illustrates the approach I had in mind:
# Question code: a sketch the poster describes as not working. The idea is to expand
# into a DataFrame(...) call with one `key = value` entry per dictionary entry.
macro dict_to_df(x)
# Evaluates the argument expression while the macro runs and rejects anything that is not a Dict.
typeof(eval(x)) <: Dict || throw(ArgumentError("Expected Dict"))
return quote
DataFrame(
for k in keys(eval(x))
# NOTE(review): the next line starts with `#`, so as written it is a comment and the
# loop body is empty; presumably it read `@eval ...` before the text was mangled - confirm.
#eval ($k) = $(eval(x)[$k])
end
)
end
end
I also tried writing this as a function, which does work when all dictionary values have the same length:
# Build a DataFrame from `x` by assembling the source text of a `DataFrame(k = v, ...)`
# call, one `k = v` entry per dictionary entry, and then parsing and evaluating that text.
# NOTE(review): keys and values are pasted into code as text and passed to eval, so
# this is only suitable for trusted dictionaries whose keys are valid identifiers.
function dict_to_df(x::Dict)
s = "DataFrame("
for k in keys(x)
v = x[k]
# String values are wrapped in double quotes so they appear as string literals in the text.
if typeof(v) <: AbstractString
v = string('"', v, '"')
end
s *= "$(k) = $(v),"
end
# chop drops the last character: the trailing comma when at least one entry was added.
# For an empty Dict it drops the opening parenthesis instead, leaving "DataFrame)".
s = chop(s) * ")"
return eval(parse(s))
end
Is there a better, faster, or more idiomatic approach to this?
Another method could be
# values(x) and keys(x) iterate in the same order, so each column keeps its own name.
DataFrame(Any[values(x)...], Symbol[Symbol(k) for k in keys(x)])
It was a bit tricky to get the types in order to access the right constructor. To get a list of the constructors for DataFrames I used methods(DataFrame).
The DataFrame(a=[1,2,3]) way of creating a DataFrame uses keyword arguments. To use splatting (...) for keyword arguments the keys need to be symbols. In the example x has strings, but these can be converted to symbols. In code, this is:
DataFrame(;[Symbol(k)=>v for (k,v) in x]...)
Finally, things would be cleaner if x had originally been with symbols. Then the code would go:
x = Dict{Symbol,Array{Integer,1}}(:A => [1,2,3], :B => [4,5,6])
df = DataFrame(;x...)

scilab submatrix incorrectly defined

I am stuck at creating a matrix of a matrix (vector in this case)
What I have so far
// Question code: scan A and try to record the position of every element equal to x.
// NOTE(review): A, R, K and x are not defined in the snippet shown.
index = zeros(size(A)) // This is some matrix but isn't important to the question
// Next free position in index.
indexIndex = 1;
for rows=1:length(R)
for columns=1:length(K)
if(A(rows,columns)==x)
V=[rows columns]; // I create a vector holding the row + column
// V(1,2) is the single element in row 1, column 2 of V, i.e. the column number only.
index(indexIndex) = V(1,2) // I want to store all these vectors
indexIndex = indexIndex + 1
end
end
end
I have tried various ways of getting the information out of V (such as V(1:2)) but nothing seems to work correctly.
In other words, I'm trying to get an array of points.
Thanks in advance
I do not understand your question exactly. What is the size of A? What is x, K and R? But under some assumptions,
Using list
You could use a list
// Matrix to search; every element is zero
A = zeros(8,8)
// Value to look for
x = 0;
// Start with an empty list; it will receive one [row col] vector per match
index = list();
// Number of rows and columns of A
[rows, cols] = size(A);
for row=1:rows
    for col=1:cols
        if A(row,col)==x then
            // Position of the matching element
            V=[row col];
            // $ is the last index of the list, so $+1 appends
            index($+1) = V
        end
    end
end
Single indexed matrices
Another approach would be to make use of the fact that a multi-dimensional matrix can also be indexed by a single value.
For instance create a random matrix named a:
-->a = rand(3,3)
a =
0.6212882 0.5211472 0.0881335
0.3454984 0.2870401 0.4498763
0.7064868 0.6502795 0.7227253
Access the first value:
-->a(1)
ans =
0.6212882
-->a(1,1)
ans =
0.6212882
Access the second value:
-->a(2)
ans =
0.3454984
-->a(2,1)
ans =
0.3454984
So that proves how the single indexing works. Now to apply it to your problem and knocking out a for-loop.
// Create some matrix A
A = zeros(8,8)
//initialize the array of indices
index = [];
// Value to look for
x = 0;
// length(A) is the total number of elements, so i runs over every single (linear) index
for i=1:length(A)
    if(A(i)==x)
        // Append the linear index to the array using $ (last index) + 1
        index($+1) = i;
    end
end
Without for-loop
If you just need the values that adhere to a certain condition you could also do something like this
values = A(A==x);
Be careful when comparing doubles: they are not always (un)equal when you expect them to be.

Resources