How to flatten a dict type column in a DF - julia

I have a DataFrame with a dict-type column named `measures`, like below:
How can I flatten this column as new columns in the same DF?

I recently had the same problem when I wanted to extract and flatten data from a JSON document. The function below may be overkill for your issue, and it is a bit obscure, but here it is:
This expects Dicts and ignores missing or malformed data
"""
    extract_flatten(data::AbstractDict, extract::AbstractDict; cmdchar::AbstractChar='%')

Pull selected values out of the nested container `data` and return them in a flat `Dict`.

`extract` maps each output key to a path: an iterable of steps that are applied in
order, starting from `data`. Each step is one of:

- a string that does not start with `cmdchar`, or an integer: used as a dictionary
  key / array index into every value reached so far;
- `"<cmdchar>all[:part...]"`: fan out over every entry of each value reached so far;
  the output key of each entry becomes `join([key; parts; entrykey], '_')`;
- `"<cmdchar>name[:part...]"`: append `'_' * join(parts, '_')` to every output key.

Lookups that find nothing (absent key, out-of-range index, a value that cannot be
indexed by that kind of step, or a stored `nothing`) are dropped silently, so a path
that cannot be followed simply produces no output entry.

# Throws
- `ErrorException` for an unknown command, or for a step that is neither a string
  nor an integer.
"""
function extract_flatten(data::AbstractDict, extract::AbstractDict; cmdchar::AbstractChar='%')
    res = Dict()
    for (key, path) in extract
        nodes = Any[data]   # values reached so far along this path
        names = [key]       # output key belonging to each entry of `nodes`
        for step in path
            # `startswith` is safe for empty strings, and `chop` is safe for
            # multi-byte command characters; raw indexing is neither.
            if step isa AbstractString && startswith(step, cmdchar)
                cmd = split(chop(step; head=1, tail=0), ':')
                if cmd[1] == "all"
                    newnodes = []
                    newnames = String[]
                    for (node, name) in zip(nodes, names)
                        for (entrykey, child) in pairs(node)
                            push!(newnames, join([name; cmd[2:end]; entrykey], '_'))
                            push!(newnodes, child)
                        end
                    end
                    nodes = newnodes
                    names = newnames
                elseif cmd[1] == "name"
                    names .*= '_' * join(cmd[2:end], '_')
                else
                    error("$(repr(cmd)) is not a valid command")
                end
            elseif step isa Union{AbstractString,Integer}
                nodes .= getdefnothing.(nodes, Ref(step))
            else
                error("$(repr(step)) is not a valid key")
            end
            # Discard every branch whose lookup produced nothing.
            dropped = isnothing.(nodes)
            deleteat!(nodes, dropped)
            deleteat!(names, dropped)
            isempty(nodes) && break
        end
        for (name, node) in zip(names, nodes)
            res[name] = node
        end
    end
    return res
end

# Look up `y` in `x`, returning `nothing` when `x` cannot be indexed by `y` or has no
# such entry.
getdefnothing(x, y) = nothing
getdefnothing(x::AbstractDict, y) = get(x, y, nothing)
# Arrays are only indexed by integers; any other step (for example a string key met
# where the data holds a list) falls through to the generic method and is ignored.
getdefnothing(x::AbstractArray, y::Integer) = get(x, y, nothing)
example use:
using Test

# Short alias so the nested literals below stay readable.
const d = Dict

# Output key => path of lookup steps; "%all:z" fans out over every entry of the
# value reached so far and inserts "z" into the generated key.
schema = d(
    "a" => ["b", "c", "d"],
    "b" => ["e"],
    "c" => ["f", "%all:z", "g"]
)

# Sample inputs, one per scenario exercised below.
a = d("z" => 3)
b = d("e" => 0.123)
c = d("e" => true, "b" => d("c" => d("d" => "ABC")))
e = d("f" => d(
    "a" => d("g" => "A"),
    "b" => d("g" => "B")
))
f = d("f" => [
    d("g" => "A"),
    d("g" => "B")
])
g = d("e" => nothing, "f" => [1,2,3])

# The checks were previously commented out (`#test`), so nothing was verified.
@testset "extract_flatten" begin
    # No path in the schema matches: empty result.
    @test extract_flatten(a, schema) == d()
    # Single-step path.
    @test extract_flatten(b, schema) == d("b" => 0.123)
    # Nested path plus a single-step path.
    @test extract_flatten(c, schema) == d("b" => true, "a" => "ABC")
    # Fan-out over a dictionary: entry keys become key suffixes.
    @test extract_flatten(e, schema) == d("c_z_a" => "A", "c_z_b" => "B")
    # Fan-out over a list: indices become key suffixes.
    @test extract_flatten(f, schema) == d("c_z_1" => "A", "c_z_2" => "B")
    # `nothing` values and entries that cannot be indexed are dropped.
    @test extract_flatten(g, schema) == d()
end

Assuming that there is only one object in each of those lists, then something like this:
using JSON
using DataFrames  # package names are case-sensitive: `using dataframes` cannot be loaded

# For every row, parse the `measures` entry as JSON with Symbol keys, take the first
# element of the parsed list, splat it into a NamedTuple, and let `AsTable` spread the
# NamedTuple's fields into new columns. `transform` returns a new data frame; `df`
# itself is left untouched.
transform(
    df,
    :measures =>
        ByRow(d -> (; JSON.parse(d; dicttype=Dict{Symbol,Any})[1]...)) =>
        AsTable,
)
What this does is parse the entries in the measures column as JSON (length-one) lists of dicts, take the first element, convert to a NamedTuple, and then use => AsTable to tell transform to convert that NamedTuple into corresponding columns.

Related

Generate all combination of dictionaries containing ranges

In Julia I have a dictionary that can contain other dictionaries, lists of strings/numbers, lists of dictionaries, strings/numbers, and ranges.
I need a list containing every possible combination of dictionaries, obtained by substituting each value of every range (such as StepRange, FloatRange, UnitRange) contained in it.
Example:
Dict{}("A" => Dict{}("B" => 1:1:3, "C" => 2), "B" => [Dict{}( "S" => 1:1.1:2.1)])
=>
[
Dict{}("A" => Dict{}("B" => 1, "C" => 2), "B" => [Dict{}( "S" => 1.1)]),
Dict{}("A" => Dict{}("B" => 2, "C" => 2), "B" => [Dict{}( "S" => 1.1)]),
Dict{}("A" => Dict{}("B" => 3, "C" => 2), "B" => [Dict{}( "S" => 1.1)]),
Dict{}("A" => Dict{}("B" => 1, "C" => 2), "B" => [Dict{}( "S" => 2.1)]),
Dict{}("A" => Dict{}("B" => 2, "C" => 2), "B" => [Dict{}( "S" => 2.1)]),
Dict{}("A" => Dict{}("B" => 3, "C" => 2), "B" => [Dict{}( "S" => 2.1)])
]
Right now, I'm overloading a recursive function like this, but have no idea on how to continue.
# Recursive search for the first range inside a nested structure. Every method returns
# a `(value, found)` tuple, where `found` tells whether a range was encountered.
# `nets` is threaded through the recursion but is not read by any method here.

# Fallback: any other value is returned unchanged and reports "no range".
function iterate(generic, nets::Array)
    return (generic, false)
end

# A range is expanded into the vector of its elements. `AbstractRange` covers
# `UnitRange`, `StepRange` and floating-point ranges alike; the former `FloatRange`
# type no longer exists, so naming it would make this method impossible to define.
function iterate(range::AbstractRange, nets::Array)
    return (collect(range), true)
end

# Arrays: return the result for the first element that contains a range, otherwise the
# array itself.
function iterate(array::Array, nets::Array)
    for v in array
        res = iterate(v, nets)
        # We found a range! Return it
        res[2] && return res
    end
    return (array, false)
end

# Dictionaries: report whether any value contains a range; the dictionary itself is
# returned either way (the expanded range is not propagated).
function iterate(dict::Dict, nets::Array)
    for (k, v) in dict
        res = iterate(v, nets)
        res[2] && return (dict, true)
    end
    return (dict, false)
end
(I have already done this in Python, but there I worked on a piece of text: I used a regex to find custom-defined ranges (like "[1,2,0.1]"), generated the text for every combination, and then parsed it.)
The following snippet reproduces the output in the example, and it could serve as a basis for other variants which treat the recursion differently (there are many options, as I noticed when trying this out). It uses `product` from Iterators.jl, which is installed with Pkg.add("Iterators"); on Julia 1.x an equivalent `product` is available from the standard `Base.Iterators` module.
# `product` ships with Julia itself (Base.Iterators), so the separate Iterators.jl
# package is not needed.
using Base.Iterators: product

# Values that get expanded: ranges, nested dictionaries, and lists.
_isexpandable(x) = x isa Union{AbstractRange,Dict,Vector}

# All alternatives a single value can take once every range inside it is expanded.
_alternatives(x) = Any[x]                         # plain value: exactly one alternative
_alternatives(r::AbstractRange) = collect(r)      # one alternative per range element
_alternatives(d::Dict) = recdictcollect(d)        # one alternative per expanded dict
function _alternatives(v::Vector)
    # Every combination of the elements' alternatives gives one concrete list.
    alts = Vector{Any}()
    for combo in product(map(_alternatives, v)...)
        push!(alts, Any[combo...])
    end
    return alts
end

"""
    findranges(sd::Dict)

Return, for every expandable value of `sd` (range, dictionary or list) in `values(sd)`
order, the vector of alternatives that value can take. Other values are skipped.
"""
function findranges(sd::Dict{K}) where {K}
    ranges = Vector{Vector}()
    for v in values(sd)
        _isexpandable(v) && push!(ranges, _alternatives(v))
    end
    return ranges
end

"""
    recdictcollect(sd::Dict{K}) where {K}

Return a `Vector{Dict{K,Any}}` holding one copy of `sd` for every combination of
values of the ranges found anywhere inside it (directly, in nested dictionaries, or in
lists). A dictionary without ranges yields a single copy.
"""
function recdictcollect(sd::Dict{K}) where {K}
    ranges = findranges(sd)
    outv = Vector{Dict{K,Any}}()
    # `product()` with no arguments yields one empty tuple, which covers the
    # "nothing to expand" case without special handling.
    for c in product(ranges...)
        newd = Dict{K,Any}()
        i = 1
        # `sd` is iterated in the same order as `values(sd)` above, so the i-th
        # expandable value matches the i-th component of the combination.
        for (k, v) in sd
            if _isexpandable(v)
                newd[k] = c[i]
                i += 1
            else
                newd[k] = v
            end
        end
        push!(outv, newd)
    end
    return outv
end
And the example:
julia> example = Dict{}("A" => Dict{}("B" => 1:1:3, "C" => 2), "B" => [Dict{}( "S" => 1:1.1:2.1)])
Dict{ASCIIString,Any} with 2 entries:
"B" => [Dict("S"=>1.0:1.1:2.1)]
"A" => Dict{ASCIIString,Any}("B"=>1:1:3,"C"=>2)
julia> recdictcollect(example)
6-element Array{Dict{ASCIIString,Any},1}:
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>1.0)],"A"=>Dict{ASCIIString,Any}("B"=>1,"C"=>2))
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>2.1)],"A"=>Dict{ASCIIString,Any}("B"=>1,"C"=>2))
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>1.0)],"A"=>Dict{ASCIIString,Any}("B"=>2,"C"=>2))
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>2.1)],"A"=>Dict{ASCIIString,Any}("B"=>2,"C"=>2))
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>1.0)],"A"=>Dict{ASCIIString,Any}("B"=>3,"C"=>2))
Dict{ASCIIString,Any}("B"=>[Dict{ASCIIString,Any}("S"=>2.1)],"A"=>Dict{ASCIIString,Any}("B"=>3,"C"=>2))

Getting the value of wildcard arm

How would I get the value of the wildcard arm in a match statement?
For example:
// Question snippet, kept verbatim because it demonstrates the compiler error.
// It compares `a` with `b` and tries to print the ordering from the catch-all arm.
let a = 1i;
let b = 2i;
match a.cmp(&b) {
Greater => println!("is greater"),
// `_` matches the value without binding it to a name, so it cannot be used as an
// expression inside the arm body.
_ => println!("is {}", _) // error: unexpected token: `_`
}
I'm hoping for something cleaner than storing the enum being matched in a variable:
use std::cmp::Ordering;

// Workaround from the question: keep the comparison result in a variable so the
// catch-all arm can print it.
let a = 1isize; // the old `i` literal suffix was removed; `isize` is its successor
let b = 2isize;
let ord = a.cmp(&b);
match ord {
    // The variant must be path-qualified: a bare `Greater` that is not in scope is a
    // fresh binding that matches everything.
    Ordering::Greater => println!("is greater"),
    // `Ordering` implements `Debug`, not `Display`, hence `{:?}`.
    _ => println!("is {:?}", ord),
}
Is this what you're asking for?
use std::cmp::Ordering;

let a = 1isize; // the old `i` literal suffix was removed; `isize` is its successor
let b = 2isize;
match a.cmp(&b) {
    // The variant must be path-qualified: a bare `Greater` that is not in scope is a
    // fresh binding that matches everything.
    Ordering::Greater => println!("is greater"),
    // An identifier pattern matches anything, exactly like `_`, but also binds the
    // matched value so the arm body can use it. `Ordering` implements `Debug`, not
    // `Display`, hence `{:?}`.
    e => println!("is {:?}", e),
}

Find most occurrences in Linq to SQL using C#

I have the below query that gets Name and TotalPoints as follows:
// Total points per Name: rows whose RecievedPoints is "n/a" are skipped, the rest are
// grouped by Name, and each group sums RecievedPoints * Weightage, where a Weightage
// whose string form is "0.00" counts as 1.
var gradeData = oAngieCtxt
    .prc_ShopInstanceCustomersData(Convert.ToInt32(this.ShopInstanceID), 10000, false)
    .Where(record => record.RecievedPoints != "n/a")
    .GroupBy(record => new { record.Name })
    .Select(byName => new
    {
        TotalPoints = byName.Sum(item => Convert.ToDouble(item.RecievedPoints) * (item.Weightage.ToString() == "0.00" ? 1 : Convert.ToDouble(item.Weightage))),
        Name = byName.Key.Name
    })
    .ToList();
I will have data like below:
TotalPoints Name
5 A
10 B
5 C
15 D
5 E
If we observe the above list 5 is most common. I have to fetch that value from "gradeData".
How can I get that?
// Bucket the rows by TotalPoints, put the largest bucket first, and read its key:
// the TotalPoints value that occurs most often.
var mostCommon = gradeData
    .GroupBy(entry => entry.TotalPoints)
    .OrderByDescending(bucket => bucket.Count())
    .First()
    .Key;
The query below gives you the most common point along with all of its associated names:
// Most frequent TotalPoints value together with every Name that scored it.
var result = (from entry in gradeData
              group entry by entry.TotalPoints into samePoints
              orderby samePoints.Count() descending
              select new
              {
                  TotalPoints = samePoints.Key,
                  Names = samePoints.Select(member => member.Name).ToList()
              }).First();

Summing linq data by year

I've seen dozens of posts similar to this, but I just can't get it to work.
Using asp.net MVC framework, I have a table named Contributions that contains a "ContributionDate" column and an "Amount" column. I'm loading the dates and amounts to display in a chart:
// Contributions with a positive amount, split into parallel x (date) and y (amount)
// lists for the chart.
var results = db.Contributions.Where(c => c.Amount > 0);
ArrayList xValue = new ArrayList();
ArrayList yValue = new ArrayList();
// Materialize the query once: calling ToList() separately for each axis runs the
// query twice and lets the two lists come from different snapshots of the table.
foreach (var contribution in results.ToList())
{
    xValue.Add(contribution.ContributionDate);
    yValue.Add(contribution.Amount);
}
The above works. Now I'd like to sum (i.e., total) the Amounts for each year. I've seen examples that are similar to the following, but I'm clearly clueless (in this example, the compiler doesn't like the "c.ContributionDate" in the new{} statement):
// Question snippet, kept verbatim because it reproduces the compiler error.
// After GroupBy, the `c` handed to Select is a whole group of contributions rather
// than a single contribution, so `c.ContributionDate` is the member the compiler
// rejects.
var results = db.Contributions
.Where(c => c.Amount > 0)
.GroupBy( c => c.ContributionDate )
.Select(c => new {Amount = c.Sum(b => b.Amount), Date=c.ContributionDate});
Thanks for your help!
When you perform a GroupBy, the key by which you're grouping elements is represented by the Key property.
Try this:
// Total amount per distinct ContributionDate; the grouping value is exposed through
// the group's Key property.
var results =
    from contribution in db.Contributions
    where contribution.Amount > 0
    group contribution by contribution.ContributionDate into sameDate
    select new { Amount = sameDate.Sum(item => item.Amount), Date = sameDate.Key };
But this will group items by the entire ContributionDate value, not just by the year. To do that, you'd have to do something like this:
// Total amount per calendar year; each year is reported as January 1st of that year.
var results =
    from contribution in db.Contributions
    where contribution.Amount > 0
    group contribution by contribution.ContributionDate.Year into sameYear
    select new
    {
        Amount = sameYear.Sum(item => item.Amount),
        Date = new DateTime(sameYear.Key, 1, 1)
    };
But since this appears to be Entity Framework, you probably need to use the CreateDateTime function:
// Entity Framework variant of the per-year total: the date is built with
// CreateDateTime instead of the DateTime constructor.
// NOTE(review): EntityFunctions appears to be declared in System.Data.Objects
// (EF 4/5), and on EF 6 the equivalent class is System.Data.Entity.DbFunctions.
// Confirm which EF version the project uses and adjust the using/class accordingly.
using System.Data.Entity;
...
var results = db.Contributions
.Where(c => c.Amount > 0)
.GroupBy( c => c.ContributionDate.Year)
.Select(c => new
{
Amount = c.Sum(b => b.Amount),
// Year comes from the group key; month and day are 1, time of day is 0:00:00.
Date = EntityFunctions.CreateDateTime(c.Key, 1, 1, 0, 0, 0)
});

How to Convert this Linq Query into Lambda Expression

foreach (lc_ShoppingCart sc in shQuery)
{
    // Find the inventory row for this cart item's product (joined to the color and
    // size tables) and reduce its Quantity by the quantity in the cart.
    var inventoryRow = (from pr in db.lc_ProductInventories
                        join c in db.lc_ColorTables on pr.Color equals c.Color
                        join s in db.lc_SizeTables on pr.Size equals s.Size
                        where pr.ProductID == Convert.ToInt32(sc.ProductID)
                              && pr.Color == c.Color
                              && pr.Size == s.Size
                        select pr).First();
    inventoryRow.Quantity -= sc.Quantity;
}
Probably something like this:
// Method-syntax equivalent of the query expression: filter by the cart item's
// product, inner-join the color table on Color and the size table on Size, and keep
// only the inventory row (the `select pr` of the original query).
// Each join projects the inventory row itself, so the second join can still read
// `pr.Size`, and the size table is matched on its Size column.
var ProductInventoryQuery =
    db.lc_ProductInventories
        .Where(pr => pr.ProductID == Convert.ToInt32(sc.ProductID))
        .Join(db.lc_ColorTables, pr => pr.Color, c => c.Color, (pr, c) => pr)
        .Join(db.lc_SizeTables, pr => pr.Size, s => s.Size, (pr, s) => pr)
        .First();

Resources