accounting for duplicates in a custom clojure vector - vector

I am trying to come up with a functional solution to account for duplicates in a custom data structure that consists of alternating sets and numerical values.
An example:
(def a [#{:a} 0.1 #{:b} 0.3 #{:a :b} 0.1 #{:a} 0.3 #{:b} 0.1 #{:a} 0.1])
I want to add the values corresponding to duplicate sets to result in
[#{:a} 0.5 #{:b} 0.4 #{:a :b} 0.1]
I can do this using loop/recur but was wondering if there is a way that uses higher-order function in Clojure.
Thanks.

(reduce
(fn [acc [k v]]
(update acc k (fnil + 0) v))
{}
(partition 2 a))
First, partition your sequence into key-value pairs with partition and then reduce those pairs. The trick is to use fnil which will replace nil for keys that hasn't been added to acc yet with value 0.
That gives you a map:
{#{:a} 0.5, #{:b} 0.4, #{:b :a} 0.1}
If you need it as the flat sequence of values you can pass it through seq and flatten:
(->> a
(partition 2)
(reduce
(fn [acc [k v]]
(update acc k (fnil + 0) v))
{})
(apply concat)
(into []))
;; => (#{:a} 0.5 #{:b} 0.4 #{:b :a} 0.1)

one more variant (just for fun, because reduce version obviously has a better performance):
(->> a
(partition 2)
(group-by first)
(map (fn [[k v]] [k (apply + (map second v))]))
(reduce into []))

I would fold the source vector into a map with keys serving as aggregates for numerical values, and then unfold it back into a vector.
(->> a
(partition 2)
(reduce (fn [acc [k v]]
(if (get acc k)
(update acc k (partial + v))
(assoc acc k v)))
{})
(reduce (fn [acc [k v]]
(into acc [k v]))
[]))

Related

Clojure: update-in, but with wildcards and path tracking

I'm trying to update values in a structure consisting of nested maps and sequences, but update-in won't work because I want to allow wildcards. My manual approach led me to ugly, big, nested for and into {} calls. I ended up making a function that takes the structure, a selector-like sequence, and an update function.
(defn update-each-in
([o [head & tail :as path] f]
(update-each-in o path f []))
([o [head & tail :as path] f current-path]
(cond
(empty? path) (f o current-path)
(identical? * head)
(cond
(map? o)
(into {} (for [[k v] o]
[k (update-each-in v tail f (conj current-path k))]))
:else (for [[i v] (map-indexed vector o)]
(update-each-in v tail f (conj current-path i))))
:else (assoc o head
(update-each-in (get o head) tail f (conj current-path head))))))
This allows me to simplify my updates to the following
(def sample {"TR" [{:geometry {:ID12 {:buffer 22}}}
{:geometry {:ID13 {:buffer 33}
:ID14 {:buffer 55}}}
{:geometry {:ID13 {:buffer 44}}}]
"BR" [{:geometry {:ID13 {:buffer 22}
:ID18 {:buffer 11}}}
{:geometry {:ID13 {:buffer 33}}}
{:geometry {:ID13 {:buffer 44}}}]})
(update-each-in sample [* * :geometry * :buffer]
(fn [buf path] (inc buf)))
Obviously this has a stack overflow problem with deeply nested structures; although I'm far from hitting that one, it'd be nice to have a robust solution. Can anyone suggest a simpler/faster/more elegant solution? Could this be done with reducers/transducers?
UPDATE It's a requirement that the updating function also gets the full path to the value it's updating.
update-in has exactly the same signature as the function you created, and it does almost exactly the same thing. There are two differences: it doesn't allow wildcards in the "path," and it doesn't pass intermediary paths to the update function.
Adding wildcards to update-in
I've adapted this from the source code for update-in.
(defn update-in-*
[m [k & ks] f & args]
(if (identical? k *)
(let [idx (if (map? m) (keys m) (range (count m)))]
(if ks
(reduce #(assoc % %2 (apply update-in-* (get % %2) ks f args))
m
idx)
(reduce #(assoc % %2 (apply f (get % %2) args))
m
idx)))
(if ks
(assoc m k (apply update-in-* (get m k) ks f args))
(assoc m k (apply f (get m k) args)))))
Now these two lines produce the same result:
(update-in-* sample [* * :geometry * :buffer] (fn [buf] (inc buf)))
(update-each-in sample [* * :geometry * :buffer] (fn [buf path] (inc buf)))
The change I made to update-in is just by branching on a check for the wildcard. If the wildcard is encountered, then every child-node at that level must be modified. I used reduce to keep the cumulative updates to the collection.
Also, another remark, in the interests of robustness: I'd try to use something other than * for the wildcard. It could possibly occur as the key in a map.
Adding path-tracking to update-in
If it is required that the updating function receive the full path, then I would just modify update-in one more time. The function signature changes and (conj p k) gets added, but that's about it.
(defn update-in-*
[m ks f & args] (apply update-in-*-with-path [] m ks f args))
(defn- update-in-*-with-path
[p m [k & ks] f & args]
(if (identical? k *)
(let [idx (if (map? m) (keys m) (range (count m)))]
(if ks
(reduce #(assoc % %2 (apply update-in-*-with-path (conj p k) (get % %2) ks f args))
m
idx)
(reduce #(assoc % %2 (apply f (conj p k) (get % %2) args))
m
idx)))
(if ks
(assoc m k (apply update-in-*-with-path (conj p k) (get m k) ks f args))
(assoc m k (apply f (conj p k) (get m k) args)))))
Now these two lines produce the same result:
(update-in-* sample [* * :geometry * :buffer] (fn [path val] (inc val)))
(update-each-in sample [* * :geometry * :buffer] (fn [buf path] (inc buf)))
Is this better than your original solution? I don't know. I like it because it is modeled after update-in, and other people have probably put more careful thought into update-in than I care to myself.

Clojure rearrangement and mapping of vector of vectors

Here is the situation: I have a vector of vectors ("data"), a set of headers, a subset of headers ("primary headers"), a constant ("C"), an element-wise function ("f"), and the remaining headers ("secondary headers"). My goal is to take the "data" and produce a new vector of vectors.
Example data:
[[1.0 "A" 2.0]
[1.0 "B" 4.0]]
Example headers:
["o1" "i1" "i2"]
Example primary headers:
["i1" "i2"]
Example secondary headers:
["o1"]
Example new vector of vectors:
[[(f "A") (f 2.0) C (f 1.0)]
[(f "B") (f 4.0) C (f 1.0)]]
My current attempt is to mapv each row, then map-indexed each element with an if to check for primary membership, then the constant, then map-indexed each element with an if to check for secondary membership, finally conj on the results. But I am not getting it to work right.
Example code:
(mapv (fn [row] (conj (vec (flatten (map-indexed
(fn [idx item] (let [header-name (nth headers idx)]
(if (= (some #{header-name} primary-headers) headers-name) (f item))))
row)))
C
(vec (flatten (map-indexed
(fn [idx item] (let [header-name (nth headers idx)]
(if (= (some #{header-name} secondary-headers) headers-name) (f item))))
row)))))
data)
You should consider using core.matrix for stuff like this. It is a very flexible tool for multi-dimensional array programming in Clojure.
Most array-manipulation operations are likely to be 1-2 liners.....
(def DATA [[1.0 "A" 2.0]
[1.0 "B" 4.0]])
(emap (partial str "f:") (transpose (mapv #(get-column DATA %) [1 0 2])))
=> [["f:A" "f:1.0" "f:2.0"]
["f:B" "f:1.0" "f:4.0"]]
You might need to look up the column names to calculate the [1 0 2] vector but hopefully this gives you a good idea how to do this....
Not sure if I got your problem right, but looks like you want something like this:
(defn magic [data h p s f]
(let [idx (map (into {} (map-indexed #(vector %2 %1) h))
(concat p s))]
(mapv #(mapv (comp f (partial get %))
idx)
data)))
Here is an example of my magic function:
(magic [[1.0 "A" 2.0]
[1.0 "B" 4.0]]
["o1" "i1" "i2"]
["i1" "i2"]
["o1"]
#(str "<" % ">"))
[["<A>" "<2.0>" "<1.0>"]
["<B>" "<4.0>" "<1.0>"]]
Let's get a closer look at it.
First of all, I'm calculating permutation index idx. In your case it's (1 2 0). In order to calculate it I'm turning ["o1" "i1" "i2"] into a hash map {"o1" 0, "i1" 1, "i2" 2} and then using it on ("i1" "i2" "o1") sequence of primary and secondary headers.
Then I'm using idx to rearrange data matrix. On this step I'm also applying f function to each element of new rearranged matrix.
Update
I thought that it'll be best to split my complicated magic function into three simpler ones:
(defn getPermutation [h1 h2]
(map (into {} (map-indexed #(vector %2 %1) h1))
h2))
(defn permutate [idx data]
(mapv #(mapv (partial get %) idx)
data)))
(defn mmap [f data]
(mapv (partial mapv f)
data))
Each function here is atomic (i.e. performing a single task), and they all could be easily combined to do exactly what magic function do:
(defn magic [data h p s f]
(let [idx (getPermutation h (concat p s))]
(->> data
(permutate idx)
(mmap f))))
getPermutation function here calculates idx permutation index vector.
permutate rearranges columns of a matrix data according to given idx vector.
mmap applies function f to each element of a matrix data.
Update 2
Last time I missed the part about adding a constant. So, in order to do so we'll need to change some of the code. Let's change permutate function allowing it to insert new values to the matrix.
(defn permutate [idx data & [default-val]]
(mapv #(mapv (partial get %) idx (repeat default-val))
data)))
Now, it'll use default-val if it won't be able to get the element with the specified index idx.
We'll also need a new magic function:
(defn magic2 [data h p s f c]
(let [idx (getPermutation h (concat p [nil] s))]
(permutate idx (mmap f data) c)))
I changed the order of applying mmap and permutate functions because it seems that you don't want to apply f to your constant.
And it works:
(magic2 [[1.0 "A" 2.0]
[1.0 "B" 4.0]]
["o1" "i1" "i2"]
["i1" "i2"]
["o1"]
#(str "<" % ">")
"-->")
[["<A>" "<2.0>" "-->" "<1.0>"]
["<B>" "<4.0>" "-->" "<1.0>"]]
Given
(def data [[1.0 "A" 2.0] [1.0 "B" 4.0]])
(def headers ["o1" "i1" "i2"])
(def primaries ["i1" "i2"])
(def secondaries ["o1"])
(defn invert-sequence [s] (into {} (map-indexed (fn [i x] [x i]) s)))
... this does the job:
(defn produce [hs ps ss f data const]
(let [perms (map #(mapv (invert-sequence hs) %) [ps ss])]
(mapv (fn [v] (->> perms
(map #(map (comp f v) %))
(interpose [const])
(apply concat)
vec))
data)))
Using the example in the question:
(produce headers primaries secondaries #(list 'f %) data 'C)
; [[(f "A") (f 2.0) C (f 1.0)] [(f "B") (f 4.0) C (f 1.0)]]
Using Leonid Beschastny's example:
(produce headers primaries secondaries #(str "<" % ">") data 'C)
; [["<A>" "<2.0>" C "<1.0>"] ["<B>" "<4.0>" C "<1.0>"]]
Using str:
(produce headers primaries secondaries str data 'C)
; [["A" "2.0" C "1.0"] ["B" "4.0" C "1.0"]]
Using identity:
(produce headers primaries secondaries identity data 'C)
; [["A" 2.0 C 1.0] ["B" 4.0 C 1.0]]

reverse lookup in a map

I have to extract a key from a map using a value. Is there a way to do this other than implementing reverse lookup myself?
I think that map-invert is the right way to do this.
From the docs:
;; Despite being in clojure.set, this has nothing to do with sets.
user=> (map-invert {:a 1, :b 2})
{2 :b, 1 :a}
;; If there are duplicate keys, one is chosen:
user=> (map-invert {:a 1, :b 1})
{1 :b}
;; I suspect it'd be unwise to depend on which key survives the clash.
You can reverse a map really easily with a 2-line function:
(defn reverse-map [m]
(into {} (map (fn [[a b]] [b a]) m)))
(def a {:a 1 :b 2 :c 3})
(reverse-map a)
=> {1 :a, 3 :c, 2 :b}
((reverse-map a) 1)
=> :a
Try
(some #(if (= (val %) your-val) (key %)) your-map)
If you are using ClojureScript or you need one more alternative :)
(zipmap (vals m) (keys m))
Another one:
(defn reverse-map [m]
(apply hash-map (mapcat reverse m)))
(defn reverse-lookup [m k]
(ffirst (filter (comp #{k} second) m)))
if you want to keep the keys, it is better to just invert the map, but collect the old keys in a set / list etc...
(defn map-inverse [m]
(reduce (fn [m' [k v]] (update m' v clojure.set/union #{k})) {} m))
(defn map-inverse [m]
(reduce (fn [m' [k v]] (update m' v conj k)) {} m))

Getting a vector of largest keys in vector of maps

I have a vector of maps, which looks like this:
(def game-vec [{:game 1 :start 123456}
{:game 2 :start 523456}
{:game 3 :start 173456}
{:game 1 :start 123456}
{:game 1 :start 523456}
{:game 2 :start 128456}
{:game 3 :start 123256}])
I'd like to take the biggest :start time for each :game. What's the best way to do this?
Here is yet another solution
user=> (map #(apply max-key :start %)
(vals (group-by :game game-vec)))
({:game 1, :start 523456}
{:game 2, :start 523456}
{:game 3, :start 173456})
(into {} (for [[game times] (group-by :game game-vec)]
{game (apply max (map :start times))}))
One way would be to get all the games from the vector.
maybe something like:
(defn game-keys [from]
(set (map (fn [x] (:game x)) from)))
Now we have all the unique games stored somewhere, now for each of those we want the highest value of start. Sort might be useful if we filter out the right games.
(defn games [key from]
(filter (fn [x] (= (:game x) key)) from))
So we can get the games that we want, now we just need the highest of them
(defn max-start [lst]
(first (sort (fn [x y] (> (:start x) (:start y))) lst)))
So now we can do:
(map (fn [x] (max-start (games x game-vec))) (game-keys game-vec))
However that is just one way of doing, there are probably better ways of doing depending on the definition of best.
I came up with this:
(defn max-start-per-game [coll]
(into {} (map (fn [[k v]] [k (apply max (map :start v))])
(group-by :game game-vec))))
=> (max-start-per-game game-vec)
{1 523456, 2 523456, 3 173456}
The idea is to get all the data per game in one place and then take out the data for starts. Then just do a max on that.
The more general version:
(defn collect [coll sum-key collect]
(into {} (map (fn [[k v]] [k (map :start v)])
(group-by :game game-vec))))
(defn no-good-name
[coll f key1 key2]
(into {} (map (fn [[k v]] [k (f v)])
(collect coll key1 key2)))
(no-good-name game-vec #(apply max %) :game :start)
=> {1 523456, 2 523456, 3 173456}
(using a costum function (called fmap somewhere in contrib) to map over all values of a map would probebly be even better but you can do that your self)
Iterating over my last solution with the max function idea from #nickik. I am convinced there is a one-liner in here somewhere :-)
(reduce
(fn [m x]
(assoc m (:game x)
(max (:start x)
(or (m (:game x)) 0))))
{}
game-vec)
Functionally very similar to Julian Chastang's code and using reduce I have:
(defn max-start-per-game [games]
(reduce (fn [res {:keys [game start]}]
(let [cur-start (get res game 0)
max-start (max start cur-start)]
(assoc res game max-start)))
{}
games))
user=> (max-start-per-game game-vec)
{3 173456, 2 523456, 1 523456}
Alternatively using group-byamalloy's code is as succinct as possible.

Merge list of maps and combine values to sets in Clojure

What function can I put as FOO here to yield true at the end? I played with hash-set (only correct for first 2 values), conj, and concat but I know I'm not handling the single-element vs set condition properly with just any of those.
(defn mergeMatches [propertyMapList]
"Take a list of maps and merges them combining values into a set"
(reduce #(merge-with FOO %1 %2) {} propertyMapList))
(def in
(list
{:a 1}
{:a 2}
{:a 3}
{:b 4}
{:b 5}
{:b 6} ))
(def out
{ :a #{ 1 2 3}
:b #{ 4 5 6} })
; this should return true
(= (mergeMatches in) out)
What is the most idiomatic way to handle this?
This'll do:
(let [set #(if (set? %) % #{%})]
#(clojure.set/union (set %) (set %2)))
Rewritten more directly for the example (Alex):
(defn to-set [s]
(if (set? s) s #{s}))
(defn set-union [s1 s2]
(clojure.set/union (to-set s1) (to-set s2)))
(defn mergeMatches [propertyMapList]
(reduce #(merge-with set-union %1 %2) {} propertyMapList))
I didn't write this but it was contributed by #amitrathore on Twitter:
(defn kv [bag [k v]]
(update-in bag [k] conj v))
(defn mergeMatches [propertyMapList]
(reduce #(reduce kv %1 %2) {} propertyMapList))
I wouldn't use merge-with for this,
(defn fnil [f not-found]
(fn [x y] (f (if (nil? x) not-found x) y)))
(defn conj-in [m map-entry]
(update-in m [(key map-entry)] (fnil conj #{}) (val map-entry)))
(defn merge-matches [property-map-list]
(reduce conj-in {} (apply concat property-map-list)))
user=> (merge-matches in)
{:b #{4 5 6}, :a #{1 2 3}}
fnil will be part of core soon so you can ignore the implementation... but it just creates a version of another function that can handle nil arguments. In this case conj will substitute #{} for nil.
So the reduction conjoining to a set for every key/value in the list of maps supplied.
Another solution contributed by #wmacgyver on Twitter based on multimaps:
(defn add
"Adds key-value pairs the multimap."
([mm k v]
(assoc mm k (conj (get mm k #{}) v)))
([mm k v & kvs]
(apply add (add mm k v) kvs)))
(defn mm-merge
"Merges the multimaps, taking the union of values."
[& mms]
(apply (partial merge-with union) mms))
(defn mergeMatches [property-map-list]
(reduce mm-merge (map #(add {} (key (first %)) (val (first %))) property-map-list)))
This seems to work:
(defn FOO [v1 v2]
(if (set? v1)
(apply hash-set v2 v1)
(hash-set v1 v2)))
Not super pretty but it works.
(defn mergeMatches [propertyMapList]
(for [k (set (for [pp propertyMapList] (key (first pp))))]
{k (set (remove nil? (for [pp propertyMapList] (k pp))))}))

Resources