depth first tree traversal accumulation in clojure - recursion

I'd like to take a tree-like structure like this:
{"foo" {"bar" "1" "baz" "2"}}
and recursively traverse while remembering the path from the root in order to produce something like this:
["foo/bar/1", "foo/baz/2"]
Any suggestions on how this can be done without zippers or clojure.walk?

As nberger does, we separate enumerating the paths from presenting them as strings.
Enumeration
The function
(defn paths [x]
(if (map? x)
(mapcat (fn [[k v]] (map #(cons k %) (paths v))) x)
[[x]]))
... returns the sequence of path-sequences of a nested map. For example,
(paths {"foo" {"bar" "1", "baz" "2"}})
;(("foo" "bar" "1") ("foo" "baz" "2"))
Presentation
The function
#(clojure.string/join \/ %)
... joins strings together with "/"s. For example,
(#(clojure.string/join \/ %) (list "foo" "bar" "1"))
;"foo/bar/1"
Compose these to get the function you want:
(def traverse (comp (partial map #(clojure.string/join \/ %)) paths))
... or simply
(defn traverse [x]
(->> x
paths
(map #(clojure.string/join \/ %))))
For example,
(traverse {"foo" {"bar" "1", "baz" "2"}})
;("foo/bar/1" "foo/baz/2")
You could entwine these as a single function: clearer and more useful to
separate them, I think.
The enumeration is not lazy, so it will run out of
stack space on deeply enough nested maps.

This is my attempt using tree-seq clojure core function.
(def tree {"foo" {"bar" "1" "baz" "2"}})
(defn paths [t]
(let [cf (fn [[k v]]
(if (map? v)
(->> v
(map (fn [[kv vv]]
[(str k "/" kv) vv]))
(into {}))
(str k "/" v)))]
(->> t
(tree-seq map? #(map cf %))
(remove map?)
vec)))
(paths tree) ; => ["foo/bar/1" "foo/baz/2"]
Map keys are used to accumulate paths.

I did something real quick using accumulator, but it isn't depth first.
(defn paths [separator tree]
(let [finished? (fn [[_ v]] ((complement map?) v))]
(loop [finished-paths nil
path-trees (seq tree)]
(let [new-paths (mapcat
(fn [[path children]]
(map
(fn [[k v]]
(vector (str path separator k) v))
children))
path-trees)
finished (->> (filter finished? new-paths)
(map
(fn [[k v]]
(str k separator v)))
(concat finished-paths))
remaining-paths (remove finished? new-paths)]
(if (seq remaining-paths)
(recur finished remaining-paths)
finished)))))
In the repl
(clojure-scratch.core/paths "/" {"foo" {"bar" {"bosh" "1" "bash" "3"} "baz" "2"}})
=> ("foo/baz/2" "foo/bar/bash/3" "foo/bar/bosh/1")

The following uses recursive depth first traversal:
(defn combine [k coll]
(mapv #(str k "/" %) coll))
(defn f-map [m]
(into []
(flatten
(mapv (fn [[k v]]
(if (map? v)
(combine k (f-map v))
(str k "/" v)))
m))))
(f-map {"foo" {"bar" "1" "baz" "2"}})
=> ["foo/bar/1" "foo/baz/2"]

Here's my take:
(defn traverse [t]
(letfn [(traverse- [path t]
(when (seq t)
(let [[x & xs] (seq t)
[k v] x]
(lazy-cat
(if (map? v)
(traverse- (conj path k) v)
[[(conj path k) v]])
(traverse- path xs)))))]
(traverse- [] t)))
(traverse {"foo" {"bar" "1" "baz" "2"}})
;=> [[["foo" "bar"] "1"] [["foo" "baz"] "2"]]
Traverse returns a lazy seq of path-leaf pairs. You can then apply any transformation to each path-leaf, for example to the "/path/to/leaf" fullpath form:
(def ->full-path #(->> (apply conj %) (clojure.string/join "/")))
(->> (traverse {"foo" {"bar" "1" "baz" "2"}})
(map ->full-path))
;=> ("foo/bar/1" "foo/baz/2")
(->> (traverse {"foo" {"bar" {"buzz" 4 "fizz" "fuzz"} "baz" "2"} "faa" "fee"})
(map ->full-path))
;=> ("foo/bar/buzz/4" "foo/bar/fizz/fuzz" "foo/baz/2" "faa/fee")

Related

deref an atom after recursive function completes

I have an atom fs that I'm updating inside a recursive function freq-seq that's the value that holds the results of my computation. I have another function mine-freq-seqs to start freq-seq and when mine-freq-seqs is done I would like to receive the last value of said atom. So I thought I would do it like so
(ns freq-seq-enum)
(def fs (atom #{}))
(defn locally-frequents
[sdb min-sup]
(let [uniq-sdb (map (comp frequencies set) sdb)
freqs (apply merge-with + uniq-sdb)]
(->> freqs
(filter #(<= min-sup (second %)))
(map #(vector (str (first %)) (second %))))))
(defn project-sdb
[sdb prefix]
(if (empty? prefix) sdb
(into [] (->> sdb
(filter #(re-find (re-pattern (str (last prefix))) %))
(map #(subs % (inc (.indexOf % (str (last prefix))))))
(remove empty?)))))
(defn freq-seq
[sdb prefix prefix-support min-sup frequent-seqs]
(if ((complement empty?) prefix) (swap! fs conj [prefix prefix-support]))
(let [lf (locally-frequents sdb min-sup)]
(if (empty? lf) nil
(for [[item sup] lf] (freq-seq (project-sdb sdb (str prefix item)) (str prefix item) sup min-sup #fs)))))
(defn mine-freq-seqs
[sdb min-sup]
(freq-seq sdb "" 0 min-sup #fs))
running it first
(mine-freq-seqs ["CAABC" "ABCB" "CABC" "ABBCA"] 2)
then deref-ing the atom
(deref fs)
yields
#{["B" 4]
["BC" 4]
["AB" 4]
["CA" 3]
["CAC" 2]
["AC" 4]
["ABC" 4]
["CAB" 2]
["A" 4]
["CABC" 2]
["ABB" 2]
["CC" 2]
["CB" 3]
["C" 4]
["BB" 2]
["CBC" 2]
["AA" 2]}
however (doall (mine-freq-seqs ["CAABC" "ABCB" "CABC" "ABBCA"] 2) (deref fs))
just gives #{}
What I want is to let the freq-seq recurse to completion then get the value of the atom fs. So I can call mine-freq-seq and have my result returned in the REPL instead of having to manually deref it there.
First some alternate code without the atom then a look at why you get the empty return.
A more compact version where the sequences in a string are derived with a reduce rather than the recursion with regex and substr.
Then just do a frequencies on those results.
(defn local-seqs
[s]
(->> s
(reduce (fn [acc a] (into acc (map #(conj % a) acc))) #{[]})
(map #(apply str %))
(remove empty?)))
(defn freq-seqs
[sdb min-sup]
(->> (mapcat local-seqs sdb)
frequencies
(filter #(>= (second %) min-sup))
set))
That's the whole thing!
I haven't involved an atom because I didn't see a need but add it at the end if freq-seqs if you like.
For your original question: why the return that you see?
You are calling doall with 2 args, the result of your call and a collection. doall is a function and not a macro so the deref is performed immediately.
(defn doall
;; <snip>
([n coll] ;; you have passed #{} as coll
(dorun n coll) ;; and this line evals to nil
coll) ;; and #{} is returned
You have passed your result as the n arg and an empty set as the coll (from (deref fs))
Now when doall calls dorun, it encounters the following:
(defn dorun
;; <snip>
([n coll]
(when (and (seq coll) (pos? n)) ;; coll is #{} so the seq is falesy
(recur (dec n) (next coll)))) ;; and a nil is returned
Since the empty set from fs is the second arg (coll) and and is a macro, it will be falsey on (seq coll), return nil and then doall returns the empty set that was it's second arg.
Final note:
So that is something that works and why yours failed. As to how to make yours work, to fix the call above I tried:
(do (doall (mine-freq-seqs ["CAABC" "ABCB" "CABC" "ABBCA"] 2))
(deref fs))
That is closer to working but with the recusion in your process, it only forces the eval one level deep. So you could push the doall deeper into your funcs but I have proposed a completely different internal structure so I will leave the rest to you if you really need that structure.
I changed it a bit to remove all of the lazy bits (this happens silently in the repl but can be confusing when it changes outside of the repl). Note the changes with vec, mapv, and doall. At least now I get your result:
(def fs (atom #{}))
(defn locally-frequents
[sdb min-sup]
(let [uniq-sdb (map (comp frequencies set) sdb)
freqs (apply merge-with + uniq-sdb)]
(->> freqs
(filter #(<= min-sup (second %)))
(mapv #(vector (str (first %)) (second %))))))
(defn project-sdb
[sdb prefix]
(if (empty? prefix)
sdb
(into [] (->> sdb
(filter #(re-find (re-pattern (str (last prefix))) %))
(map #(subs % (inc (.indexOf % (str (last prefix))))))
(remove empty?)))))
(defn freq-seq
[sdb prefix prefix-support min-sup frequent-seqs]
(if ((complement empty?) prefix) (swap! fs conj [prefix prefix-support]))
(let [lf (locally-frequents sdb min-sup)]
(if (empty? lf)
nil
(vec (for [[item sup] lf] (freq-seq (project-sdb sdb (str prefix item)) (str prefix item) sup min-sup #fs))))))
(defn mine-freq-seqs
[sdb min-sup]
(freq-seq sdb "" 0 min-sup #fs))
(doall (mine-freq-seqs ["CAABC" "ABCB" "CABC" "ABBCA"] 2))
(deref fs) => #{["B" 4] ["BC" 4] ["AB" 4] ["CA" 3]
["CAC" 2] ["AC" 4] ["ABC" 4] ["CAB" 2]
["A" 4] ["CABC" 2] ["ABB" 2] ["CC" 2] ["CB" 3]
["C" 4] ["BB" 2] ["CBC" 2] ["AA" 2]}
I'm still not really sure what the goal is or how/why you get entries like "CABC".

Clojure - Recursively Flatten Nested Maps

Given a map with the key :content, where content is a list of strings or other maps, how can I flatten out the values to receive only the strings?
(flattener {:content '("b" {:content ("c" {:content ("d")})} "e")})
> '("b" "c" "d" "e")
I'm stumbling through very hacky loop recur attempts and now my brain is burnt out. Is there a nice idiomatic way to do this in Clojure?
Thanks.
What I've got is below, and although it works, it's quite ugly
(defn flatten-content
[coll]
(loop [acc '(), l coll]
(let [fst (first l), rst (rest l)]
(cond
(empty? l) (reverse acc)
(seq? fst) (recur acc (concat fst rst))
(associative? fst) (recur acc (concat (:content fst) rst))
:else (recur (conj acc fst) rst)))))
The tree-seq function helps walk, and since your map
(def m {:content '("b" {:content ("c" {:content ("d")})} "e")})
always has a list of "children" keyed by :content, this works
(filter string? (tree-seq associative? :content m))
;=> ("b" "c" "d" "e")
The following recursive function works (and is about 25% faster than a filtered tree-seq approach):
(defn flatten-content [node]
(lazy-seq
(if (string? node)
(list node)
(mapcat flatten-content (:content node)))))

Clojure: idiomatic update a map's value IF the key exists

Here's my problem: I want a function helpme that takes a map and replaces the keys :r and :g with empty vectors if and only if those keys exist. For example:
Input:
(helpme {:a "1" :r ["1" "2" "3"] :g ["4" "5"]})
Output:
{:a "1" :r [] :g []}
Input:
(helpme {:a "1" :r ["1" "2" "3"]})
Output:
{:a "1" :r []}
I can define a function "helpme" that does this, but it's overly complicated, and I feel like there must be an easier (more idiomatic) way...
Here's the overly complicated way I've done, as requested below:
(defn c [new-doc k] (if (contains? new-doc k) (assoc new-doc k []) new-doc))
(defn helpme [new-doc] (c (c new-doc :r) :g))
(defn helpme [m]
(into m (for [[k _] (select-keys m [:r :g])]
[k []])))
Short, and only requires editing in one place when the number of items to set to [] changes.
In my search for a version of update-in which only updated the map if the key actually existed, Google insisted that I could find my answer here. For others in search of the same thing I've created the following helper functions:
(defn contains-in?
[m ks]
(not= ::absent (get-in m ks ::absent)))
(defn update-if-contains
[m ks f & args]
(if (contains-in? m ks)
(apply (partial update-in m ks f) args)
m))
That way you could:
> (def my-data {:a {:aa "aaa"}})
> (update-if-contains my-data [:a :aa] clojure.string/upper-case)
{:a {:aa "AAA"}}
> (update-if-contains my-data [:a :aa] clojure.string/split #" ")
{:a {:aa ["a" "aa"]}}
> (update-if-contains my-data [:a :b] clojure.string/upper-case)
{:a {:aa "aaa"}} ; no change because [:a :b] didn't exist in the map
(defn helpme
[mp]
(as-> mp m
(or (and (contains? m :r) (assoc m :r []))
m)
(or (and (contains? m :g) (assoc m :g []))
m)
m))
if there were a third replacement, I would use this function:
(defn replace-contained [m k v] (or (and (contains? m k) (assoc m k v)) m))
as-> is new in clojure 1.5 but the definition is very simple if you are stuck using an older clojure version:
(defmacro as->
"Binds name to expr, evaluates the first form in the lexical context
of that binding, then binds name to that result, repeating for each
successive form, returning the result of the last form."
{:added "1.5"}
[expr name & forms]
`(let [~name ~expr
~#(interleave (repeat name) forms)]
~name))
what about using cond-> for that purpose?
(defn helpme [m]
(cond-> m
(:r m) (assoc :r [])
(:g m) (assoc :g [])))
One option:
(defn helpme [m]
(merge m
(apply hash-map (interleave
(clojure.set/intersection
(set (keys m)) #{:r :g})
(repeat [])))))
If this is really as simple as conditionally setting the value of two fixed keys, I'd just write it out long hand to keep it simple.
(defn clean [m]
(let [m (if (:r m) (assoc m :r []) m)
m (if (:g m) (assoc m :g []) m)]
m))
If you want something more general and reusable, here's an option:
(defn cond-assoc [m & kvs]
(reduce
(fn [acc [k v]]
(if (get acc k)
(assoc acc k v)
acc))
m
(partition 2 kvs)))
(cond-assoc {:a "1" :r ["1" "2" "3"] :g ["4" "5"]}
:r []
:g []) ; {:r [] :a "1" :g []}
(cond-assoc {:a "1" :r ["1" "2" "3"]}
:r []
:g []) ; {:r [] :a "1"}
By testing for the expected key in the compare function
(sort-by #(if (number? (:priority %))
(:priority %)
(java.lang.Integer/MAX_VALUE))
<
[{:priority 100} {:priority 10} {:test 1}])
>({:priority 10} {:priority 100} {:test 1})
I really like the helpme API to include the keys to reset and the default value to reset to:
(defn helpme [m & {:keys [ks v]
:or {ks #{:r :g}
v []}}]
(apply assoc m (-> m
;; select only existing keys by selecting from original map
(select-keys ks)
keys
;; generate defaults for each (handled by applying `assoc`)
(interleave (repeat v)))))
This uses assocs variadic form by production the arguments to it.
If you give up the general API it can be as short as:
(defn helpme [m]
(apply assoc m (-> m
(select-keys #{:r :g})
keys
(interleave (repeat [])))))

Getting a vector of largest keys in vector of maps

I have a vector of maps, which looks like this:
(def game-vec [{:game 1 :start 123456}
{:game 2 :start 523456}
{:game 3 :start 173456}
{:game 1 :start 123456}
{:game 1 :start 523456}
{:game 2 :start 128456}
{:game 3 :start 123256}])
I'd like to take the biggest :start time for each :game. What's the best way to do this?
Here is yet another solution
user=> (map #(apply max-key :start %)
(vals (group-by :game game-vec)))
({:game 1, :start 523456}
{:game 2, :start 523456}
{:game 3, :start 173456})
(into {} (for [[game times] (group-by :game game-vec)]
{game (apply max (map :start times))}))
One way would be to get all the games from the vector.
maybe something like:
(defn game-keys [from]
(set (map (fn [x] (:game x)) from)))
Now we have all the unique games stored somewhere, now for each of those we want the highest value of start. Sort might be useful if we filter out the right games.
(defn games [key from]
(filter (fn [x] (= (:game x) key)) from))
So we can get the games that we want, now we just need the highest of them
(defn max-start [lst]
(first (sort (fn [x y] (> (:start x) (:start y))) lst)))
So now we can do:
(map (fn [x] (max-start (games x game-vec))) (game-keys game-vec))
However that is just one way of doing, there are probably better ways of doing depending on the definition of best.
I came up with this:
(defn max-start-per-game [coll]
(into {} (map (fn [[k v]] [k (apply max (map :start v))])
(group-by :game game-vec))))
=> (max-start-per-game game-vec)
{1 523456, 2 523456, 3 173456}
The idea is to get all the data per game in one place and then take out the data for starts. Then just do a max on that.
The more general version:
(defn collect [coll sum-key collect]
(into {} (map (fn [[k v]] [k (map :start v)])
(group-by :game game-vec))))
(defn no-good-name
[coll f key1 key2]
(into {} (map (fn [[k v]] [k (f v)])
(collect coll key1 key2)))
(no-good-name game-vec #(apply max %) :game :start)
=> {1 523456, 2 523456, 3 173456}
(using a costum function (called fmap somewhere in contrib) to map over all values of a map would probebly be even better but you can do that your self)
Iterating over my last solution with the max function idea from #nickik. I am convinced there is a one-liner in here somewhere :-)
(reduce
(fn [m x]
(assoc m (:game x)
(max (:start x)
(or (m (:game x)) 0))))
{}
game-vec)
Functionally very similar to Julian Chastang's code and using reduce I have:
(defn max-start-per-game [games]
(reduce (fn [res {:keys [game start]}]
(let [cur-start (get res game 0)
max-start (max start cur-start)]
(assoc res game max-start)))
{}
games))
user=> (max-start-per-game game-vec)
{3 173456, 2 523456, 1 523456}
Alternatively using group-byamalloy's code is as succinct as possible.

Merge list of maps and combine values to sets in Clojure

What function can I put as FOO here to yield true at the end? I played with hash-set (only correct for first 2 values), conj, and concat but I know I'm not handling the single-element vs set condition properly with just any of those.
(defn mergeMatches [propertyMapList]
"Take a list of maps and merges them combining values into a set"
(reduce #(merge-with FOO %1 %2) {} propertyMapList))
(def in
(list
{:a 1}
{:a 2}
{:a 3}
{:b 4}
{:b 5}
{:b 6} ))
(def out
{ :a #{ 1 2 3}
:b #{ 4 5 6} })
; this should return true
(= (mergeMatches in) out)
What is the most idiomatic way to handle this?
This'll do:
(let [set #(if (set? %) % #{%})]
#(clojure.set/union (set %) (set %2)))
Rewritten more directly for the example (Alex):
(defn to-set [s]
(if (set? s) s #{s}))
(defn set-union [s1 s2]
(clojure.set/union (to-set s1) (to-set s2)))
(defn mergeMatches [propertyMapList]
(reduce #(merge-with set-union %1 %2) {} propertyMapList))
I didn't write this but it was contributed by #amitrathore on Twitter:
(defn kv [bag [k v]]
(update-in bag [k] conj v))
(defn mergeMatches [propertyMapList]
(reduce #(reduce kv %1 %2) {} propertyMapList))
I wouldn't use merge-with for this,
(defn fnil [f not-found]
(fn [x y] (f (if (nil? x) not-found x) y)))
(defn conj-in [m map-entry]
(update-in m [(key map-entry)] (fnil conj #{}) (val map-entry)))
(defn merge-matches [property-map-list]
(reduce conj-in {} (apply concat property-map-list)))
user=> (merge-matches in)
{:b #{4 5 6}, :a #{1 2 3}}
fnil will be part of core soon so you can ignore the implementation... but it just creates a version of another function that can handle nil arguments. In this case conj will substitute #{} for nil.
So the reduction conjoining to a set for every key/value in the list of maps supplied.
Another solution contributed by #wmacgyver on Twitter based on multimaps:
(defn add
"Adds key-value pairs the multimap."
([mm k v]
(assoc mm k (conj (get mm k #{}) v)))
([mm k v & kvs]
(apply add (add mm k v) kvs)))
(defn mm-merge
"Merges the multimaps, taking the union of values."
[& mms]
(apply (partial merge-with union) mms))
(defn mergeMatches [property-map-list]
(reduce mm-merge (map #(add {} (key (first %)) (val (first %))) property-map-list)))
This seems to work:
(defn FOO [v1 v2]
(if (set? v1)
(apply hash-set v2 v1)
(hash-set v1 v2)))
Not super pretty but it works.
(defn mergeMatches [propertyMapList]
(for [k (set (for [pp propertyMapList] (key (first pp))))]
{k (set (remove nil? (for [pp propertyMapList] (k pp))))}))

Resources