clojure: add index to vector of maps - vector

I have a vector of maps. I want to associate an index element for each element.
Example:
(append-index [{:name "foo"} {:name "bar"} {:name "baz"}])
should return
[{:name "foo" :index 1} {:name "bar" :index 2} {:name "baz" :index 3}]
What is the best way to implement append-index function?

First of all, Clojure starts counting vector elements from 0, so you probably want to get
[{:index 0, :name "foo"} {:index 1, :name "bar"} {:index 2, :name "baz"}]
You could do it pretty easily with map-indexed function
(defn append-index
  "Returns a vector in which every map of `coll` carries its zero-based
  position under the :index key."
  [coll]
  ;; Pouring the map-indexed transducer into [] yields a vector eagerly, so a
  ;; vector of maps comes back as a vector rather than as a lazy seq.
  (into []
        (map-indexed (fn [idx m] (assoc m :index idx)))
        coll))

just adding some fun:
(defn append-index
  "Walks `items` in step with an unbounded zero-based counter and stores the
  counter value under :index in each item. Returns a lazy seq."
  [items]
  (map (fn [item idx] (assoc item :index idx))
       items
       (range)))

Related

How can I turn an ordered tree into a collection of named nodes in Clojure?

I think it's best to use an example. Let's say I have an ordered tree:
(def abcd [:a [:b :c] :d])
I want to build from it a collection of key-value maps, each map representing a node of this tree, with a random name and all relevant information, that is, its parent (nil for the root node), its index (0, 1, 2, ...) and, if it's a leaf node, its content (like ":a"). For instance, in this case it could be:
[{:name G__36654, :parent nil, :index 0}
{:name G__36655, :content :a, :parent G__36654, :index 0}
{:name G__36656, :parent G__36654, :index 1}
{:name G__36657, :content :b, :parent G__36656, :index 0}
{:name G__36658, :content :c, :parent G__36656, :index 1}
{:name G__36659, :content :d, :parent G__36654, :index 2}]
I defined a function that seems to do what I want, but it uses recursion by calling itself and I'm having trouble figuring out how to use loop-recur instead, and I believe there must be something better out there. Here's my attempt:
(defn mttrav
  "Flattens an ordered tree into a vector of node maps, parent before children.

  Every node gets a fresh gensym under :name plus the :parent name and its
  :index among its siblings. Anything that is not a collection is a leaf and
  additionally carries its value under :content. The one-argument arity
  starts at the root (parent nil, index 0)."
  ([ptree]
   (mttrav ptree nil 0))
  ([ptree parent index]
   (let [node-name (gensym)]
     (if (coll? ptree)
       ;; Branch: emit this node, then splice in each child's flattened subtree.
       (into [{:name node-name :parent parent :index index}]
             (comp (map-indexed (fn [i child] (mttrav child node-name i)))
                   cat)
             ptree)
       ;; Leaf: a single node that records the value itself.
       [{:name node-name :content ptree :parent parent :index index}]))))
BTW, I don't know if a vector is the right collection to use, maybe a set would make more sense, but I'm using a vector for easier debugging, since it's more readable when the order in which nodes are generated is preserved, and if nodes are accidentally repeated I want to see it.
Thanks in advance!
Edit: just to clarify, it would also be acceptable for each node to have a list of :child nodes instead of a :parent node, and some other variations, as long as it's a flat collection of maps, each map representing a node, with a unique :name, and the position, content and parent-child relations of the nodes are captured in this structure. The intended input are hiccup parse trees coming typically from Instaparse, and the maps are meant to become records to insert in a Clara session.
When a tree resists tail recursion, another thing to try is a "zipper" from Clojure's standard library. Zippers shine for editing, but they're also pretty good at linearizing depth-first traversal while keeping structure context available. A typical zipper loop looks like this:
user> (def abcd '(:a (:b :c) :d))
#'user/abcd
user> (loop [ret [], z (zip/seq-zip abcd)]
(if (zip/end? z)
ret
(let [o {:name 42, :content (zip/node z), :parent 42, :index 42}]
(recur (conj ret o) (zip/next z)))))
[{:name 42, :content (:a (:b :c) :d), :parent 42, :index 42}
{:name 42, :content :a, :parent 42, :index 42}
{:name 42, :content (:b :c), :parent 42, :index 42}
{:name 42, :content :b, :parent 42, :index 42}
{:name 42, :content :c, :parent 42, :index 42}
{:name 42, :content :d, :parent 42, :index 42}]
To fill in :parent and :index, you'll find zipper notation for looking "up" at parents, "left" for siblings, etc., in the official docs at https://clojure.github.io/clojure/clojure.zip-api.html.
I created the zip with seq-zip having modeled nodes as a list. Your specific case models nodes as vectors, which seq-zip does not recognize, so you would presumably use vector-zip or invent your own adapter. You can follow the "Source" link in the docs to see how seq-zip and vector-zip work.
Breadth first traversal is what you need. So if you want to build the list of parents while you traverse the tree, you need to first uniquely identify all your leaf nodes. I'm not sure it can be done without doing that, except if you know for sure that your leaf nodes are unique. It's also getting really late/early here, so my brain is not working optimally. I'm sure my solution can get distilled down a lot.
So if you have a tree like [:a [:b :c] :d [:b :c]], [:b :c] is a parent of :b and :c, but then the last two leaf nodes are also :b and :c, so which parent do you choose?
So let's have a tree whose leaves have unique id.
(defn attach-ids
  "Replaces every non-collection value in `tree` with a map holding the
  original value under :node and a fresh gensym under :id. Collections are
  returned as they are."
  [tree]
  (letfn [(tag [node]
            (if-not (coll? node)
              {:node node :id (gensym)}
              node))]
    (clojure.walk/postwalk tag tree)))
(def tree (attach-ids [:a [:b :c] :d]))
;; produces this
;; [{:node :a, :id G__21500}
;; [{:node :b, :id G__21501} {:node :c, :id G__21502}]
;; {:node :d, :id G__21503}]
Now for the rest of the solution
(defn add-parent
  "Registers `branch` in `parent-map` under `id`. The entry holds the set of
  the children's :id values (:children-ids) and the children's :node values
  in order (:child-nodes)."
  [parent-map id branch]
  (let [entry {:children-ids (into #{} (map :id) branch)
               :child-nodes (map :node branch)}]
    (assoc parent-map id entry)))
(defn find-parent-id
  "Returns the key of the first `parent-map` entry whose :children-ids
  contains the :id of `node`, or nil when no entry matches."
  [node parent-map]
  (let [wanted (:id node)]
    (first
     (for [[parent-id {:keys [children-ids]}] parent-map
           :when (contains? children-ids wanted)]
       parent-id))))
(defn find-index
  "Position of `node` among its siblings.

  When a parent is registered in `parent-map`, the :node value is looked up
  in that parent's :child-nodes; otherwise `node` itself is looked up in
  `tree`. The result is whatever .indexOf returns."
  [node parent-map tree]
  (let [parent-id (find-parent-id node parent-map)]
    (if parent-id
      (-> parent-map
          (get parent-id)
          :child-nodes
          (.indexOf (:node node)))
      (.indexOf tree node))))
(defn bfs
  "Breadth-first walk over `tree`, returning a flat vector of node maps.

  Map nodes are emitted with :parent and :index added. Vector nodes get a
  fresh gensym :id, are registered as a parent via add-parent, and have their
  children appended to the end of the work queue. A node that is neither a
  map nor a vector stops the walk, and the result is nil."
  [tree]
  (loop [pending tree
         parents {}
         out []]
    (if (seq pending)
      (let [head (first pending)
            remaining (vec (rest pending))]
        (cond
          ;; Branch: describe it, remember its children, enqueue them last.
          (vector? head)
          (let [branch-id (gensym)]
            (recur (into remaining head)
                   (add-parent parents branch-id head)
                   (conj out {:id branch-id
                              :index (find-index head parents tree)
                              :parent (find-parent-id head parents)})))

          ;; Leaf produced by attach-ids: annotate and emit.
          (map? head)
          (recur remaining
                 parents
                 (conj out (assoc head
                                  :parent (find-parent-id head parents)
                                  :index (find-index head parents tree))))))
      out)))
(def tree (attach-ids [:a [:b :c] :d]))
(bfs tree)
;; children with :parent nil value point to root
;;[{:node :a, :id G__21504, :parent nil, :index 0}
;; {:id G__21513, :index 1, :parent nil}
;; {:node :d, :id G__21507, :parent nil, :index 2}
;; {:node :b, :id G__21505, :parent G__21513, :index 0}
;; {:node :c, :id G__21506, :parent G__21513, :index 1}]

Clojure iterate a vector and look ahead/look behind

I do have to iterate over a vector, which in turn has maps as its items. I need to compare which map comes next, and sometimes I need to look what was in the map we looked at before. So it is necessary to have some kind of look ahead/look behind functionality. My current approach works, but I guess it is ugly, unidiomatic Clojure and I assume that there must be a better (more canonical) way to achieve this.
(let [result (apply str (map (fn [{position-before :position compound-before :compund } ; previous term (unfortunately we need to compare all three)
{:keys [word position line tag stem compound grammarpos] :or {grammarpos "0" stem "" } } ; this maps to the current word
{position-ahead :position compound-ahead :compound line-ahead :line}] ; this is for lookahead
(do some stuff)) ;; now we have position, which is our current position, position-before and position-after to compare with each other
;; this is how we map:
(into '[{}] (conj grammar '[{}]))
(next (into '[{}] (conj grammar '[{}])))
(next (next (into '[{}] (conj grammar '[{}]))))))])
As for the request of the data-example, this is a part of the vector:
[{:tag "0", :position "0", :line "0", :stem "dev", :grammarpos "2625", :word "deva"} {:tag "0", :position "0", :line "0", :stem "deva", :grammarpos "4", :word "deva"}]
The job is to compare values for position, compound etc., sometimes look ahead, sometimes look behind.
You could iterate over a partition of your vector, with a size of 3 and step of 1. Then for each element in the vector, you also get the before and after that you can study as you iterate with a for or reduce.
Some examples: https://clojuredocs.org/clojure.core/partition
also if you need all the preceding and following items for every item, you can combine for list comprehension, with destructuring.
for example:
user> (def items [:a :b :c :d :e :f :g])
#'user/items
user> (for [index (range (count items))
:let [[before [current & after]] (split-at index items)]]
{:before before :current current :after after})
({:before (), :current :a, :after (:b :c :d :e :f :g)}
{:before (:a), :current :b, :after (:c :d :e :f :g)}
{:before (:a :b), :current :c, :after (:d :e :f :g)}
{:before (:a :b :c), :current :d, :after (:e :f :g)}
{:before (:a :b :c :d), :current :e, :after (:f :g)}
{:before (:a :b :c :d :e), :current :f, :after (:g)}
{:before (:a :b :c :d :e :f), :current :g, :after nil})
you just split collection at every item's index one by one, and from the result take first item (before), first of second item (current), rest of second item (after)
also a slightly less readable way (but probably more efficient for big collections, since it doesn't do take/drop on every step, but adds/removes a single item to/from the collection)
user> (take (count items)
(iterate
(fn [[before current after]]
[(conj before current) (first after) (rest after)])
[[] (first items) (rest items)]))
([[] :a (:b :c :d :e :f :g)]
[[:a] :b (:c :d :e :f :g)]
[[:a :b] :c (:d :e :f :g)]
[[:a :b :c] :d (:e :f :g)]
[[:a :b :c :d] :e (:f :g)]
[[:a :b :c :d :e] :f (:g)]
[[:a :b :c :d :e :f] :g ()])
If you want to do really complex things, perhaps zippers will be a better solution.
For example, let's say that you start with:
(def x
[{:tag "0" :dups 0}
{:tag "1" :dups 0}
{:tag "1" :dups 0}
{:tag "3" :dups 0}])
And your requirements are to increment the dups counter of all consecutive tags with the same name and add a "----" tag between them.
With zippers the solution will look like:
(require '[clojure.zip :as zip :refer [root node]])
(defn complex-thing
  "Walks the zipper from the given location to its end and returns the
  rebuilt root. Whenever the current node has the same :tag as the node
  before it, :dups is incremented on both and a {:tag \"----\"} node is
  inserted to the left of the current one."
  [zip]
  (loop [loc zip]
    (cond
      ;; Finished: hand back the edited structure.
      (zip/end? loc)
      (zip/root loc)

      ;; Same tag as the previous node: bump both counters and separate them.
      (= (:tag (zip/node loc))
         (:tag (zip/node (zip/prev loc))))
      (recur (-> loc
                 zip/prev
                 (zip/edit update :dups inc)
                 zip/next
                 (zip/edit update :dups inc)
                 (zip/insert-left {:tag "----"})
                 zip/next))

      :else
      (recur (zip/next loc)))))
(complex-thing (zip/next (zip/next (zip/vector-zip x)))) ;; start from the second element of the vector
[{:tag "0", :dups 0}
{:tag "1", :dups 1}
{:tag "----"}
{:tag "1", :dups 1}
{:tag "3", :dups 0}]

How do I check for duplicates within a map in clojure?

So I have a list like the following:
({:name "yellowtail", :quantity 2} {:name "tuna", :quantity 1}
{:name "albacore", :quantity 1} {:quantity 1, :name "tuna"})
My goal is to search the list of map items and find duplicate keys; if there are duplicates, then increment the quantity. So in the list I have two tuna mapped elements that show up. I want to remove one and just increment the quantity of the other. So the result should be:
({:name "yellowtail", :quantity 2} {:name "tuna", :quantity 2}
{:name "albacore", :quantity 1} )
With :quantity of tuna incremented to 2. I have attempted to use recur to do this without success, I'm not sure if recur is a good direction to run with. Could someone point me in the right direction?
You can group-by :name your elements and then map through the grouped collection summing the values.
Something like this
(->> your-list
(group-by :name)
(map (fn [[k v]]
{:name k :quantity (apply + (map :quantity v))})))
P.S. I assume you need to sum quantity of elements, because it's not clear what exactly you need to increment.
This is standard use case for map and reduce.
(->> data
(map (juxt :name :quantity identity))
(reduce (fn [m [key qty _]]
(update m key (fnil (partial + qty) 0)))
{})
(map #(hash-map :name (key %1) :quantity (val %1))))
I am using identity to return the element in case you wish to use other properties in the map to determine uniqueness. If the map only contains two fields, then you could simplify it down to
(->> data
(mapcat #(repeat (:quantity %1) (:name %1)))
(frequencies)
(map #(hash-map :name (key %1) :quantity (val %1))))
Why not just hold a map from name to quantity. Instead of
({:name "yellowtail", :quantity 2} {:name "tuna", :quantity 1}
{:name "albacore", :quantity 1} {:quantity 1, :name "tuna"})
... we have
{"yellowtail" 2, "tuna" 2, "albacore" 1}
We are using the map to represent a multiset. Several clojure implementations are available, but I haven't used them.

Converting vector to indexed map in Clojure?

Let's say I have the following vector of maps:
[{:name "Jack" :age 5}
{:name "Joe" :age 15}
{:name "Mare" :age 34}
{:name "William" :age 64}
{:name "Adolf" :age 34}]
I want to convert this to an indexed map, like:
{1 {:name "Jack" :age 5}
2 {:name "Joe" :age 15}
3 {:name "Mare" :age 34}
4 {:name "William" :age 64}
5 {:name "Adolf" :age 34}}
And at some point, when I have modified the indexed map, I want to convert it back to vector of maps.
How to do it?
You can use map-indexed in order to associate each map to its index and then reduce it into a hash map:
;; Keys start at 1 to match the requested {1 {...} 2 {...} ...} shape;
;; map-indexed itself counts from 0, hence the inc.
(into {} (map-indexed (fn [i m] [(inc i) m])) test)
If you want to go back to your first structure:
;; Sort by key before taking the values: a hash map does not promise to
;; iterate in key order, so plain `vals` may return the maps shuffled.
(mapv val (sort-by key your-indexed-map))
zipmap combines a series of keys and values, so you could do:
(zipmap (iterate inc 1) data-vector)
(with data-vector being your vector of maps)
The reverse would basically be sorting by key, then taking all values, which can be written exactly like that:
(->> data-map
(sort-by key)
(map val))

Recreate a flattened tree

I have a vector of maps, that I'd like to transform in a nested fashion.
The data is structured as follows:
(def data
[{:id 1 :name "a" :parent 0}
{:id 2 :name "b" :parent 0}
{:id 3 :name "c" :parent 0}
{:id 4 :name "a_1" :parent 1}
{:id 5 :name "a_2" :parent 1}
{:id 6 :name "b_1" :parent 2}
{:id 7 :name "a_1_1" :parent 4}])
Each map has an :id, some other keys and values not important for this discussion, and :parent key, denoting if the elements belong to another element. If :parent is 0, it's a top level element.
I want to nest this flattened list so that each element belonging to a parent gets stored under a key :nodes in the parent map, like this:
;; Expected result of nesting `data`: every node carries its children under
;; :nodes (an empty vector when it has none). Node 7 has :parent 4, so it
;; belongs under "a_1".
(def nested
  [{:id 1 :name "a" :parent 0 :nodes
    [{:id 4 :name "a_1" :parent 1 :nodes
      [{:id 7 :name "a_1_1" :parent 4 :nodes []}]}
     {:id 5 :name "a_2" :parent 1 :nodes []}]}
   {:id 2 :name "b" :parent 0 :nodes
    [{:id 6 :name "b_1" :parent 2 :nodes []}]}
   {:id 3 :name "c" :parent 0 :nodes []}])
To sum up - I have a flattened tree-like structure that I wish to transform into a tree again. I tried to achieve this using zippers, but failed to handle arbitrarily nested levels.
The easiest way is to build it recursively by performing a full scan at each step:
(defn tree
  "Rebuilds the nesting from a flat seq of nodes. Each returned node gets a
  :nodes key holding the nodes whose :parent equals its :id, recursively.
  The one-argument arity starts from parent id 0. The whole flat list is
  rescanned for every node."
  ([flat-nodes]
   (tree flat-nodes 0))
  ([flat-nodes parent-id]
   (->> flat-nodes
        (filter (fn [candidate] (= (:parent candidate) parent-id)))
        (map (fn [child]
               (assoc child :nodes (tree flat-nodes (:id child))))))))
and then
=> (tree data)
({:parent 0, :name "a", :nodes
({:parent 1, :name "a_1", :nodes
({:parent 4, :name "a_1_1", :nodes (), :id 7}), :id 4}
{:parent 1, :name "a_2", :nodes (), :id 5}), :id 1}
{:parent 0, :name "b", :nodes
({:parent 2, :name "b_1", :nodes (), :id 6}), :id 2}
{:parent 0, :name "c", :nodes (), :id 3})
Update: A more efficient variation
(defn tree
  "Rebuilds the nesting from a flat seq of nodes, starting at parent id 0.
  Nodes are grouped by :parent once, so each level is a map lookup instead
  of a rescan of the whole list."
  [flat-nodes]
  (let [by-parent (group-by :parent flat-nodes)]
    (letfn [(subtree [parent-id]
              (for [child (by-parent parent-id)]
                (assoc child :nodes (subtree (:id child)))))]
      (subtree 0))))
Such a tree has to be built from the bottom up, so we need a function that will split a seq of nodes into leaves and inner ones:
(defn split-leaves
  "Groups `nodes` into :inner (nodes whose :id is some node's :parent) and
  :leaves (everything else). A group with no members is absent from the
  result."
  [nodes]
  (let [parent-id? (into #{} (map :parent) nodes)
        classify (fn [node]
                   (if (parent-id? (:id node)) :inner :leaves))]
    (group-by classify nodes)))
The next step is attaching all leaves to their parents:
(defn attach-leaves
  "Returns the `inner` nodes, each with the leaves whose :parent equals its
  :id appended to its :nodes. Leaves with no matching inner node are not
  part of the result."
  [inner leaves]
  (let [leaves-by-parent (group-by :parent leaves)]
    (for [node inner]
      (update node :nodes concat (leaves-by-parent (:id node))))))
Those two steps have to be repeated until there are only leaves left:
(defn generate
  "Builds the nested structure bottom-up and returns the :nodes of the node
  whose :id is `root-id`.

  A virtual root {:id root-id} is added to `nodes`. Then, as long as any
  inner nodes remain, the current leaves are attached to them and only the
  inner nodes are carried into the next round."
  [nodes root-id]
  (loop [remaining (conj nodes {:id root-id})]
    (let [groups (split-leaves remaining)
          inner (:inner groups)
          leaves (:leaves groups)]
      (if (empty? inner)
        ;; Only leaves are left: pick the virtual root and unwrap it.
        (some (fn [leaf]
                (when (= root-id (:id leaf))
                  (:nodes leaf)))
              leaves)
        (recur (attach-leaves inner leaves))))))
Note that we have to add and remove a virtual root node for this to work since your original set of nodes did not contain one (that's why the function expects the root node's ID).
(generate data 0)
;; => ({:parent 0, :name "c", :id 3}
;; {:parent 0, :name "b",
;; :nodes ({:parent 2, :name "b_1", :id 6}),
;; :id 2}
;; {:parent 0, :name "a",
;; :nodes ({:parent 1, :name "a_2", :id 5}
;; {:parent 1, :name "a_1",
;; :nodes ({:parent 4, :name "a_1_1", :id 7}),
;; :id 4}),
;; :id 1})
Another option is to convert your child parent relationships to an adjacency list and then traverse the acyclic directed graph.
(defn adjacency-list
  "Folds parent/child records into a map with two keys:
  :adjacency maps each :parent to the vector of its children's :id values,
  in input order; :counts maps each id to the number of times it appeared as
  a child (a parent not yet counted is entered as 0)."
  [coll]
  (let [step (fn [acc {parent :parent child :id}]
               (-> acc
                   (update-in [:counts parent] (fnil identity 0))
                   (update-in [:counts child] (fnil inc 0))
                   (update-in [:adjacency parent] (fnil conj []) child)))]
    (reduce step {} coll)))
(defn get-data
  "Returns the first element of the top-level `data` whose :id equals `k`,
  or nil when there is none."
  [k]
  (->> data
       (filter (fn [entry] (= (:id entry) k)))
       first))
(defn traverse
  "Adds to `m` one node per id in `roots`. Each node is the record returned
  by get-data with a :nodes vector built recursively from the ids listed for
  it in the adjacency map `al` (an empty vector when it has no entry)."
  [m al roots]
  (letfn [(build [k]
            (let [child-ids (get al k)]
              (assoc (get-data k)
                     :nodes (if child-ids
                              (traverse [] al child-ids)
                              []))))]
    (into m (map build) roots)))
(clojure.pprint/pprint
(let [{:keys [adjacency]} (adjacency-list data)]
(traverse [] adjacency (get adjacency 0))))

Resources