Clojure rearrangement and mapping of vector of vectors - vector

Here is the situation: I have a vector of vectors ("data"), a set of headers, a subset of headers ("primary headers"), a constant ("C"), an element-wise function ("f"), and the remaining headers ("secondary headers"). My goal is to take the "data" and produce a new vector of vectors.
Example data:
[[1.0 "A" 2.0]
[1.0 "B" 4.0]]
Example headers:
["o1" "i1" "i2"]
Example primary headers:
["i1" "i2"]
Example secondary headers:
["o1"]
Example new vector of vectors:
[[(f "A") (f 2.0) C (f 1.0)]
[(f "B") (f 4.0) C (f 1.0)]]
My current attempt is to mapv each row, then map-indexed each element with an if to check for primary membership, then the constant, then map-indexed each element with an if to check for secondary membership, finally conj on the results. But I am not getting it to work right.
Example code:
(mapv (fn [row] (conj (vec (flatten (map-indexed
(fn [idx item] (let [header-name (nth headers idx)]
(if (= (some #{header-name} primary-headers) headers-name) (f item))))
row)))
C
(vec (flatten (map-indexed
(fn [idx item] (let [header-name (nth headers idx)]
(if (= (some #{header-name} secondary-headers) headers-name) (f item))))
row)))))
data)

You should consider using core.matrix for stuff like this. It is a very flexible tool for multi-dimensional array programming in Clojure.
Most array-manipulation operations are likely to be 1-2 liners.....
(def DATA [[1.0 "A" 2.0]
[1.0 "B" 4.0]])
(emap (partial str "f:") (transpose (mapv #(get-column DATA %) [1 0 2])))
=> [["f:A" "f:1.0" "f:2.0"]
["f:B" "f:1.0" "f:4.0"]]
You might need to look up the column names to calculate the [1 0 2] vector but hopefully this gives you a good idea how to do this....

Not sure if I got your problem right, but looks like you want something like this:
(defn magic [data h p s f]
(let [idx (map (into {} (map-indexed #(vector %2 %1) h))
(concat p s))]
(mapv #(mapv (comp f (partial get %))
idx)
data)))
Here is an example of my magic function:
(magic [[1.0 "A" 2.0]
[1.0 "B" 4.0]]
["o1" "i1" "i2"]
["i1" "i2"]
["o1"]
#(str "<" % ">"))
[["<A>" "<2.0>" "<1.0>"]
["<B>" "<4.0>" "<1.0>"]]
Let's get a closer look at it.
First of all, I'm calculating permutation index idx. In your case it's (1 2 0). In order to calculate it I'm turning ["o1" "i1" "i2"] into a hash map {"o1" 0, "i1" 1, "i2" 2} and then using it on ("i1" "i2" "o1") sequence of primary and secondary headers.
Then I'm using idx to rearrange data matrix. On this step I'm also applying f function to each element of new rearranged matrix.
Update
I thought that it'll be best to split my complicated magic function into three simpler ones:
(defn getPermutation [h1 h2]
(map (into {} (map-indexed #(vector %2 %1) h1))
h2))
(defn permutate [idx data]
(mapv #(mapv (partial get %) idx)
data)))
(defn mmap [f data]
(mapv (partial mapv f)
data))
Each function here is atomic (i.e. performing a single task), and they all could be easily combined to do exactly what magic function do:
(defn magic [data h p s f]
(let [idx (getPermutation h (concat p s))]
(->> data
(permutate idx)
(mmap f))))
getPermutation function here calculates idx permutation index vector.
permutate rearranges columns of a matrix data according to given idx vector.
mmap applies function f to each element of a matrix data.
Update 2
Last time I missed the part about adding a constant. So, in order to do so we'll need to change some of the code. Let's change permutate function allowing it to insert new values to the matrix.
(defn permutate [idx data & [default-val]]
(mapv #(mapv (partial get %) idx (repeat default-val))
data)))
Now, it'll use default-val if it won't be able to get the element with the specified index idx.
We'll also need a new magic function:
(defn magic2 [data h p s f c]
(let [idx (getPermutation h (concat p [nil] s))]
(permutate idx (mmap f data) c)))
I changed the order of applying mmap and permutate functions because it seems that you don't want to apply f to your constant.
And it works:
(magic2 [[1.0 "A" 2.0]
[1.0 "B" 4.0]]
["o1" "i1" "i2"]
["i1" "i2"]
["o1"]
#(str "<" % ">")
"-->")
[["<A>" "<2.0>" "-->" "<1.0>"]
["<B>" "<4.0>" "-->" "<1.0>"]]

Given
(def data [[1.0 "A" 2.0] [1.0 "B" 4.0]])
(def headers ["o1" "i1" "i2"])
(def primaries ["i1" "i2"])
(def secondaries ["o1"])
(defn invert-sequence [s] (into {} (map-indexed (fn [i x] [x i]) s)))
... this does the job:
(defn produce [hs ps ss f data const]
(let [perms (map #(mapv (invert-sequence hs) %) [ps ss])]
(mapv (fn [v] (->> perms
(map #(map (comp f v) %))
(interpose [const])
(apply concat)
vec))
data)))
Using the example in the question:
(produce headers primaries secondaries #(list 'f %) data 'C)
; [[(f "A") (f 2.0) C (f 1.0)] [(f "B") (f 4.0) C (f 1.0)]]
Using Leonid Beschastny's example:
(produce headers primaries secondaries #(str "<" % ">") data 'C)
; [["<A>" "<2.0>" C "<1.0>"] ["<B>" "<4.0>" C "<1.0>"]]
Using str:
(produce headers primaries secondaries str data 'C)
; [["A" "2.0" C "1.0"] ["B" "4.0" C "1.0"]]
Using identity:
(produce headers primaries secondaries identity data 'C)
; [["A" 2.0 C 1.0] ["B" 4.0 C 1.0]]

Related

Clojure : Passing values of variable to macros

I want to convert {a 1, b 2} clojure.lang.PersistentArrayMap into
[a 1 b 2] clojure.lang.PersistentVector in clojure.
I have tried to write a function in clojure which converts {a 1, b 2} into [a 1 b 2]. I have also written a macro which gives me expected end result. In clojure we cannot pass the values generated inside functions to macros. For that I wanted to know a way in which I can implement a macro directly which can convert {a 1, b 2} into (let [a 1 b 2] (println a)), which will return 1.
Dummy Macro:
(defmacro mymacro [binding & body]
--some implemetation---)
Execution :
(mymacro '{a 1, b 2} (println a))
output:
1
nil
My Implementaion:
Function which converts into desired output.
(defn myfn [x]
(let [a (into (vector) x) b (vec (mapcat vec a))] b))
Execution:
(myfn '{a 1, b 2})
Output:
[a 1 b 2]
MACRO:
(defmacro list-let [bindings & body] `(let ~(vec bindings) ~#body))
Execution:
(list-let [a 1 b 2] (println a))
Output:
1
nil
I wanted to know how can I implement the same inside the macro itself and avoid the function implementation to get the require output. Something same as dummy macro given above. I am also interested in knowing if there is any which through which I can pass the value from my funtion to the macro without using
(def)
in general, macro code is plain clojure code (with the difference that it returns clojure code to be evaluated later). So, almost anything you can think of coding in clojure, you can do inside macro to the arguments passed.
for example, here is the thing you're trying to do (if i understood you correctly):
(defmacro map-let [binding-map & body]
(let [b-vec (reduce into [] binding-map)]
`(let ~b-vec ~#body)))
(map-let {a 10 b 20}
(println a b)
(+ a b))
;;=> 10 20
30
or like this:
(defmacro map-let [binding-map & body]
`(let ~(reduce into [] binding-map) ~#body))
or even like this:
(defmacro map-let [binding-map & body]
`(let [~#(apply concat binding-map)] ~#body))
You don't need a macro for this, and you should always prefer functions over macros when possible.
For your particular case, I have already written a function keyvals which you may find handy:
(keyvals m)
"For any map m, returns the keys & values of m as a vector,
suitable for reconstructing via (apply hash-map (keyvals m))."
(keyvals {:a 1 :b 2})
;=> [:b 2 :a 1]
(apply hash-map (keyvals {:a 1 :b 2}))
;=> {:b 2, :a 1}
And, here are the full API docs.
If you are curious about the implementation, it is very simple:
(s/defn keyvals :- [s/Any]
"For any map m, returns the (alternating) keys & values of m as a vector, suitable for reconstructing m via
(apply hash-map (keyvals m)). (keyvals {:a 1 :b 2} => [:a 1 :b 2] "
[m :- tsk/Map ]
(reduce into [] (seq m)))

clojure - (Another one) StackOverflow with loop/recur

I know this is a recurring question (here, here, and more), and I know that the problem is related to creating lazy sequencies, but I can't see why it fails.
The problem: I had written a (not very nice) quicksort algorithm to sort strings that uses loop/recur. But applied to 10000 elements, I get a StackOverflowError:
(defn qsort [list]
(loop [[current & todo :as all] [list] sorted []]
(cond
(nil? current) sorted
(or (nil? (seq current)) (= (count current) 1)) (recur todo (concat sorted current))
:else (let [[pivot & rest] current
pred #(> (compare pivot %) 0)
lt (filter pred rest)
gte (remove pred rest)
work (list* lt [pivot] gte todo)]
(recur work sorted)))))
I used in this way:
(defn tlfnum [] (str/join (repeatedly 10 #(rand-int 10))))
(defn tlfbook [n] (repeatedly n #(tlfnum)))
(time (count (qsort (tlfbook 10000))))
And this is part of the stack trace:
[clojure.lang.LazySeq seq "LazySeq.java" 49]
[clojure.lang.RT seq "RT.java" 521]
[clojure.core$seq__4357 invokeStatic "core.clj" 137]
[clojure.core$concat$fn__4446 invoke "core.clj" 706]
[clojure.lang.LazySeq sval "LazySeq.java" 40]
[clojure.lang.LazySeq seq "LazySeq.java" 49]
[clojure.lang.RT seq "RT.java" 521]
[clojure.core$seq__4357 invokeStatic "core.clj" 137]]}
As far as I know, loop/recur performs tail call optimization, so no stack is used (is, in fact, an iterative process written using recursive syntax).
Reading other answers, and because of the stack trace, I see there's a problem with concat and adding a doall before concat solves the stack overflow problem. But... why?
Here's part of the code for the two-arity version of concat.
(defn concat [x y]
(lazy-seq
(let [s (seq x)]
,,,))
)
Notice that it uses two other functions, lazy-seq, and seq. lazy-seq is a bit like a lambda, it wraps some code without executing it yet. The code inside the lazy-seq block has to result in some kind of sequence value. When you call any sequence operation on the lazy-seq, then it will first evaluate the code ("realize" the lazy seq), and then perform the operation on the result.
(def lz (lazy-seq
(println "Realizing!")
'(1 2 3)))
(first lz)
;; prints "realizing"
;; => 1
Now try this:
(defn lazy-conj [xs x]
(lazy-seq
(println "Realizing" x)
(conj (seq xs) x)))
Notice that it's similar to concat, it calls seq on its first argument, and returns a lazy-seq
(def up-to-hundred
(reduce lazy-conj () (range 100)))
(first up-to-hundred)
;; prints "Realizing 99"
;; prints "Realizing 98"
;; prints "Realizing 97"
;; ...
;; => 99
Even though you asked for only the first element, it still ended up realizing the whole sequence. That's because realizing the outer "layer" results in calling seq on the next "layer", which realizes another lazy-seq, which again calls seq, etc. So it's a chain reaction that realizes everything, and each step consumes a stack frame.
(def up-to-ten-thousand
(reduce lazy-conj () (range 10000)))
(first up-to-ten-thousand)
;;=> java.lang.StackOverflowError
You get the same problem when stacking concat calls. That's why for instance (reduce concat ,,,) is always a smell, instead you can use (apply concat ,,,) or (into () cat ,,,).
Other lazy operators like filter and map can exhibit the exact same problem. If you really have a lot of transformation steps over a sequence consider using transducers instead.
;; without transducers: many intermediate lazy seqs and deep call stacks
(->> my-seq
(map foo)
(filter bar)
(map baz)
,,,)
;; with transducers: seq processed in a single pass
(sequence (comp
(map foo)
(filter bar)
(map baz))
my-seq)
Arne had a good answer (and, in fact, I'd never noticed cat before!). If you want a simpler solution, you can use the glue function from the Tupelo library:
Gluing Together Like Collections
The concat function can sometimes have rather surprising results:
(concat {:a 1} {:b 2} {:c 3} )
;=> ( [:a 1] [:b 2] [:c 3] )
In this example, the user probably meant to merge the 3 maps into one. Instead, the three maps were mysteriously converted into length-2 vectors, which were then nested inside another sequence.
The conj function can also surprise the user:
(conj [1 2] [3 4] )
;=> [1 2 [3 4] ]
Here the user probably wanted to get [1 2 3 4] back, but instead got a nested vector by mistake.
Instead of having to wonder if the items to be combined will be merged, nested, or converted into another data type, we provide the glue function to always combine like collections together into a result collection of the same type:
; Glue together like collections:
(is (= (glue [ 1 2] '(3 4) [ 5 6] ) [ 1 2 3 4 5 6 ] )) ; all sequential (vectors & lists)
(is (= (glue {:a 1} {:b 2} {:c 3} ) {:a 1 :c 3 :b 2} )) ; all maps
(is (= (glue #{1 2} #{3 4} #{6 5} ) #{ 1 2 6 5 3 4 } )) ; all sets
(is (= (glue "I" " like " \a " nap!" ) "I like a nap!" )) ; all text (strings & chars)
; If you want to convert to a sorted set or map, just put an empty one first:
(is (= (glue (sorted-map) {:a 1} {:b 2} {:c 3}) {:a 1 :b 2 :c 3} ))
(is (= (glue (sorted-set) #{1 2} #{3 4} #{6 5}) #{ 1 2 3 4 5 6 } ))
An Exception will be thrown if the collections to be 'glued' are not all of the same type. The allowable input types are:
all sequential: any mix of lists & vectors (vector result)
all maps (sorted or not)
all sets (sorted or not)
all text: any mix of strings & characters (string result)
I put glue into your code instead of concat and still got a StackOverflowError. So, I also replaced the lazy filter and remove with eager versions keep-if and drop-if to get this result:
(defn qsort [list]
(loop [[current & todo :as all] [list] sorted []]
(cond
(nil? current) sorted
(or (nil? (seq current)) (= (count current) 1))
(recur todo (glue sorted current))
:else (let [[pivot & rest] current
pred #(> (compare pivot %) 0)
lt (keep-if pred rest)
gte (drop-if pred rest)
work (list* lt [pivot] gte todo)]
(recur work sorted)))))
(defn tlfnum [] (str/join (repeatedly 10 #(rand-int 10))))
(defn tlfbook [n] (repeatedly n #(tlfnum)))
(def result
(time (count (qsort (tlfbook 10000)))))
-------------------------------------
Clojure 1.8.0 Java 1.8.0_111
-------------------------------------
"Elapsed time: 1377.321118 msecs"
result => 10000

Returning positions of an element Clojure

I am trying to find a solution to a problem i am facing at the moment, i have tried and tried to get this working but to no avail. I am trying to scan a list containing data and then returning the position of the data if found.
For example if i ran this:
(ind 'p '(l m n o p o p))
Then i would get a return value of....
==> 4 6
As it has found the data in those positions.
I have come close i think to what i want with this solution before, but i cannot get it to run. Can anyone help me figure out whats up with my function?? As far as i can see it should work, but i cant figure out why it isnt?
(defn ind
([choice list emptylist x]
(let [x (count list)])
(if (= (x) 0)
nil)
(if (= (first list) item)
(ind item (rest list) (cons x emptylist) (dec x))
(ind item (rest list) emptylist (dec x))
)
)
)
What i have tried to do is loop through the list until it hits a value and add it to the empty list and once it has looped through return the empty list.
I found that there is a built-in function called keep-indexed in Clojure.
So you can simply do this:
(keep-indexed (fn [idx elm] (if (= 'p elm) idx)) '(l m n o p o p))
; return (4 6)
Here is a solution which I think is a bit simpler:
(defn find-index
"Returns a seq of the indexes of the supplied collection."
[values target]
(let [indexes (range (count values))
val-idx-tuples (map vector values indexes)
found-tuples (filter #(= target (first %)) val-idx-tuples)
found-indexes (vec (map second found-tuples)) ]
found-indexes))
(println (find-index '(l m n o p o p) 'p ))
;=> [4 6]
Though I prefer #chanal's approach, you can write the function you want as follows:
(defn ind [x coll]
(loop [ans [], coll coll, n 0]
(if-let [[y & ys] (seq coll)]
(recur (if (= x y) (conj ans n) ans) ys (inc n))
ans)))
(ind 'p '(l m n o p o p))
;[4 6]
This uses several idioms to make it concise:
if-let comprises if inside let.
The destructuring form [y & ys] comprises calls to first and rest.
Pushing the if form down into the recur avoids repetition.

clojure reduce a seq of maps to a map of vectors

I would like to reduce the following seq:
({0 "Billie Verpooten"}
{1 "10:00"}
{2 "17:00"}
{11 "11:10"}
{12 "19:20"})
to
{:name "Billie Verpooten"
:work {:1 ["10:00" "17:00"]
:11 ["11:10" "19:20"]}}
but I have no idea to do this.
I was think about a recursive function that uses deconstruction.
There's a function for reducing a sequence to something in the standard library, and it's called reduce. Though in your specific case, it seems appropriate to remove the special case key 0 first and partition the rest into the pairs of entries that they're meant to be.
The following function gives the result described in your question:
(defn build-map [maps]
(let [entries (map first maps)
key-zero? (comp zero? key)]
{:name (val (first (filter key-zero? entries)))
:work (reduce (fn [acc [[k1 v1] [k2 v2]]]
(assoc acc (keyword (str k1)) [v1 v2]))
{}
(partition 2 (remove key-zero? entries)))}))
Just for variety here is a different way of expressing an answer by threading sequence manipulation functions:
user> (def data '({0 "Billie Verpooten"}
{1 "10:00"}
{2 "17:00"}
{11 "11:10"}
{12 "19:20"}))
user> {:name (-> data first first val)
:work (as-> data x
(rest x)
(into {} x)
(zipmap (map first (partition 1 2 (keys x)))
(partition 2 (vals x))))}
teh as-> threading macro is new to Clojure 1.5 and makes expressing this sort of function a bit more concise.

Merge list of maps and combine values to sets in Clojure

What function can I put as FOO here to yield true at the end? I played with hash-set (only correct for first 2 values), conj, and concat but I know I'm not handling the single-element vs set condition properly with just any of those.
(defn mergeMatches [propertyMapList]
"Take a list of maps and merges them combining values into a set"
(reduce #(merge-with FOO %1 %2) {} propertyMapList))
(def in
(list
{:a 1}
{:a 2}
{:a 3}
{:b 4}
{:b 5}
{:b 6} ))
(def out
{ :a #{ 1 2 3}
:b #{ 4 5 6} })
; this should return true
(= (mergeMatches in) out)
What is the most idiomatic way to handle this?
This'll do:
(let [set #(if (set? %) % #{%})]
#(clojure.set/union (set %) (set %2)))
Rewritten more directly for the example (Alex):
(defn to-set [s]
(if (set? s) s #{s}))
(defn set-union [s1 s2]
(clojure.set/union (to-set s1) (to-set s2)))
(defn mergeMatches [propertyMapList]
(reduce #(merge-with set-union %1 %2) {} propertyMapList))
I didn't write this but it was contributed by #amitrathore on Twitter:
(defn kv [bag [k v]]
(update-in bag [k] conj v))
(defn mergeMatches [propertyMapList]
(reduce #(reduce kv %1 %2) {} propertyMapList))
I wouldn't use merge-with for this,
(defn fnil [f not-found]
(fn [x y] (f (if (nil? x) not-found x) y)))
(defn conj-in [m map-entry]
(update-in m [(key map-entry)] (fnil conj #{}) (val map-entry)))
(defn merge-matches [property-map-list]
(reduce conj-in {} (apply concat property-map-list)))
user=> (merge-matches in)
{:b #{4 5 6}, :a #{1 2 3}}
fnil will be part of core soon so you can ignore the implementation... but it just creates a version of another function that can handle nil arguments. In this case conj will substitute #{} for nil.
So the reduction conjoining to a set for every key/value in the list of maps supplied.
Another solution contributed by #wmacgyver on Twitter based on multimaps:
(defn add
"Adds key-value pairs the multimap."
([mm k v]
(assoc mm k (conj (get mm k #{}) v)))
([mm k v & kvs]
(apply add (add mm k v) kvs)))
(defn mm-merge
"Merges the multimaps, taking the union of values."
[& mms]
(apply (partial merge-with union) mms))
(defn mergeMatches [property-map-list]
(reduce mm-merge (map #(add {} (key (first %)) (val (first %))) property-map-list)))
This seems to work:
(defn FOO [v1 v2]
(if (set? v1)
(apply hash-set v2 v1)
(hash-set v1 v2)))
Not super pretty but it works.
(defn mergeMatches [propertyMapList]
(for [k (set (for [pp propertyMapList] (key (first pp))))]
{k (set (remove nil? (for [pp propertyMapList] (k pp))))}))

Resources