pyparsing flatten indented block - pyparsing

The following code analyse the left spaces and set all objects on one line.
The script checks if the position is equal then: update the temp array on this position and write all object with the deleminiter on the stack
If the position is greater then: update the temp array on this position and right all object with the deleminiter on the stack up to this postition
If the position is less then: update the temp array on this position, delete all higher object and write all object with the deleminiter on the stack up to this postition
Is there a better,shorter way to achieve the same?
import pyparsing as pp
class Parser(object):
def __init__(self):
self.__config_line = []
self.__config_all = []
self.__loc_last = 0
self.__position_last = 0
self.__dimension = 20
start = pp.OneOrMore(pp.Word(pp.printables))
self.__pattern = pp.Combine((start+pp.restOfLine), joinString='').setParseAction(self.test)
def test(self,s, loc, toks):
position_current = loc - self.__loc_last
self.__loc_last = loc + 1 + len(toks[0])
position_delta = position_current - self.__position_last
self.__position_last = position_current
if position_current == 0:
self.__config_line = [''] * self.__dimension
self.__config_line[position_current] = toks[0]
elif position_delta == 0:
self.__config_line[position_current] = toks[0]
elif position_delta < 0:
self.__config_line[position_current:self.__dimension] = [''] * (self.__dimension - 1 - position_current)
self.__config_line[position_current] = toks[0]
elif position_delta > 0:
self.__config_line[self.__position_last + 1:self.__dimension] = [''] * (self.__dimension - 1 - self.__position_last)
self.__config_line[position_current] = toks[0]
self.__position_last = position_current
self.__config_all.append(list(self.__config_line))
def parse(self, line):
try:
parsed = self.__pattern.searchString(line)
return self.__config_all
except pp.ParseException as x:
#print x
#return False
pass
If I run the code with the following parameter:
if __name__ == "__main__":
parser = Parser()
test="""first level config parameter 1-n
second level config parameter 1-n
thirt level config parameter 1-n
second level config parameter 1-n
thirt level config parameter 1-n
first level config parameter 1-n"""
print ("## Test String level based ##")
print ("#############################")
print(test)
print ("## Test String formal translated ##")
print ("###################################")
a = "\n".join(map("|".join, parser.parse(test)))
print (a)
print (parser.parse(test))
I got the following output:
first level config parameter 1-n|||||||||||||||||||
first level config parameter 1-n|second level config parameter 1-n||||||||||||||||||
first level config parameter 1-n|second level config parameter 1-n|thirt level config parameter 1-n|||||||||||||||||
first level config parameter 1-n|second level config parameter 1-n|||||||||||||||||
first level config parameter 1-n|second level config parameter 1-n|thirt level config parameter 1-n|||||||||||||||||
first level config parameter 1-n|||||||||||||||||||
........

Related

pyparsing: Grouping guidelines

pyparsing: The below is the code i put up which can parse a nested function call , a logical function call or a hybrid call which nests both the function and a logical function call. The dump() data adds too many unnecessary levels of braces because of grouping. Removing the Group() results in a wrong output. Is there a guideline to use Group(parsers)?
Also the Pyparsing document does'nt detail on how to walk the tree created and not much of data is available out there. Please point me to a link/guide which helps me write the tree walker for recursively parsed data for my test cases.
I will be translating this parsed data to a valid tcl code.
from pyparsing import *
from pyparsing import OneOrMore, Optional, Word, delimitedList, Suppress
# parse action -maker; # from Paul's example
def makeLRlike(numterms):
if numterms is None:
# None operator can only by binary op
initlen = 2
incr = 1
else:
initlen = {0:1,1:2,2:3,3:5}[numterms]
incr = {0:1,1:1,2:2,3:4}[numterms]
# define parse action for this number of terms,
# to convert flat list of tokens into nested list
def pa(s,l,t):
t = t[0]
if len(t) > initlen:
ret = ParseResults(t[:initlen])
i = initlen
while i < len(t):
ret = ParseResults([ret] + t[i:i+incr])
i += incr
return ParseResults([ret])
return pa
line = Forward()
fcall = Forward().setResultsName("fcall")
flogical = Forward()
lparen = Literal("(").suppress()
rparen = Literal(")").suppress()
arg = Word(alphas,alphanums+"_"+"."+"+"+"-"+"*"+"/")
args = delimitedList(arg).setResultsName("arg")
fargs = delimitedList(OneOrMore(flogical) | OneOrMore(fcall) |
OneOrMore(arg))
fname = Word(alphas,alphanums+"_")
fcall << Group(fname.setResultsName('func') + Group(lparen +
Optional(fargs) + rparen).setResultsName('fargs'))
flogic = Keyword("or") | Keyword("and") | Keyword("not")
logicalArg = delimitedList(Group(fcall.setResultsName("fcall")) |
Group(arg.setResultsName("arg")))
#logicalArg.setDebug()
flogical << Group(logicalArg.setResultsName('larg1') +
flogic.setResultsName('flogic') + logicalArg.setResultsName('larg2'))
#logical = operatorPrecedence(flogical, [(not, 1, opAssoc.RIGHT,
makeLRlike(2)),
# (and, 2, opAssoc.LEFT,
makeLRlike(2)),
# (or , 2, opAssoc.LEFT,
makeLRlike(2))])
line = flogical | fcall #change to logical if operatorPrecedence is used
# Works fine
print line.parseString("f(x, y)").dump()
print line.parseString("f(h())").dump()
print line.parseString("a and b").dump()
print line.parseString("f(a and b)").dump()
print line.parseString("f(g(x))").dump()
print line.parseString("f(a and b) or h(b not c)").dump()
print line.parseString("f(g(x), y)").dump()
print line.parseString("g(f1(x), a, b, f2(x,y, k(x,y)))").dump()
print line.parseString("f(a not c) and g(f1(x), a, b, f2(x,y,
k(x,y)))").dump()
#Does'nt work fine yet;
#try changing flogical assignment to logicalArg | flogic
#print line.parseString("a or b or c").dump()
#print line.parseString("f(a or b(x) or c)").dump()

Pyparsing: ParseAction not called

On a simple grammar I am in the bad situation that one of my ParseActions is not called.
For me this is strange as parseActions of a base symbol ("logic_oper") and a derived symbol ("cmd_line") are called correctly. Just "pa_logic_cmd" is not called. You can see this on the output which is included at the end of the code.
As there is no exception on parsing the input string, I am assuming that the grammar is (basically) correct.
import io, sys
import pyparsing as pp
def diag(msg, t):
print("%s: %s" % (msg , str(t)) )
def pa_logic_oper(t): diag('logic_oper', t)
def pa_operand(t): diag('operand', t)
def pa_ident(t): diag('ident', t)
def pa_logic_cmd(t): diag('>>>>>> logic_cmd', t)
def pa_cmd_line(t): diag('cmd_line', t)
def make_grammar():
semi = pp.Literal(';')
ident = pp.Word(pp.alphas, pp.alphanums).setParseAction(pa_ident)
operand = (ident).setParseAction(pa_operand)
op_and = pp.Keyword('A')
op_or = pp.Keyword('O')
logic_oper = (( op_and | op_or) + pp.Optional(operand))
logic_oper.setParseAction(pa_logic_oper)
logic_cmd = logic_oper + pp.Suppress(semi)
logic_cmd.setParseAction(pa_logic_cmd)
cmd_line = (logic_cmd)
cmd_line.setParseAction(pa_cmd_line)
grammar = pp.OneOrMore(cmd_line) + pp.StringEnd()
return grammar
if __name__ == "__main__":
inp_str = '''
A param1;
O param2;
A ;
'''
grammar = make_grammar()
print( "pp-version:" + pp.__version__)
parse_res = grammar.parseString( inp_str )
'''USAGE/Output: python test_4.py
pp-version:2.0.3
operand: ['param1']
logic_oper: ['A', 'param1']
cmd_line: ['A', 'param1']
operand: ['param2']
logic_oper: ['O', 'param2']
cmd_line: ['O', 'param2']
logic_oper: ['A']
cmd_line: ['A']
'''
Can anybody give me a hint on this parseAction problem?
Thanks,
The problem is here:
cmd_line = (logic_cmd)
cmd_line.setParseAction(pa_cmd_line)
The first line assigns cmd_line to be the same expression as logic_cmd. You can verify by adding this line:
print("???", cmd_line is logic_cmd)
Then the second line calls setParseAction, which overwrites the parse action of logic_cmd, so the pa_logic_cmd will never get called.
Remove the second line, since you are already testing the calling of the parse action with pa_logic_cmd. You could change to using the addParseAction method instead, but to my mind that is an invalid test (adding 2 parse actions to the same pyparsing expression object).
Or, change the definition of cmd_line to:
cmd_line = pp.Group(logic_cmd)
Now you will have wrapped logic_cmd inside another expression, and you can then independently set and test the running of parse actions on the two different expressions.

Groovy map creation, use value assigned to previous key

Is there a way to use the values assigned to the previous key in map, for eg:
def x = [
a: someList.sum(),
b: anotherList.sum(),
c: someList.sum() / anotherList.sum()
]
I want the value of 'c' to be a/b, so is there a shortcut so that I don't have to recompute the sums while computing 'c'
In order to use previously-added key/values to compute new key/values, you must be able to control the order in which the keys/values are added. I know that's obvious, but what may not be obvious is that Groovy Map declarations do not take order into account. For example, if you write this...
def x = [
a: 8,
b: 2,
c: a / b
]
..., when evaluating the expression for the value of key c, Groovy will attempt to access a variable or property named a, which will fail because the variable/property does not exist. However, you can take advantage of that property lookup and do this:
def x = [:].with {
a = 8
b = 2
c = a / b
delegate
}
You start by creating an empty Map. Then, use with(Closure) to execute putAt() and get() against the Map. The example above is the equivalent to...
def x = [:].with {
putAt('a', 8)
putAt('b', 2)
putAt('c', get('a') / get('b'))
delegate
}
Finally, return the Map itself so that it's assigned to x.

Index of string value in MiniZinc array

The question
Given a MiniZinc array of strings:
int: numStats;
set of int: Stats = 1..numStats;
array[Stats] of string: statNames;
... with data loaded from a MiniZinc data file:
numStats = 3;
statNames = ["HEALTH", "ARMOR", "MANA"];
How can one look up the index of a specific string in the array? For example, that ARMOR is located at position 2.
The context
I need to find an optimal selection of items with regard to some constraints on their stats. This information is stored in a 2D array declared as follows:
int: numItems;
set of int: Items = 1..numItems;
array[Items, Stats] of float: itemStats;
So in order to write a constraint on, say, the minimum amount of ARMOR obtained through the selected items, I need to know that ARMOR has index 2 in the inner array.
Since the data file is generated by an external program, and the number and order of stats are dynamic, I cannot hardcode the indices in the constraints.
One solution (that won't work in my case)
The MiniZinc tutorial uses an interesting trick to achieve something similar:
set of int: Colors = 1..3;
int: red = 1;
int: yellow = 2;
int: blue = 3;
array[Colors] of string: name = ["red", "yellow", "blue"];
var Colors: x;
constraint x != red;
output [ name[fix(x)] ];
Unfortunately, as variable declarations are not allowed in MiniZinc data files, this trick won't work in my case.
You can write your own custom function to get the index of a string within a string array:
function int: getIndexOfString(string: str,
array[int] of string: string_array) =
sum( [ if str = string_array[i]
then i
else 0 endif
| i in index_set(string_array) ]
);
In this function I create an array of integers where the integer at position i either equals the index of str if string_array[i]=str and 0 otherwise. For instance, for your sample string array ["HEALTH", "ARMOR", "MANA"] and str ARMOR the resulting int array will be [0,2,0].
This is why I can simply sum over the int array to get the index of the string. If the string does not occur, the return value is 0, which is fine since indices in MiniZinc start with 1 by default.
Here is how you can call the function above for your first example:
int: numStats;
set of int: Stats = 1..numStats;
array[Stats] of string: statNames;
numStats = 3;
statNames = ["HEALTH", "ARMOR", "MANA"];
var int: indexOfArmor;
constraint
indexOfArmor = getIndexOfString("ARMOR",statNames);
solve satisfy;
Note however that the function above is limited and has some flaws. First, if you have multiple occurrences of the string in the array, then you will receive an invalid index (the sum of all indices where str occurred). Also, if you have your own index set for your string array (say (2..6)), then you will need to adapt the function.
Another, cleaner option is to write a function that uses a recursive helper function:
% main function
function int: index_of(string: elem, array[int] of string: elements) =
let {
int: index = length(elements);
} in % calls the helper function with the last index
get_index(elem, elements, index)
;
% recursive helper function
function int: get_index(string: elem, array[int] of string: elements, int: index) =
if index == 0
then -1 % the element was not found (base case of recursion)
elseif elements[index] == elem
then index % the element was found
else
get_index(elem, elements, index - 1) % continue searching
endif
;
The helper function iterates recursively over the array, starting from the last element, and when it finds the element, it returns the index. If the element was not found in the array, then -1 is returned. Alternatively, you can also throw an assertion following the suggestion of Patrick Trentin by replacing then -1 with then assert(false, "unknown element: " + elem).
An example of calling this function:
set of int: Customers = 1..5;
array[Customers] of string: ids = ["a-1", "a-2", "a-3", "a-4", "a-5"];
var int: index = index_of("a-3", ids);
var int: unknown_index = index_of("x-3", ids);
where index will be assigned 3 and unknown_index will be -1.
An alternative approach to that presented by Andrea Rendl-Pitrey, is the following one:
array[int] of string: statNames = array1d(10..12, ["HEALTH", "ARMOR", "MANA"]);
var int: indexOfArmor =
sum([i | i in index_set(statNames) where statNames[i] = "ARMOR"]);
solve satisfy;
output [
"indexOfArmor=", show(indexOfArmor), "\n",
];
which outputs:
~$ mzn2fzn example.mzn ; flatzinc example.fzn
indexOfArmor = 11;
----------
note: that var can be dropped from the declaration of indexOfArmor, since the index can be statically computed. I kept it here only for output purposes.
A better solution is to declare a new predicate:
predicate index_of_str_in_array(var int: idx,
string: str,
array[int] of string: arr) =
assert(
not exists(i in index_set(arr), j in index_set(arr))
(i != j /\ arr[i] = str /\ arr[j] = str),
"input string occurs at multiple locations",
assert(
exists(i in index_set(arr))
(arr[i] = str),
"input string does not occur in the input array",
exists(i in index_set(arr))
(arr[i] = str /\ i = idx)
));
which enforces both of the following conditions:
str occurs at least once in arr
str does not occur multiple times in arr
e.g
predicate index_of_str_in_array(var int: idx,
string: str,
array[int] of string: arr) =
...
array[10..13] of string: statNames =
array1d(10..13, ["HEALTH", "ARMOR", "MANA", "ATTACK"]);
var int: indexOfArmor;
constraint index_of_str_in_array(indexOfArmor, "ARMOR", statNames);
solve satisfy;
output [
"indexOfArmor=", show(indexOfArmor), "\n",
];
outputs
~$ mzn2fzn example.mzn ; flatzinc example.fzn
indexOfArmor = 11;
----------
If one changes statNames in the following way
array[10..13] of string: statNames =
array1d(10..13, ["HEALTH", "ARMOR", "MANA", "ARMOR"]);
then mzn2fzn detects an assertion violation:
~$ mzn2fzn example.mzn ; flatzinc example.fzn
MiniZinc: evaluation error:
example.mzn:24:
in call 'index_of_str_in_array'
example.mzn:4:
in call 'assert'
Assertion failed: input string occurs at multiple locations
flatzinc:
example.fzn: cannot open input file: No such file
A similar result would be obtained by searching for the index of a string that does not occur in the array. This condition can of course be removed if not necessary.
DISCLAIMER: older versions of mzn2fzn don't seem to check that the declared index-set of an array of strings variable matches the index-set of an array of strings literal that is being assigned to it. This rule is enforced on newer versions, as it is also valid for other data types.
According to this other post on Stackoverflow there is no way of converting strings to integers in MiniZinc, only the other way around. You need to first pre process your data in some other language and turn it into integers. You can however turn those integers into string once you are done in MiniZinc.
You can however load MiniZinc files instead of data files if you would like. Use the include syntax to include any .mzn file.

How can I use functools.partial on multiple methods on an object, and freeze parameters out of order?

I find functools.partial to be extremely useful, but I would like to be able to freeze arguments out of order (the argument you want to freeze is not always the first one) and I'd like to be able to apply it to several methods on a class at once, to make a proxy object that has the same methods as the underlying object except with some of its methods parameters being frozen (think of it as generalizing partial to apply to classes). And I'd prefer to do this without editing the original object, just like partial doesn't change its original function.
I've managed to scrap together a version of functools.partial called 'bind' that lets me specify parameters out of order by passing them by keyword argument. That part works:
>>> def foo(x, y):
... print x, y
...
>>> bar = bind(foo, y=3)
>>> bar(2)
2 3
But my proxy class does not work, and I'm not sure why:
>>> class Foo(object):
... def bar(self, x, y):
... print x, y
...
>>> a = Foo()
>>> b = PureProxy(a, bar=bind(Foo.bar, y=3))
>>> b.bar(2)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: bar() takes exactly 3 arguments (2 given)
I'm probably doing this all sorts of wrong because I'm just going by what I've pieced together from random documentation, blogs, and running dir() on all the pieces. Suggestions both on how to make this work and better ways to implement it would be appreciated ;) One detail I'm unsure about is how this should all interact with descriptors. Code follows.
from types import MethodType
class PureProxy(object):
def __init__(self, underlying, **substitutions):
self.underlying = underlying
for name in substitutions:
subst_attr = substitutions[name]
if hasattr(subst_attr, "underlying"):
setattr(self, name, MethodType(subst_attr, self, PureProxy))
def __getattribute__(self, name):
return getattr(object.__getattribute__(self, "underlying"), name)
def bind(f, *args, **kwargs):
""" Lets you freeze arguments of a function be certain values. Unlike
functools.partial, you can freeze arguments by name, which has the bonus
of letting you freeze them out of order. args will be treated just like
partial, but kwargs will properly take into account if you are specifying
a regular argument by name. """
argspec = inspect.getargspec(f)
argdict = copy(kwargs)
if hasattr(f, "im_func"):
f = f.im_func
args_idx = 0
for arg in argspec.args:
if args_idx >= len(args):
break
argdict[arg] = args[args_idx]
args_idx += 1
num_plugged = args_idx
def new_func(*inner_args, **inner_kwargs):
args_idx = 0
for arg in argspec.args[num_plugged:]:
if arg in argdict:
continue
if args_idx >= len(inner_args):
# We can't raise an error here because some remaining arguments
# may have been passed in by keyword.
break
argdict[arg] = inner_args[args_idx]
args_idx += 1
f(**dict(argdict, **inner_kwargs))
new_func.underlying = f
return new_func
Update: In case anyone can benefit, here's the final implementation I went with:
from types import MethodType
class PureProxy(object):
""" Intended usage:
>>> class Foo(object):
... def bar(self, x, y):
... print x, y
...
>>> a = Foo()
>>> b = PureProxy(a, bar=FreezeArgs(y=3))
>>> b.bar(1)
1 3
"""
def __init__(self, underlying, **substitutions):
self.underlying = underlying
for name in substitutions:
subst_attr = substitutions[name]
if isinstance(subst_attr, FreezeArgs):
underlying_func = getattr(underlying, name)
new_method_func = bind(underlying_func, *subst_attr.args, **subst_attr.kwargs)
setattr(self, name, MethodType(new_method_func, self, PureProxy))
def __getattr__(self, name):
return getattr(self.underlying, name)
class FreezeArgs(object):
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
def bind(f, *args, **kwargs):
""" Lets you freeze arguments of a function be certain values. Unlike
functools.partial, you can freeze arguments by name, which has the bonus
of letting you freeze them out of order. args will be treated just like
partial, but kwargs will properly take into account if you are specifying
a regular argument by name. """
argspec = inspect.getargspec(f)
argdict = copy(kwargs)
if hasattr(f, "im_func"):
f = f.im_func
args_idx = 0
for arg in argspec.args:
if args_idx >= len(args):
break
argdict[arg] = args[args_idx]
args_idx += 1
num_plugged = args_idx
def new_func(*inner_args, **inner_kwargs):
args_idx = 0
for arg in argspec.args[num_plugged:]:
if arg in argdict:
continue
if args_idx >= len(inner_args):
# We can't raise an error here because some remaining arguments
# may have been passed in by keyword.
break
argdict[arg] = inner_args[args_idx]
args_idx += 1
f(**dict(argdict, **inner_kwargs))
return new_func
You're "binding too deep": change def __getattribute__(self, name): to def __getattr__(self, name): in class PureProxy. __getattribute__ intercepts every attribute access and so bypasses everything that you've set with setattr(self, name, ... making those setattr bereft of any effect, which obviously's not what you want; __getattr__ is called only for access to attributes not otherwise defined so those setattr calls become "operative" & useful.
In the body of that override, you can and should also change object.__getattribute__(self, "underlying") to self.underlying (since you're not overriding __getattribute__ any more). There are other changes I'd suggest (enumerate in lieu of the low-level logic you're using for counters, etc) but they wouldn't change the semantics.
With the change I suggest, your sample code works (you'll have to keep testing with more subtle cases of course). BTW, the way I debugged this was simply to stick in print statements in the appropriate places (a jurassic=era approach but still my favorite;-).

Resources