ragel parser is not greedy? - http

I'm trying to write an HTTP parser in C++ using ragel, but finding that the parser generated is not greedy.
The first step is to parse a URL, and here is the ragel grammar translated from RFC-3986:
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
%%{
machine http_uri_parser;
# Capture actions, one mark_*/store_* pair per URI component.
# mark_X saves the current input position p as the start of component X.
# store_X sets the length of component X to the distance from that start to p.
# Every action also prints a trace line showing the input from p onwards.
action mark_scheme {
std::printf("mark scheme, p:%s\n", p);
this->scheme.data = p;
}
action store_scheme {
std::printf("store scheme, p:%s\n", p);
this->scheme.len = p - this->scheme.data;
}
action mark_authority {
std::printf("mark authority, p:%s\n", p);
this->authority.data = p;
}
action store_authority {
std::printf("store authority, p:%s\n", p);
this->authority.len = p - this->authority.data;
}
action mark_userinfo {
std::printf("mark userinfo, p:%s\n", p);
this->userinfo.data = p;
}
action store_userinfo {
std::printf("store userinfo, p:%s\n", p);
this->userinfo.len = p - this->userinfo.data;
}
action mark_host {
std::printf("mark host, p:%s\n", p);
this->host.data = p;
}
action store_host {
std::printf("store host, p:%s\n", p);
this->host.len = p - this->host.data;
}
action mark_port {
std::printf("mark port, p:%s\n", p);
this->port.data = p;
}
action store_port {
std::printf("store port, p:%s\n", p);
this->port.len = p - this->port.data;
}
action mark_path {
std::printf("mark path, p:%s\n", p);
this->path.data = p;
}
action store_path {
std::printf("store path, p:%s\n", p);
this->path.len = p - this->path.data;
}
action mark_query {
std::printf("mark query, p:%s\n", p);
this->query.data = p;
}
action store_query {
std::printf("store query, p:%s\n", p);
this->query.len = p - this->query.data;
}
action mark_fragment {
std::printf("mark fragment, p:%s\n", p);
this->fragment.data = p;
}
action store_fragment {
std::printf("store fragment, p:%s\n", p);
this->fragment.len = p - this->fragment.data;
}
# Completion action: prints a trace line and sets the _done flag.
# The closing fbreak statement makes the generated execute code stop
# processing input, so nothing after this point is consumed in this run.
action done {
std::printf("parser done, p:%s\n", p);
this->_done = 1;
fbreak;
}
###############################################################################
# Characters
###############################################################################
crlf = '\r\n';
# RFC 3986 section 2.2: the seven general delimiters, ending with the at sign.
gen_delims = ( ':' | '/' | '?' | '#' | '[' | ']' | '@' );
sub_delims = ( '!' | '$' | '&' | "'" | '(' | ')'
| '*' | '+' | ',' | ';' | '=' );
reserved = ( gen_delims | sub_delims );
unreserved = ( alpha | digit | '-' | '.' | '_' | '~' );
pct_encoded = ( '%' xdigit xdigit );
###############################################################################
# Scheme
###############################################################################
scheme = ( alpha ( alpha | digit | '+' | '-' | '.' )* )
>mark_scheme %store_scheme;
###############################################################################
# Authority
###############################################################################
dec_octet = ( ( digit ) # 0-9
| ( ( '1'..'9' ) digit ) # 10-99
| ( '1' digit{2} ) # 100-199
| ( '2' ( '0'..'4' ) digit ) # 200-249
| ( '25' ( '0'..'5' ) ) # 250-255
);
IPv4_address = ( dec_octet '.' dec_octet '.' dec_octet '.' dec_octet );
h16 = ( xdigit{1,4} );
# 16 bits of address represented in hexadecimal
ls32 = ( ( h16 ':' h16 ) | IPv4_address );
# least-significant 32 bits of address
IPv6_address = ( ( ( h16 ':' ){6} ls32 )
| ( '::' ( h16 ':' ){5} ls32 )
| ( ( h16 )? '::' ( h16 ':' ){4} ls32 )
| ( ( ( h16 ':' ){,1} h16 )? '::' ( h16 ':' ){3} ls32 )
| ( ( ( h16 ':' ){,2} h16 )? '::' ( h16 ':' ){2} ls32 )
| ( ( ( h16 ':' ){,3} h16 )? '::' ( h16 ':' ){1} ls32 )
| ( ( ( h16 ':' ){,4} h16 )? '::' ls32 )
| ( ( ( h16 ':' ){,5} h16 )? '::' h16 )
| ( ( ( h16 ':' ){,6} h16 )? '::' )
);
IPv_future = ( 'v' ( ( xdigit+ ) '.' ) ( unreserved | sub_delims | ':' )+ );
IP_literal = ( '[' ( IPv6_address | IPv_future ) ']' );
reg_name = ( ( unreserved | pct_encoded | sub_delims )* )
> { std::printf("mark reg_name, p:%s\n", p); }
% { std::printf("store reg_name, p:%s\n", p); };
port = ( digit* )
>mark_port %store_port;
host = ( IP_literal | IPv4_address | reg_name )
>mark_host %store_host;
userinfo = ( ( unreserved | pct_encoded | sub_delims | ':' )* )
>mark_userinfo %store_userinfo;
# RFC 3986 section 3.2: optional userinfo terminated by an at sign, then the
# host, then an optional colon-prefixed port.
authority = ( ( userinfo '@' )? host ( ':' port )? )
>mark_authority %store_authority;
###############################################################################
# Path
###############################################################################
# RFC 3986 section 3.3: path characters include the colon and the at sign, but
# never the hash, which is what terminates the query and starts the fragment.
pchar = ( unreserved | pct_encoded | sub_delims | ':' | '@' );
segment = ( pchar* );
segment_nz = ( pchar+ );
# non-zero-length
segment_nz_nc = ( ( unreserved | pct_encoded | sub_delims | '@' )+ );
# non-zero-length segment without any colon ':' (RFC 3986 section 3.3)
path_abempty = ( ( '/' segment )* )
>mark_path %store_path;
path_absolute = ( '/' ( segment_nz ( '/' segment )* )? )
>mark_path %store_path;
path_noscheme = ( segment_nz_nc ( '/' segment )* )
>mark_path %store_path;
path_rootless = ( segment_nz ( '/' segment )* )
>mark_path %store_path;
path_empty = ( zlen )
>mark_path %store_path;
path = ( path_abempty # begins with '/' or is empty
| path_absolute # begins with '/' but not '//'
| path_noscheme # begins with a non-colon segment
| path_rootless # begins with a segment
| path_empty # zero characters
);
###############################################################################
# Query
###############################################################################
query = ( ( pchar | '/' | '?' )* )
>mark_query %store_query;
###############################################################################
# Fragment
###############################################################################
fragment = ( ( pchar | '/' | '?' )* )
>mark_fragment %store_fragment;
###############################################################################
# URI
###############################################################################
hier_part = ( ( '//' authority path_abempty )
| ( path_absolute )
| ( path_rootless )
| ( path_empty )
);
relative_part = ( ( '//' authority path_abempty )
| ( path_absolute )
| ( path_noscheme )
| ( path_empty )
);
absolute_URI = ( scheme ':' hier_part ( '?' query )? );
relative_ref = ( relative_part ( '?' query )? ( '#' fragment )? );
URI = ( scheme ':' hier_part ( '?' query )? ( '#' fragment )? );
URI_reference = ( URI | relative_ref );
###############################################################################
# main rule
###############################################################################
# The at-sign operator embeds a finishing action: done runs on every
# transition that moves the machine into a final state.
main := URI @done;
}%%
%% write data;
/// Non-owning reference to a run of characters: a start pointer plus a length.
/// A default-constructed slice has length 0 and a null start pointer.
struct slice {
  std::size_t len = 0;
  const char* data = nullptr;
};
/// Holds one URI string together with the results of parsing it.
struct http_parser {
  http_parser() = default;
  ~http_parser() = default;

  /// Re-initialises the machine state, the status flags and every slice.
  void reset();
  /// Runs the generated machine over `uri` and prints what was captured.
  void execute();

  /// Machine state carried between reset() and execute().
  int state = 0;
  /// Text to parse; the slices below point into this buffer.
  std::string uri;

  /* parsed result */
  slice scheme = {};
  slice authority = {};
  slice userinfo = {};
  slice host = {};
  slice port = {};
  slice path = {};
  slice query = {};
  slice fragment = {};

  /* parse status */
  bool _eof = false;
  bool _done = false;
  bool _failed = false;
};
void http_parser::reset() {
int cs = 0;
%% write init;
this->state = cs;
this->_eof = false;
this->_done = false;
this->_failed = false;
this->scheme = slice{};
this->authority = slice{};
this->userinfo = slice{};
this->host = slice{};
this->port = slice{};
this->path = slice{};
this->query = slice{};
this->fragment = slice{};
}
void http_parser::execute() {
const char* p = &this->uri.front();
const char* pe = &this->uri.back() + 1;
const char* eof = pe;
int cs = this->state;
%% write exec;
if (!this->_failed) {
this->state = cs;
}
std::printf(
"eof:%d, done:%d, failed:%d, state:%d, p:%p, pe:%p, diff:%ld, rest:%s\n",
this->_eof, this->_done, this->_failed, this->state, p, pe, pe - p, p);
#define print_parser_component(fld) \
if (this->fld.len) { \
std::printf(#fld ": %.*s\n", (int)this->fld.len, this->fld.data); \
}
print_parser_component(scheme);
print_parser_component(authority);
print_parser_component(userinfo);
print_parser_component(host);
print_parser_component(port);
print_parser_component(path);
print_parser_component(query);
print_parser_component(fragment);
#undef print_parser_component
}
Here I set the main rule to URI rather than URI_reference in order to test absolute URLs first.
And here is the test code:
int main(int argc, char** argv) {
auto parser = std::make_unique<http_parser>();
parser->uri =
"https://chenjianyong.com/blog/2022/01/"
"seastar_fpc_1.html?hello=world#preface";
parser->reset();
parser->execute();
return 0;
}
Run the program and it prints:
mark scheme, p:https://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store scheme, p:://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
parser done, p:://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
eof:0, done:1, failed:0, state:171, p:0x6000016f4006, pe:0x6000016f404c, diff:70, rest://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
scheme: https
It seems that the parser stops right after parsing the scheme (https:), which is so weird! Why doesn't it greedily consume to the last byte?
After changing the main rule to main := (crlf @done);, appending a crlf to the test URL and regenerating the parser, the parser consumes to the end this time, and the output shows that all URL components are parsed successfully:
mark scheme, p:https://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store scheme, p:://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark path, p://chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark authority, p:chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark userinfo, p:chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark host, p:chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark reg_name, p:chenjianyong.com/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store reg_name, p:/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store host, p:/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store authority, p:/blog/2022/01/seastar_fpc_1.html?hello=world#preface
mark path, p:/blog/2022/01/seastar_fpc_1.html?hello=world#preface
store path, p:?hello=world#preface
mark query, p:hello=world#preface
store query, p:#preface
mark fragment, p:preface
store fragment, p:
parser done, p:
eof:0, done:1, failed:0, state:188, p:0x60000140c04e, pe:0x60000140c04e, diff:0, rest:
scheme: https
authority: chenjianyong.com
host: chenjianyong.com
path: /blog/2022/01/seastar_fpc_1.html
query: hello=world
fragment: preface
So, why isn't my ragel parser greedy?

Why doesn't it greedily consume to the last byte?
As you have already noticed, your done action is executed right after the colon (with fbreak inside) and we can confirm it by rendering your (quite big one!) FSM (ragel -o f.dot -Vp source.c++ && dot -Tpng -o f.png f.dot):
It's executed because the main specification says @done which, according to the documentation,
embeds an action into any transitions that move the machine
into a final state
And as you can see, 171 is one of the possible final states of your machine. If you change the main specification to URI %done, it's going to be a bit different:
done is no longer executed, it's going to be only after
the transitions that go out of a machine via a final state
Which I think is a bit more appropriate for your case.
Now you probably wonder why 171 is one of the final states. That's because hier_part, among other things, can be path_empty, which is a zlen, so it's OK for the machine to finish in this state (for the input scheme:) -> it's one of the final states -> @done is executed when transitioning to this state.

Related

jq: getting default when an item array doesn't exist

I'm getting an issue with those two json objects:
{
"generalPractitioner":[
{
"extension":[
{
"url":"http://catsalut.gencat.cat/fhir/StructureDefinition/patient-tipus-relacio",
"valueCodeableConcept":{
"coding":[
{
"system":"http://catsalut.gencat.cat/fhir/StructureDefinition/tipus-rel-organitzacio",
"code":"UPAPASSIG"
}
]
}
}
],
"reference":"Organization/132336"
},
{
"extension":[
{
"url":"http://catsalut.gencat.cat/fhir/StructureDefinition/patient-tipus-relacio",
"valueCodeableConcept":{
"coding":[
{
"system":"http://catsalut.gencat.cat/fhir/StructureDefinition/tipus-rel-organitzacio",
"code":"UPAPTERRI"
}
]
}
}
],
"reference":"Organization/132346"
}
]
}
{
"generalPractitioner":[
{
"extension":[
{
"url":"http://catsalut.gencat.cat/fhir/StructureDefinition/patient-tipus-relacio",
"valueCodeableConcept":{
"coding":[
{
"system":"http://catsalut.gencat.cat/fhir/StructureDefinition/tipus-rel-organitzacio",
"code":"UPAPASSIG"
}
]
}
}
],
"reference":"Organization/132336"
}
]
}
I'm trying:
# Flatten the first two generalPractitioner entries into one CSV row.
jq --raw-output '[ ( .generalPractitioner[0] | ( .extension[] | .url, ( .valueCodeableConcept | .coding[] | .system, .code ) ), .reference ), ( .generalPractitioner[1] | ( .extension[] | .url, ( .valueCodeableConcept | .coding[] | .system, .code ) ), .reference ) ] | @csv'
Tidying up, the filter expression is:
# One CSV row: url, system, code and reference for each of the first two
# generalPractitioner entries.
[
  ( .generalPractitioner[0] |
    ( .extension[] |
      .url,
      ( .valueCodeableConcept |
        .coding[] |
        .system,
        .code
      )
    ),
    .reference
  ),
  ( .generalPractitioner[1] |
    ( .extension[] |
      .url,
      ( .valueCodeableConcept |
        .coding[] |
        .system,
        .code
      )
    ),
    .reference
  )
] | @csv
I'm getting this message:
jq: error (at :55): Cannot iterate over null (null)
The generalPractitioner array of the second object only contains 1 element.
I know I'm trying to access an element that doesn't exist.
I can't quite figure out how to work around it using a default null object.
What I'm trying to get is this:
.generalPractitioner[0].extension[0].url // null, .generalPractitioner[0].extension[0].valueCodeableConcept.coding[0].system // null, .generalPractitioner[0].extension[0].valueCodeableConcept.coding[0].code // null, .generalPractitioner[0].reference // null,
.generalPractitioner[1].extension[0].url // null, .generalPractitioner[0].extension[0].valueCodeableConcept.coding[0].system // null, .generalPractitioner[1].extension[0].valueCodeableConcept.coding[0].code // null, .generalPractitioner[1].reference // null,
.generalPractitioner[2].extension[0].url // null, .generalPractitioner[0].extension[0].valueCodeableConcept.coding[0].system // null, .generalPractitioner[2].extension[0].valueCodeableConcept.coding[0].code // null, .generalPractitioner[2].reference // null,
.generalPractitioner[3].extension[0].url // null, .generalPractitioner[0].extension[0].valueCodeableConcept.coding[0].system // null, .generalPractitioner[3].extension[0].valueCodeableConcept.coding[0].code // null, .generalPractitioner[3].reference // null
Any ideas?
As you want to generate the columns by iterating over an array (.generalPractitioner[]) while interweaving the output with a constant from a specific iteration step (.generalPractitioner[0]) in every fourth column starting with the second, you will have to store that value in a variable ($system) to access it from within the iteration.
jq -r '
  # Remember the system of the first entry, then emit four columns for each
  # of the array positions 0 to 3 and format the whole row as CSV.
  [
    .generalPractitioner
    | .[0].extension[0].valueCodeableConcept.coding[0].system as $system
    | .[range(4)]
    | (.extension[0] | .url, $system, .valueCodeableConcept.coding[0].code),
      .reference
  ]
  | @csv
'
Demo

extraneous input '-' expecting {DECIMAL, '+', '-'}

Using the latest Antlr runtime 4.6.6,
I'm trying to handle signed numbers and can't figure out what I'm doing wrong. Scavenging samples from the TSQL grammar, I use the following to parse signed numbers:
time_expression
: interval=(YEARS|MONTHS|DAYS|HOURS|MINUTES) '(' signed_decimal ')'
;
signed_decimal
:sign? DECIMAL
;
It works fine for unsigned numbers, but when I try to parse: Test > MONTHS(-537) I get this error: extraneous input '-' expecting {DECIMAL, '+', '-'}
This is my lexer grammar:
lexer grammar QLexer;
// Named action (introduced by an at sign) setting the generated class modifier.
@modifier{internal}
FROM: 'FROM';
IN: 'IN';
NULL: 'NULL';
OR: 'OR';
IS: 'IS';
AND: 'AND';
NOT: 'NOT';
PARENT: 'PARENT';
YEARS: 'YEARS';
MONTHS: 'MONTHS';
DAYS: 'DAYS';
HOURS: 'HOURS';
MINUTES: 'MINUTES';
HASTAG: 'HASTAG';
PARAGRAPH: (NEWLINE NEWLINE);
TAB: [ \t\r\n]+ -> skip;
// https://learn.microsoft.com/en-us/sql/t-sql/language-elements/slash-star-comment-transact-sql
COMMENT: '/*' (COMMENT | .)*? '*/' -> channel(HIDDEN);
LINE_COMMENT: '--' ~[\r\n]* -> channel(HIDDEN);
// TODO: ID can be not only Latin.
EMBEDED_SQL: '{' (.)*? '}';
DOUBLE_QUOTE_ID: '"' ~'"'+ '"';
SINGLE_QUOTE: '\'';
SQUARE_BRACKET_ID: '[' ~']'+ ']';
// Local identifiers are prefixed with an at sign, as in the T-SQL grammar.
LOCAL_ID: '@' ID;
TEST_ID: '#' ID;
DECIMAL: DEC_DIGIT+;
ID: ([A-Za-z0-9_])+;
STRING: 'N'? '\'' (~'\'' | '\'\'')* '\'';
BINARY: '0' 'X' HEX_DIGIT*;
EQUAL: '=';
GREATER: '>';
LESS: '<';
EXCLAMATION: '!';
PLUS_ASSIGN: '+=';
MINUS_ASSIGN: '-=';
MULT_ASSIGN: '*=';
DIV_ASSIGN: '/=';
MOD_ASSIGN: '%=';
AND_ASSIGN: '&=';
XOR_ASSIGN: '^=';
OR_ASSIGN: '|=';
ARITHMETIC: '*' | '/'|'+' | '-';
DOUBLE_BAR: '||';
DOT: '.';
UNDERLINE: '_';
AT: '@';
SHARP: '#';
DOLLAR: '$';
LR_BRACKET: '(';
RR_BRACKET: ')';
COMMA: ',';
SEMI: ';';
COLON: ':';
STAR: '*';
DIVIDE: '/';
MODULE: '%';
PLUS: '+';
MINUS: '-';
BIT_NOT: '~';
BIT_OR: '|';
BIT_AND: '&';
BIT_XOR: '^';
NUM : '[0-9]+ ('.' [0-9]+)?';
SIGNED_NUMBER: '^-?[1-9][0-9]{0,2}$';
UNSIGNED_INT : ('0' | '1'..'9' '0'..'9'*);
HEX_DIGIT: [0-9A-F];
DEC_DIGIT: [0-9];
//fragment Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
fragment LETTER: [A-Z_];
//fragment DEC_DOT_DEC: (DEC_DIGIT+ '.' DEC_DIGIT+ | DEC_DIGIT+ '.' | '.' DEC_DIGIT+);
//fragment HEX_DIGIT: [0-9A-F];
//fragment DEC_DIGIT: [0-9];
fragment NEWLINE: '\r'? '\n';
Your rules:
NUM : '[0-9]+ ('.' [0-9]+)?';
SIGNED_NUMBER : '^-?[1-9][0-9]{0,2}$';
match literal strings. You probably mean something like this:
NUM : [0-9]+ ('.' [0-9]+)?;
SIGNED_NUMBER : '-'? [1-9] ([0-9] [0-9]?)?;
And you'll probably want to let signed_decimal match SIGNED_NUMBER too:
signed_decimal
: sign? DECIMAL
| SIGNED_NUMBER
;

Is there an easy way to convert a number to a word with wordpress via shortcode?

Is there a function that will express any given number in words via a shortcode? I have tested this function, but I think I have a problem with the shortcode.
My function :
function number_to_word( $num = '' ){
$num = ( string ) ( ( int ) $num );
if( ( int ) ( $num ) && ctype_digit( $num ) )
{
$words = array( );
$num = str_replace( array( ',' , ' ' ) , '' , trim( $num ) );
$list1 = array('','one','two','three','four','five','six','seven',
'eight','nine','ten','eleven','twelve','thirteen','fourteen',
'fifteen','sixteen','seventeen','eighteen','nineteen');
$list2 = array('','ten','twenty','thirty','forty','fifty','sixty',
'seventy','eighty','ninety','hundred');
$list3 = array('','thousand','million','billion','trillion',
'quadrillion','quintillion','sextillion','septillion',
'octillion','nonillion','decillion','undecillion',
'duodecillion','tredecillion','quattuordecillion',
'quindecillion','sexdecillion','septendecillion',
'octodecillion','novemdecillion','vigintillion');
$num_length = strlen( $num );
$levels = ( int ) ( ( $num_length + 2 ) / 3 );
$max_length = $levels * 3;
$num = substr( '00'.$num , -$max_length );
$num_levels = str_split( $num , 3 );
foreach( $num_levels as $num_part )
{
$levels--;
$hundreds = ( int ) ( $num_part / 100 );
$hundreds = ( $hundreds ? ' ' . $list1[$hundreds] . ' Hundred' . ( $hundreds == 1 ? '' : 's' ) . ' ' : '' );
$tens = ( int ) ( $num_part % 100 );
$singles = '';
if( $tens < 20 )
{
$tens = ( $tens ? ' ' . $list1[$tens] . ' ' : '' );
}
else
{
$tens = ( int ) ( $tens / 10 );
$tens = ' ' . $list2[$tens] . ' ';
$singles = ( int ) ( $num_part % 10 );
$singles = ' ' . $list1[$singles] . ' ';
}
$words[] = $hundreds . $tens . $singles . ( ( $levels && ( int ) ( $num_part ) ) ? ' ' . $list3[$levels] . ' ' : '' );
}
$commas = count( $words );
if( $commas > 1 )
{
$commas = $commas - 1;
}
$words = implode( ', ' , $words );
//Some Finishing Touch
//Replacing multiples of spaces with one space
$words = trim( str_replace( ' ,' , ',' , trim_all( ucwords( $words ) ) ) , ', ' );
if( $commas )
{
$words = str_replace_last( ',' , ' and' , $words );
}
return $words;
}
else if( ! ( ( int ) $num ) )
{
return 'Zero';
}
return '';}
add_shortcode( 'convertNumber', 'number_to_word' );
For example:
If a shortcode is [convertNumber num='1432'] , then this function will return "One thousand four hundred and thirty two" in my wordpress Page .
Use the NumberFormatter class, which makes this easy.
$number_formatter = new NumberFormatter("en", NumberFormatter::SPELLOUT);
echo $number_formatter->format(1432);
Example of using it in WordPress:
/**
 * Shortcode handler that spells out a number in English words.
 *
 * @param array  $atts    Shortcode attributes; only 'number' is read (default '0').
 * @param string $content Enclosed shortcode content (not used).
 * @return string Result of NumberFormatter::format() in SPELLOUT style.
 */
function number_to_word($atts, $content){
// NOTE(review): presumably a NumberFormatter implementation shipped with the
// theme or plugin; confirm it is needed when the intl extension is available.
include('class.numberformatter.php');
// let's fetch all of the arguments of the shortcode
$atts = shortcode_atts(
array(
'number' => '0',
), $atts );
$number = $atts['number'];
$number_formatter = new NumberFormatter("en", NumberFormatter::SPELLOUT);
$converted = $number_formatter->format($number);
return $converted;
}
// Registers the handler under the [convertNumber] shortcode tag.
add_shortcode( 'convertNumber', 'number_to_word' );
You can call it anywhere in WordPress with [convertNumber number="2222"]
Thanks

SQL Convert Seconds into Day:Hour:Min:Sec

Topic says it all. I'm trying to do some magic, via a function, that turns a second integer into a string "DD:HH:MM:SS".
Snip
input: 278543
output: "3D 5H 22M 23S "
What I'd like to do, more gracefully if possible, is pad the numbers (So that 5M shows as 05M) and right align them so that "3D 5H 22M 23S " is " 3D 5H 22M 23S" instead.
edit: Latest cut that seems to work. Would love to have it prettier, but this definitely works as far as I can tell:
-- DHMS: formats a number of seconds as days, hours, minutes and seconds,
-- for example '3D 05H 22M 23S' preceded by padding blanks.
-- The day count is padded with blanks to 6 characters; hours, minutes and
-- seconds are padded with zeros to 2 characters each.
CREATE FUNCTION DHMS(secondsElapsed INT)
RETURNS Char(20)
LANGUAGE SQL
NO EXTERNAL ACTION
DETERMINISTIC
BEGIN
DECLARE Dy Integer;
DECLARE Hr Integer;
DECLARE Mn Integer;
DECLARE Sc Integer;
-- Split the total into whole days and the remaining hours, minutes, seconds.
SET Dy = Cast( secondsElapsed / 86400 as Int);
SET Hr = Cast(MOD( secondsElapsed, 86400 ) / 3600 as Int);
SET Mn = Cast(MOD( secondsElapsed, 3600 ) / 60 as Int);
SET Sc = Cast(MOD( secondsElapsed, 60 ) as Int);
-- REPEAT supplies the missing pad characters in front of each number.
RETURN REPEAT(' ',6-LENGTH(RTRIM(CAST(Dy AS CHAR(6))))) || Dy || 'D '
|| REPEAT('0',2-LENGTH(RTRIM(CAST(Hr AS CHAR(6))))) || Hr || 'H '
|| REPEAT('0',2-LENGTH(RTRIM(CAST(Mn AS CHAR(6))))) || Mn || 'M '
|| REPEAT('0',2-LENGTH(RTRIM(CAST(Sc AS CHAR(6))))) || Sc || 'S';
END
You were on the right track using LPAD(), since it can pad with zero or any other string. CHAR(15) is not enough to format the output the way you want and still allow five positions for the number of days, which is the length you specified in your code.
-- DHMS: formats a number of seconds as days, hours, minutes and seconds.
-- The day count is left-padded to 5 characters; hours, minutes and seconds
-- are left-padded with '0' to 2 characters each. With the four unit
-- suffixes this adds up to the 18 characters of the return type.
CREATE OR REPLACE FUNCTION DHMS(secondsElapsed INT)
RETURNS Char(18)
LANGUAGE SQL
NO EXTERNAL ACTION
DETERMINISTIC
RETURN LPAD( secondsElapsed / 86400 , 5 ) || 'D '
|| LPAD( MOD( secondsElapsed, 86400 ) / 3600, 2, '0') || 'H '
|| LPAD( MOD( secondsElapsed, 3600 ) / 60, 2, '0' ) || 'M '
|| LPAD( MOD( secondsElapsed, 60 ), 2, '0' ) || 'S'
;
-- T-SQL: whole days as a number, then the remainder as hh:mi:ss (style 108).
declare @seconds int
set @seconds = 900000000
select cast(@seconds/86400 as varchar(50))+':'+Convert(VarChar, DateAdd(S, @seconds, 0), 108)

Antlr left recursive problem

I have a left recursive issue in my Antlr grammar. While I think I understand why there is a problem I am unable to think of a solution. The issue is with the last line for my datatype rule. I have included the entire grammar for you to see:
grammar Test;
options {output=AST;ASTLabelType=CommonTree;}
tokens {FUNCTION; ATTRIBUTES; CHILDREN; COMPOSITE;}
program : function ;
function : ID (OPEN_BRACKET (attribute (COMMA? attribute)*)? CLOSE_BRACKET)? (OPEN_BRACE function* CLOSE_BRACE)? SEMICOLON? -> ^(FUNCTION ID ^(ATTRIBUTES attribute*) ^(CHILDREN function*)) ;
attribute : ID (COLON | EQUALS) datatype -> ^(ID datatype);
datatype : ID -> ^(STRING["id"] ID)
| NUMBER -> ^(STRING["number"] NUMBER)
| STRING -> ^(STRING["string"] STRING)
| BOOLEAN -> ^(STRING["boolean"] BOOLEAN)
| array -> ^(STRING["array"] array)
| lookup -> ^(STRING["lookup"] lookup)
| datatype PLUS datatype -> ^(COMPOSITE datatype datatype) ;
array : OPEN_BOX (datatype (COMMA datatype)*)? CLOSE_BOX -> datatype* ;
lookup : OPEN_BRACE (ID (PERIOD ID)*) CLOSE_BRACE -> ID* ;
NUMBER
: ('+' | '-')? (INTEGER | FLOAT)
;
STRING
: '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
;
BOOLEAN
: 'true' | 'TRUE' | 'false' | 'FALSE'
;
ID : (LETTER|'_') (LETTER | INTEGER |'_')*
;
COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
| '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
;
WHITESPACE : (' ' | '\t' | '\r' | '\n') {$channel=HIDDEN;} ;
COLON : ':' ;
SEMICOLON : ';' ;
COMMA : ',' ;
PERIOD : '.' ;
PLUS : '+' ;
EQUALS : '=' ;
OPEN_BRACKET : '(' ;
CLOSE_BRACKET : ')' ;
OPEN_BRACE : '{' ;
CLOSE_BRACE : '}' ;
OPEN_BOX : '[' ;
CLOSE_BOX : ']' ;
fragment
LETTER
: 'a'..'z' | 'A'..'Z'
;
fragment
INTEGER
: '0'..'9'+
;
fragment
FLOAT
: INTEGER+ '.' INTEGER*
;
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
;
I am developing using Antlr works which provides a function to resolve this issue - but unfortunately it does not seem to work :s
Any help would be great.
Thanks.
EDIT:
Here is an example of the language I'm trying to implement / parse
<FunctionName> <OptionalAttributes> <OptionalChildFunctions>
So for example:
ForEach(in:[1,2,3,4,5] as:"i") {
Switch(value:{i}) {
Case(value:3) {
Print(message:"This is the number 3")
}
Default {
Print(message:"This isn't the number 3")
}
}
}
Okay, this should do the trick:
grammar Test;
/************************************** PARSER **************************************/
program
: function EOF
;
function
: ID (OPEN_PAREN (attribute (COMMA attribute)*)? CLOSE_PAREN)?
(OPEN_BRACE function* CLOSE_BRACE)?
SEMICOLON?
;
attribute
: ID (COLON | EQUALS)? expression
;
expression
: atom (PLUS atom)*
;
atom
: ID
| STRING
| BOOLEAN
| NUMBER
| array
| lookup
;
array
: OPEN_BOX (expression (COMMA expression)*)? CLOSE_BOX
;
lookup
: OPEN_BRACE (ID (PERIOD ID)*) CLOSE_BRACE
;
/************************************** LEXER **************************************/
NUMBER : ('+' | '-')? (INTEGER | FLOAT)
;
STRING : '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
;
BOOLEAN : 'true' | 'TRUE' | 'false' | 'FALSE'
;
ID : (LETTER|'_') (LETTER | INTEGER |'_')*
;
COMMENT : '//' ~('\n'|'\r')* ('\r'? '\n'| EOF) {$channel=HIDDEN;}
| '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
;
WHITESPACE : (' ' | '\t' | '\r' | '\n') {$channel=HIDDEN;} ;
COLON : ':' ;
SEMICOLON : ';' ;
COMMA : ',' ;
PERIOD : '.' ;
PLUS : '+' ;
EQUALS : '=' ;
OPEN_PAREN : '(' ;
CLOSE_PAREN : ')' ;
OPEN_BRACE : '{' ;
CLOSE_BRACE : '}' ;
OPEN_BOX : '[' ;
CLOSE_BOX : ']' ;
fragment
LETTER : 'a'..'z' | 'A'..'Z' ;
fragment
INTEGER : '0'..'9'+ ;
fragment
FLOAT : INTEGER+ '.' INTEGER* ;
fragment
ESC_SEQ : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') ;
Note that I've changed the names of OPEN_BRACKET and CLOSE_BRACKET to OPEN_PAREN and CLOSE_PAREN. The round ones, ( and ), are parentheses; the square ones, [ and ], are called brackets (the ones you called boxes, but calling them boxes doesn't hurt IMO).

Resources