Where does the "newline" (\n) come from? (pattern matching using "flex") - apache-flex

I have an experimental flex source file(lex.l):
%option noyywrap
%{
/* Running totals, updated by the rules below and printed by main(). */
int chars = 0;
int words = 0;
int lines = 0;
%}
/* Named sub-patterns. Only {letter} is used by the rules below. */
delim [ \t\n]
ws {delim}+
letter [A-Za-z]
digit [0-9]
id {letter}({letter}|{digit})*
/* The decimal point is escaped: a bare '.' would match any character. */
number {digit}+(\.{digit}+)?(E[+-]?{digit}+)?
%%
{letter}+ { words++; chars += yyleng; printf("Word\n"); }
\n { chars++; lines++; printf("Line\n"); }
. { chars++; printf("SomethingElse\n"); }
%%
/*
 * Entry point of the scanner.
 *
 * When a file name is given as argv[1], the scanner reads from that file;
 * otherwise yyin is left untouched. After yylex() returns, the line, word
 * and character totals are printed.
 *
 * Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc > 1)
    {
        yyin = fopen(argv[1], "r");
        if (!yyin)
        {
            perror(argv[1]);
            return 1;
        }
    }
    yylex();
    printf("lines: %8d\nwords: %8d\nchars: %8d\n", lines, words, chars);
    return 0;
}
I created an input file called "input.txt" with "red apple" written in it. Command line:
$ flex lex.l
$ cc lex.yy.c
$ ./a.out < input.txt
Word
SomethingElse
Word
Line
lines: 1
words: 2
chars: 10
Since there is no newline character in the input file, why is the "\n" pattern in lex.l matched? ("lines" is supposed to be 0, and "chars" is supposed to be 9.)
(I am using OS X.)
Thanks for your time.

It is very possible that your text editor has automatically inserted a newline at the end of the file.

Related

Store data in a vector of vectors from a txt of different lines

I am trying to read data from a file of 12 cols and 4 rows so they keep their indexes when I read them into a loop, say vec[i][j] will be i= 0...3 and j= 0...11. But the code I already have gives me a single "row", so i goes from 0 to 47. How can I fix it?
// Reads a text file of numbers into a table with one row per input line.
//
// Each non-blank line of `filename` becomes one inner vector, so a file with
// 4 lines of 12 values is returned as vec[0..3][0..11]. Values on a line may
// be separated by whitespace and/or commas.
//
// If the file cannot be opened, "File not opened" is written to stdout and an
// empty table is returned.
std::vector<std::vector<float>> reading(std::string filename) {
    std::vector<std::vector<float>> vec;
    std::ifstream file_in(filename);
    if (!file_in) {
        std::cout << "File not opened" << std::endl;
        return vec;
    }
    std::string line;
    // Split on '\n' (the getline default) so that one input line is one row.
    // Splitting on ',' treated the whole file as a single record.
    while (std::getline(file_in, line)) {
        // Turn commas into blanks so operator>> can parse either format.
        for (char& ch : line) {
            if (ch == ',') ch = ' ';
        }
        std::stringstream split(line);
        std::vector<float> row;
        float value;
        while (split >> value) {
            row.push_back(value);
        }
        // Blank lines (e.g. a trailing newline) must not create empty rows.
        if (!row.empty()) {
            vec.emplace_back();
            vec.back().swap(row);
        }
    }
    return vec;
}

How to split QString and keep the separator in Qt?

I have a QString: "{x, c | 0x01}", and I want to split it to 7 tokens as below:
{
x
,
c
|
0x01
}
What's the best way to do it in Qt?
I tried to use QString::split(QRegExp("[\\{\\},|]")), but it DOES NOT keep the separator in the result.
Maybe this solution can serve your task:
// Splits "{x, c | 0x01}" into tokens, keeping every separator character as a
// token of its own, and prints the resulting list with qDebug().
int main(void) {
    QString str { "{x, c | 0x01}" };
    QRegExp separators { "[\\{\\},|]" };
    QStringList list;
    str.replace( " ", "" );
    int mem = 0; // index just past the previous separator, i.e. start of pending text
    // Jump from one separator to the next instead of probing every index.
    for(int i = str.indexOf(separators); i != -1; i = str.indexOf(separators, i+1)) {
        // Only emit text that actually exists: this keeps a word that precedes
        // the first separator and avoids empty tokens between adjacent separators.
        if(i > mem) list.append(str.mid(mem, i-mem)); // append str before separator
        list.append(str.mid(i, 1)); // append separator
        mem = i+1;
    }
    // Text after the last separator would otherwise be dropped.
    if(mem < str.size()) list.append(str.mid(mem));
    qDebug() << list;
    return 0;
}
Outputs: ("{", "x", ",", "c", "|", "0x01", "}")
You can drop the if(mem) check, but then call list.pop_front(); or list.removeAll(""); after the for loop, because the first element will be an empty string "".
Basically, you iterate through the string, check whether a delimiter is found, and add the delimiter to the list. If no delimiter is found, a new 'word' is added to the list, and characters are appended to that word until the next delimiter is found. Take a look:
//input string
QString str = "{x, c | 0x01}";
QList<QString> out;
//flag used to keep track of whether we're adding a mullti-char word, or just a deliminator
bool insideWord = false;
//remove whitespaces
str = str.simplified();
str = str.replace(" ", "");
//iterate through string, check for delims, populate out list
for (int i = 0; i < str.length(); i++)
{
QChar c = str.at(i); //get char at current index
if (c == '{' || c == '}' || c == ',' || c == '|')
{
//append deliminator
out.append(c);
insideWord = false;
}
else
{
//append new word to qlist...
if (!insideWord)
{
out.append(c);
insideWord = true;
}
//but if word already started
else
{
//add 'c' to the word in last index of the qlist
out.last().append(c);
}
}
}
//output as requested by OP
qDebug() << "String is" << out;
This can be done in a single regular expression, but has to use look-ahead and look-behind.
The expression specified in the question ([\\{\\},|]) will match a 1-character long string consisting of any of the characters {, }, , or |. QString.split will then remove that 1-character long string.
What is needed is to find the zero-character string immediately before each of those separators, using a look-ahead: (?=[\\{\\},|]) and also to find the zero-character string immediately after the separator (?<=[\\{\\},|]).
Combining these gives:
QString::split(QRegularExpression("(?=[\\{\\},|])|(?<=[\\{\\},|])"))
Which will give the desired output: ("{", "x", ",", "c", "|", "0x01", "}")

Finding all possible permutations of the characters in a set of strings using recursion

I have this set of (Greek) strings:
ἸἼΙἹἽ,
ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ,
σς,
οὸόὀὄὅὂ,
ὺὖυῦύὐὑὔΰϋὕὗὓὒῢ
I'd like to find all possible permutations of the characters in these 5 strings. For example, Ἰῇσοὺ, Ἰῇσοὖ, Ἰῇσου, etc. I know it should involve recursion since the number of strings is not fixed but I'm a beginner and I'm completely dumbfounded by recursion.
I did the following in Python and it does give me all combinations of the characters in each string. But I need the 'ἸἼΙἹἽ' to always come first, 'ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ' second,'σς' third, etc.
# -*- coding: utf-8 -*-
def Gen( wd, pos, chars ):
    """Print every word built from one character of each string, in order.

    wd    -- prefix assembled so far (pass "" to start)
    pos   -- index of the string in `chars` that supplies the next character
    chars -- sequence of strings; character i of each word comes from chars[i]
    """
    if pos < len( chars ):
        # Only the string at `pos` may fill this position. Looping over every
        # string here mixed characters from all of the sets.
        for l in chars[ pos ]:
            Gen( wd + l, pos + 1, chars )
    else:
        print( wd )
# Character sets, one string per position of the generated words.
chars = [ u'ἸἼΙἹἽ', u'ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ', u'σς', u'οὸόὀὄὅὂ', u'ὺὖυῦύὐὑὔΰϋὕὗὓὒῢ' ]
# Start from an empty prefix at position 0.
Gen( "", 0, chars )
Thanks for the help everybody. My mind is completely blown. Recursion! Here's what I ended up doing in Python:
# -*- coding: utf-8 -*-
s = [ u'ἸἼΙἹἽ', u'ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ', u'σς', u'οὸόὀὄὅὂ', u'ὺὖυῦύὐὑὔΰϋὕὗὓὒῢ' ]
# Collects every completed word produced by recur().
results = []
def recur( wd, strings ):
    """Append to `results` each word made of `wd` followed by one character
    from every string in `strings`, taken in order."""
    if not strings:
        # No strings left to draw from: the word is complete.
        results.append( wd )
        return
    head, tail = strings[ 0 ], strings[ 1: ]
    for ch in head:
        recur( wd + ch, tail )
# Entry point: fill `results` by calling recur() on the strings in `s`,
# then print each result encoded as UTF-8.
def main():
recur( '', s )
for r in results:
print r.encode( 'utf-8' )
# Run the script.
main()
You create a char array which will contain the string you want to work with
char str[] = "ABC";
then you get the length of the string int n = strlen(str); and lastly you permutate.
You make a new function which will contain the input string, starting index of the string and ending index of the string.
Check if the starting index (int s) equals the ending index (int e)
if it does, that means you're done, if not you go into a loop where you go from start (s) to end (e), swap the values, recurse, swap again to backtrack.
An example in C++:
#include <stdio.h>
#include <string.h>
/* Exchange the two characters that `i` and `j` point at. */
void swap(char *i, char *j)
{
    const char saved = *j;
    *j = *i;
    *i = saved;
}
/*
 * Print `str` once for every ordering of the characters str[start..end]
 * (both inclusive), leaving the characters before `start` where they are.
 * The buffer is rearranged in place during the recursion and each swap is
 * undone afterwards.
 */
void permutate(char *str, int start, int end)
{
    if (start == end)
    {
        /* Nothing left to rearrange: emit the current arrangement. */
        printf("%s\n", str);
        return;
    }
    for (int pos = start; pos <= end; ++pos)
    {
        char *const head = str + start;
        char *const pick = str + pos;
        swap(head, pick);               /* choose str[pos] for slot `start` */
        permutate(str, start + 1, end); /* arrange the remaining slots */
        swap(head, pick);               /* backtrack */
    }
}
/* Demo driver: print all permutations of "ABC". */
int main()
{
    char text[] = "ABC";
    int length = strlen(text);
    /* permutate() takes inclusive indices, hence the last index is length - 1. */
    permutate(text, 0, length - 1);
    return 0;
}
I'm not that familiar with Python, but I've found something that might help in your case:
def comb(first_str, second_str):
    """Yield every interleaving of the two strings that keeps the characters
    of each string in their original relative order."""
    if not (first_str and second_str):
        # At least one side is exhausted: the only result is whatever remains.
        yield first_str if first_str else second_str
        return
    # Next character comes from the first string ...
    lead_first, rest_first = first_str[0], first_str[1:]
    for tail in comb(rest_first, second_str):
        yield lead_first + tail
    # ... or from the second string.
    lead_second, rest_second = second_str[0], second_str[1:]
    for tail in comb(first_str, rest_second):
        yield lead_second + tail
>>> for result in comb("ἸἼΙἹἽ", "ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ"):
print(result)
Just write down the five nested loops. In pseudocode,
for a in "ἸἼΙἹἽ"
for b in "ῇηἤήῃὴῆἡἠἢᾖἥἣῄἦᾗᾐἧᾔᾑ"
for c in "σς"
for d in "οὸόὀὄὅὂ"
for e in "ὺὖυῦύὐὑὔΰϋὕὗὓὒῢ"
emit [a,b,c,d,e]
To encode these five loops with recursion, so it's good for any number of input strings, again in pseudocode,
g(list-of-strings) =
| case list-of-strings
| of empty --> end-of-processing
| of (first-string AND rest-of-strings) -->
for each ch in first-string
DO g(rest-of-strings)
Now you only need to figure out where to hold each current first-string's character ch and how to combine them all while at the end-of-processing (basically, your two options are a global accumulator, or an argument to a function invocation).

Find duplicate words in two text files using command line

I have two text files:
f1.txt
boom Boom pow
Lazy dog runs.
The Grass is Green
This is TEST
Welcome
and
f2.txt
Welcome
I am lazy
Welcome, Green
This is my room
Welcome
bye
In Ubuntu Command Line I am trying:
awk 'BEGIN {RS=" "}FNR==NR {a[$1]=NR; next} $1 in a' f1.txt f2.txt
and getting output:
Green
This
is
My desired output is:
lazy
Green
This is
Welcome
Description: I want to compare two txt files, line by line, and output all duplicate words. The matches should not be case sensitive. Also, comparing line by line would be better than looking for a match from f1.txt anywhere in the whole f2.txt file. For example, the word "Welcome" should not be in the desired output if it were on line 6 instead of line 5 in f2.txt.
Well, then. With awk:
awk 'NR == FNR { for(i = 1; i <= NF; ++i) { a[NR,tolower($i)] = 1 }; next } { flag = 0; for(i = 1; i <= NF; ++i) { if(a[FNR,tolower($i)]) { printf("%s%s", flag ? OFS : "", $i); flag = 1 } } if(flag) print "" }' f1.txt f2.txt
This works as follows:
NR == FNR {                                  # First file only:
    for (col = 1; col <= NF; col++)          # remember each word of this line,
        seen[NR, tolower($col)] = 1          # lower-cased, under its line number.
    next                                     # Nothing else to do for this file.
}
{                                            # Second file:
    printed = 0                              # words printed so far for this line
    for (col = 1; col <= NF; col++) {        # look at every word
        if (seen[FNR, tolower($col)]) {      # same word on the same line number
                                             # of the first file?
            printf("%s%s", printed ? OFS : "", $col)  # print it, preceded by OFS
                                                      # unless it is the first
            printed++
        }
    }
    if (printed)                             # finish the output line only if
        print ""                             # something was printed
}

Pivot table in AWK

I need to transform elements from an array to column index and return the value of $3 for each column index.
I don't have access to gawk 4, so I cannot work with real multidimensional arrays.
Input
Name^Code^Count
Name1^0029^1
Name1^0038^1
Name1^0053^1
Name2^0013^3
Name2^0018^3
Name2^0023^5
Name2^0025^1
Name2^0029^1
Name2^0038^1
Name2^0053^1
Name3^0018^1
Name3^0060^1
Name4^0018^2
Name4^0025^5
Name5^0018^2
Name5^0025^1
Name5^0060^1
Desired output
Name^0013^0018^0023^0025^0029^0038^0053^0060
Name1^^^^^1^1^1^
Name2^3^3^5^1^1^1^1^
Name3^^1^^^^^^1
Name4^^2^^5^^^^
Name5^^^^1^^^^1
Any suggestions on how to tackle this task without using real multidimensional arrays?
The following solution uses GNU awk v3.2 features for sorting. This does not use multi-dimensional arrays. It only simulates one.
awk -F"^" '
# Every record after the header: store the count under (name, code) and
# remember which names and codes exist.
NR > 1 {
    cell[$1, $2] = $3
    rowKeys[$1] = 1
    colKeys[$2] = 1
}
# Once all input is read: emit the header, then one line per name.
END {
    # asorti() stores the sorted indices of its first argument in the second.
    nCols = asorti(colKeys, colOrder)
    nRows = asorti(rowKeys, rowOrder)

    header = "Name"
    for (c = 1; c <= nCols; c++)
        header = header FS colOrder[c]
    print header

    for (r = 1; r <= nRows; r++) {
        line = rowOrder[r]
        # A missing (name, code) pair yields an empty field.
        for (c = 1; c <= nCols; c++)
            line = line FS cell[rowOrder[r], colOrder[c]]
        print line
    }
}' file
Name^0013^0018^0023^0025^0029^0038^0053^0060
Name1^^^^^1^1^1^
Name2^3^3^5^1^1^1^1^
Name3^^1^^^^^^1
Name4^^2^^5^^^^
Name5^^2^^1^^^^1
This will work with any awk and will order the output of counts numerically while keeping the names in the order they occur in your input file:
$ cat tst.awk
# Pivot "Name^Code^Count" records into one output line per name with one
# column per code. Names keep their order of first appearance; codes are
# kept in ascending numeric order.
BEGIN{FS="^"}
# Skip the header line (NR == 1).
NR>1 {
# First time this name is seen: append it to the ordered list of names.
if (!seenNames[$1]++) {
names[++numNames] = $1
}
# First time this code is seen: insert it into the sorted list of codes.
if (!seenCodes[$2]++) {
# Insertion Sort - start at the end of the existing array and
# move everything greater than the current value down one slot
# leaving open the slot for the current value to be inserted between
# the last value smaller than it and the first value greater than it.
# "$2+0" forces a numeric comparison.
# NOTE(review): the loop stops at j == 1 because codes[0] is never set and
# an unset value is not greater than a non-negative number - confirm that
# codes are never negative.
for (j=++numCodes;codes[j-1]>$2+0;j--) {
codes[j] = codes[j-1]
}
codes[j] = $2
}
# Store the count under the (name, code) pair.
count[$1,$2] = $3
}
END {
# Header line: "Name" followed by every code.
printf "%s", "Name"
for (j=1;j<=numCodes;j++) {
printf "%s%s",FS,codes[j]
}
print ""
# One line per name; a (name, code) pair that never occurred prints as an
# empty field.
for (i=1;i<=numNames;i++) {
printf "%s", names[i]
for (j=1;j<=numCodes;j++) {
printf "%s%s",FS,count[names[i],codes[j]]
}
print ""
}
}
...
$ awk -f tst.awk file
Name^0013^0018^0023^0025^0029^0038^0053^0060
Name1^^^^^1^1^1^
Name2^3^3^5^1^1^1^1^
Name3^^1^^^^^^1
Name4^^2^^5^^^^
Name5^^2^^1^^^^1
Since you only have two "dimensions", it is easy enough to use one array for each dimension and a joining array with a calculated column name. I didn't do the sorting of columns or rows, but the idea is pretty basic.
#!/usr/bin/awk -f
#
# Pivot "Name^Code^Count" records into one line per name with one column per
# code. Rows and columns are emitted in awk's "for (key in array)" order,
# i.e. they are not sorted.
BEGIN { FS = "^" }
# Skip the header line.
(NR == 1) { next }
{
    rows[$1] = 1
    columns[$2] = 1
    # A comma subscript joins the parts with SUBSEP, so a name or code that
    # itself contains "-" cannot collide with another pair.
    join_table[$1, $2] = $3
}
END {
    printf "Name"
    for (col_name in columns) {
        printf "%s%s", FS, col_name
    }
    printf "\n"
    for (row_name in rows) {
        # Always print data through "%s": used as the format string, a name
        # containing "%" would be misinterpreted.
        printf "%s", row_name
        for (col_name in columns) {
            # A pair that never occurred in the input prints as an empty field.
            printf "%s%s", FS, join_table[row_name, col_name]
        }
        printf "\n"
    }
}

Resources