Parsing large text files with Adobe AIR - apache-flex

I am trying to do the following in AIR:
browse to a text file
read the text file and store it in a string (ultimately in an array)
split the string by the delimiter \n and put the resulting strings in an array
manipulate that data before sending it to a website (mysql database)
The text files I am dealing with will be anywhere from 100-500mb in size. So far, I've been able to to complete steps 1 and 2, here is my code:
<mx:Script>
<![CDATA[
import mx.collections.ArrayCollection;
import flash.filesystem.*;
import flash.events.*;
import mx.controls.*;
private var fileOpened:File = File.desktopDirectory;
private var fileContents:String;
private var stream:FileStream;
private function selectFile(root:File):void {
var filter:FileFilter = new FileFilter("Text", "*.txt");
root.browseForOpen("Open", [filter]);
root.addEventListener(Event.SELECT, fileSelected);
}
private function fileSelected(e:Event):void {
var path:String = fileOpened.nativePath;
filePath.text = path;
stream = new FileStream();
stream.addEventListener(ProgressEvent.PROGRESS, fileProgress);
stream.addEventListener(Event.COMPLETE, fileComplete);
stream.openAsync(fileOpened, FileMode.READ);
}
private function fileProgress(p_evt:ProgressEvent):void {
fileContents += stream.readMultiByte(stream.bytesAvailable, File.systemCharset);
readProgress.text = ((p_evt.bytesLoaded/1048576).toFixed(2)) + "MB out of " + ((p_evt.bytesTotal/1048576).toFixed(2)) + "MB read";
}
private function fileComplete(p_evt:Event):void {
stream.close();
//fileText.text = fileContents;
}
private function process(c:String):void {
if(!c.length > 0) {
Alert.show("File contents empty!", "Error");
}
//var array:Array = c.split(/\n/);
}
]]>
</mx:Script>
Here is the MXML
<mx:Text x="10" y="10" id="filePath" text="Select a file..." width="678" height="22" color="#FFFFFF" fontWeight="bold"/>
<mx:Button x="10" y="40" label="Browse" click="selectFile(fileOpened)" color="#FFFFFF" fontWeight="bold" fillAlphas="[1.0, 1.0]" fillColors="[#E2E2E2, #484848]"/>
<mx:Button x="86" y="40" label="Process" click="process(fileContents)" color="#FFFFFF" fontWeight="bold" fillAlphas="[1.0, 1.0]" fillColors="[#E2E2E2, #484848]"/>
<mx:TextArea x="10" y="70" id="fileText" width="678" height="333" editable="false"/>
<mx:Label x="10" y="411" id="readProgress" text="" width="678" height="19" color="#FFFFFF"/>
step 3 is where I am having some troubles. There are 2 lines in my code commented out, both lines cause the program to freeze.
fileText.text = fileContents; attempts to put the contents of the string in a textarea
var array:Array = c.split(/\n/); attempts to split the string by delimiter newline
Could use some input at this point...
Am i even going about this the right way?
Can flex/air handle files this large? (i'd assume so)
This is my first attempt at doing any sort of flex work, if you see other things ive done wrong or could be done better, i'd appreciate the heads up!
Thanks!

Doing a split on a 500MB file might not be a good idea. You can write your own parser to work on the file but it may not be very fast either:
private function fileComplete(p_evt:Event):void
{
var array:Array = [];
var char:String;
var line:String = "";
while(stream.position < stream.bytesAvailable)
{
char = stream.readUTFBytes(1);
if(char == "\n")
{
array.push(line);
line = "";
}
else
{
line += char;
}
}
// catch the last line if the file isn't terminated by a \n
if(line != "")
{
array.push(line);
}
stream.close();
}
I haven't tested it but it should just step through the file character by character. If the character is a new line then push the old line into the array otherwise add it to the current line.
If you don't want it to block your UI while you do it, you'll need to abstract it into a timer based idea:
// pseudo code
private function fileComplete(p_evt:Event):void
{
var array:Array = [];
processFileChunk();
}
private function processFileChunk(event:TimerEvent=null):void
{
var MAX_PER_FRAME:int = 1024;
var bytesThisFrame:int = 0;
var char:String;
var line:String = "";
while( (stream.position < stream.bytesAvailable)
&& (bytesThisFrame < MAX_PER_FRAME))
{
char = stream.readUTFBytes(1);
if(char == "\n")
{
array.push(line);
line = "";
}
else
{
line += char;
}
bytesThisFrame++;
}
// if we aren't done
if(stream.position < stream.bytesAvailable)
{
// declare this in the class
timer = new Timer(100, 1);
timer.addEventListener(TimerEvent.TIMER_COMPLETE, processFileChunk);
timer.start();
}
// we're done
else
{
// catch the last line if the file isn't terminated by a \n
if(line != "")
{
array.push(line);
}
stream.close();
// maybe dispatchEvent(new Event(Event.COMPLETE)); here
// or call an internal function to deal with the complete array
}
}
Basically you choose an amount of the file to process each frame (MAX_PER_FRAME) and then process that many bytes. If you go over the number of bytes then just make a timer to call the process function again in a few frames time and it should continue where it left off. You can dispatch an event of call another function once you are sure you are complete.

I agree.
Try to split the text into chunks while you're reading it from the stream.
This way you don't have to store the text in your fileContents String (reducing the memory usage by 50%)

Try to process it in parts.

With regards to James's homespun parser, there is a problem if the text files contain any multibyte UTF characters (I was trying to parse UTF files in a similar manner when I came across this thread). Converting each byte to an individual string will disintegrate multi-byte characters, so I made some modifications.
In order to make this parser multi-byte friendly, you can store the growing lines in a ByteArray rather than a string. Then when you hit the end of a line (or a chunk, or the file), you can parse it as a UTF string (if necessary) without any problems:
var
out :ByteArray,
line_out :String,
line_end :Number,
char :int,
line:ByteArray;
out = new ByteArray();
line = new ByteArray();
while( file_stream.bytesAvailable > 0 )
{
char = file_stream.readByte();
if( (String.fromCharCode( char ) == "\n") )
{
// Do some processing on a line-by-line basis
line_out = ProcessLine( line );
line_out += "\n";
out.writeUTFBytes( line_out );
line = new ByteArray();
}
else
{
line.writeByte( char );
}
}
//Get the last line in there
out.writeBytes( line );

stream.position < stream.bytesAvailable
Wouldn't this condition be false after the position reaches the middle of the file?
If the file is 10 bytes, after you have read 5 bytes then bytesAvailable will be 5, I stored the initial value in another variable and used it in the condition.
Besides that, I think it is pretty good

Related

ArayList as dataProvider for a List: The index 0 is out of range 0

Does anybody please have an idea, why do I get the runtime error:
RangeError: Error #1125: The index 0 is out of range 0.
........
at Popup/update()[Popup.mxml:80]
at PopupTest/showPopup()[PopupTest.mxml:45]
at PopupTest/___btn_click()[PopupTest.mxml:52]
when calling the function:
private function showPopup(event:MouseEvent):void {
_popup.update(new Array('Pass' ,
'6♠', '6♣', '6♦', '6♥', '6 x',
'7♠', '7♣', '7♦', '7♥', '7 x',
'8♠', '8♣', '8♦', '8♥', '8 x',
'9♠', '9♣', '9♦', '9♥', '9 x',
'10♠', '10♣', '10♦', '10♥', '10 x'), true, 80);
}
As if my _list would have no entries at all (but why? I do assign _data.source=args) and thus the _list.ensureIndexIsVisible(0) call would fail at the line 80:
<?xml version="1.0" encoding="utf-8"?>
<s:Panel xmlns:fx="http://ns.adobe.com/mxml/2009"
xmlns:s="library://ns.adobe.com/flex/spark"
xmlns:mx="library://ns.adobe.com/flex/mx"
width="220" height="200"
initialize="init(event)">
<fx:Script>
<![CDATA[
import mx.collections.ArrayList;
import mx.events.FlexEvent;
import mx.utils.ObjectUtil;
private static const FORCE:uint = 20;
[Bindable]
private var _data:ArrayList = new ArrayList();
private var _timer:Timer = new Timer(1000, 120);
private function init(event:FlexEvent):void {
_timer.addEventListener(TimerEvent.TIMER, timerUpdated);
_timer.addEventListener(TimerEvent.TIMER_COMPLETE, timerCompleted);
}
public function close():void {
_timer.reset();
_data.source = null;
visible = false;
}
private function timerUpdated(event:TimerEvent=null):void {
var seconds:int = _timer.repeatCount - _timer.currentCount;
title = 'Your turn! (' + seconds + ')';
// show panel for cards too
if (seconds < FORCE)
visible = true;
}
private function timerCompleted(event:TimerEvent=null):void {
title = 'Your turn!';
close();
}
public function update(args:Array, bidding:Boolean, seconds:int):void {
if (seconds <= 0) {
close();
return;
}
// nothing has changed
if (ObjectUtil.compare(_data.source, args, 0) == 0)
return;
_data.source = args;
if (args == null || args.length == 0) {
close();
return;
}
if (seconds < FORCE || bidding)
visible = true;
_timer.reset();
title = 'Your turn! (' + seconds + ')';
_list.ensureIndexIsVisible(0); // the line 80
_timer.repeatCount = seconds;
_timer.start();
}
]]>
</fx:Script>
<s:VGroup paddingLeft="10" paddingTop="10" paddingRight="10" paddingBottom="10" gap="10" width="100%" height="100%">
<s:List id="_list" dataProvider="{_data}" width="100%" height="100%" fontSize="24" itemRenderer="RedBlack" />
</s:VGroup>
</s:Panel>
the reason
You are adding the new array allright, but then the List starts creating ItemRenderers based on the items that are in that array. This takes some time and happens asynchronously. In the meantime you're saying "show me item 1", but the ItemRenderer for item 1 doesn't exist yet. It will very soon, but not right now. That's why you get an indexoutofrange error.
the solution
You have to be sure the List is done creating ItemRenderers before you call that method. The easiest way to solve this situation - though definitely not the cleanest - is to just wait until the next render cycle by using the infamous callLater().
callLater(_list.ensureIndexIsVisible, [0]);
This essentially saying: wait for the next render cycle and then call ensureIndexIsVisible() on _list with parameter 0.
(On a side note: if you really only want index 0 this whole thing is rather pointless, because I think a List scrolls back to the top when its dataprovider is changed anyway)
a cleaner solution
You can listen on the List for the RendererExistenceEvent#RENDERER_ADD event. This will be dispatched whenever a new ItemRenderer was added to the list and it holds a reference to the item's index in the List, the data and the ItemRenderer itself. However in your case we only need the 'index'. Whenever an ItemRenderer is added at index 0 we'll scroll back to the top:
_list.addEventListener(RendererExistenceEvent.RENDERER_ADD, onRendererAdded);
private function onRendererAdded(event:RendererExistenceEvent):void {
if (event.index == 0) myList.ensureIndexIsVisible(0);
}
This will immediately scroll to the top when the first ItemRenderer is added and doesn't need to wait until all of them are ready.

Flex 3: Can anybody see why this dictionary isn't working?

So, in my main mxml, i have a variable defined as such:
[Bindable] public var studentsListDict:Dictionary = new Dictionary;
I also have the following imported:
import flash.utils.Dictionary;
I then have an HTTPService that imports an XML file:
<mx:HTTPService id="studentsHttp" url="students.xml" resultFormat="e4x" makeObjectsBindable="true" result="createStudentsCollection(event)" />
The createStudentsCollection function is as follows:
private function createStudentsCollection(e:ResultEvent):void
{
var xmlList:XMLList = XML(e.result).student;
var dupString:String = "|";
var tempArray:Array = new Array;
studentsListDict = new Dictionary;
for (var i:int = 0; i < xmlList.length(); i++)
{
if (dupString.indexOf(String("|" + xmlList[i].name) + "|") == -1)
{
tempArray = new Array;
tempArray[0] = xmlList[i].name.#id;
tempArray[1] = xmlList[i].name;
tempArray[2] = xmlList[i].year;
tempArray[3] = xmlList[i].track;
studentsListAC.addItem(tempArray);
studentsListDict[tempArray[0]] = tempArray;
dupString += "|" + xmlList[i].name + "|";
getLen(studentsListDict);
}
}
}
Then, to ensure the items were correctly put into the dictionary, i have the following function:
public static function getLen(d:Dictionary):int
{
var i:int = 0;
for (var key:Object in d)
{
Alert.show(String(key + "\n" + d[key]));
i++;
}
return i;
}
This creates pop up alerts that show that everything was loaded correctly into the dictionary.
Later on, in a child, I call a function that tries to use the dictionary, and i get a return of "undefined".
Here's the function that searches based on key, and returns a value from the array within:
public function getStudentName(sID:Number):String
{
return studentsListDict[sID][1];
}
Unfortunately, the getStudentName function simply returns undefined every time.
If anybody can see something I'm missing, it'd be greatly appreciated.
Thanks,
Brds
EDIT
It wasn't working b/c you can't have numbers as keys in a dictionary. Simply casting them to a string during the declaration and look up seems to work just fine.
Here is some documentation on dictionary keys..
It looks like you're code is setting it as a string and then accessing it as a number. I suspect that is the root of your problem You can try something like this:
public function getStudentName(sID:Number):String
{
return studentsListDict[sID.toString()][1];
}
It is actually perfectly acceptable to use numbers as Keys to a Dictionary. The Dictionary apparently turns the number and a string value of that number to the same key. Here is a sample:
<?xml version="1.0" encoding="utf-8"?>
<s:Application xmlns:fx="http://ns.adobe.com/mxml/2009"
xmlns:s="library://ns.adobe.com/flex/spark"
xmlns:mx="library://ns.adobe.com/flex/mx" minWidth="955" minHeight="600" creationComplete="application1_creationCompleteHandler(event)">
<fx:Script>
<![CDATA[
import mx.events.FlexEvent;
public var dict : Dictionary;
protected function application1_creationCompleteHandler(event:FlexEvent):void
{
dict = new Dictionary();
dict["0"] = "hi";
dict["4"] = "hola";
dict["17"] = "bye";
dict["32"] = "adios";
dict[32] = "adios 2";
dict[3.2] = "adios 3";
dict[50] = "Audio ";
dict["50"] = "Audio 2";
trace(dict["0"]);
trace(dict["4"]);
trace(dict["17"]);
trace(dict["32"]);
trace(dict[32]);
trace(dict[3.2]);
trace(dict[50]);
trace(dict["50"]);
}
]]>
</fx:Script>
</s:Application>
I think it's because getStudentName is using a Number as a key, while createStudentCollection is using a string. In this case, because the keys are numbers/strings, you can simply use an Object: var studentsListDict:Object = {}; — it will automatically coerce all the keys to strings.
Also, as an asside: new Dictionary(); more standard, and it's better form in ActionScript to use [] (eg, var foo:Array = []) than new Array(). That way you can put stuff in the array at the same time:
var tempArray:Array = [
xmlList[i].name.#id,
xmlList[i].name,
…
];

Compiler warns me that binding will not work but why I run the application it does work!

The Flex application below generates the compiler warning: Data binding will not be able to detect assignments to 'dp'. This seems correct since the variable 'dp' is not a bindable property (there is no [Bindable] metadata tag). I have added a button which appends items to the back of 'dp' when it is clicked. Although the compiler warns me that I will not see changes to 'dp', the list shows the new item every time the button is clicked!
I do not understand why I can see new items appear in the list. Can someone explain why this still works although 'dp' is not bindable?
<mx:Application xmlns:mx="http://www.adobe.com/2006/mxml" layout="vertical" minWidth="955" minHeight="600">
<mx:Script>
<![CDATA[
import mx.collections.ArrayCollection;
import mx.rpc.events.ResultEvent;
private var arrayData:Array = [
{name:"banana", cat:"fruit", cost:0.99},
{name:"bread", cat:"bakery", cost:1.99},
{name:"orange", cat:"fruit", cost:0.52},
{name:"donut", cat:"bakery", cost:0.33},
{name:"apple", cat:"fruit", cost:1.05}];
private var dp:ArrayCollection = new ArrayCollection(arrayData);
private function onButtonClick(event:MouseEvent):void
{
var obj:Object = new Object();
obj.name="test";
obj.cat="testcat";
obj.cost=666;
dp.addItem(obj);
}
]]>
</mx:Script>
<mx:HorizontalList dataProvider="{dp}" labelField="name" columnWidth="100" width="80%" height="50"/>
<mx:Button label="Click me" click="onButtonClick(event)" />
The compiler is correct in it's warning.
The compiler is warning you that assignments that change the value of dp from the initial ArrayCollection you specified to another ArrayCollection won't be detected.
However, if you leave the value of dp alone, and only change the contents of it, then your <HorizontalList /> will continue to work.
This may seem trivial, but it's an important distinction, and one that can lead to some very confusing bugs further down the road in your application.
Assignments to the variable dp will not be detected. However, changes to the ArrayCollections list will, because they dispatch a CollectionChangeEvent.
eg:
private var dp:ArrayCollection = new ArrayCollection();
private function test():void
{
// Here, we don't change the value of dp directly,
// instead we just modify it's list.
// The DataGroup will show the strings One,Two
dp.addItem("One")
dp.addItem("Two")
// Here, we change the actual value of dp, by assigning a
// new ArrayCollection to it.
// This change would not be detected, and the list would continue to show
// the contents of the previous value.
// Additionally, the label will show the string "Length: 2",
// even though the length is clearly now 3.
dp = new ArrayCollection();
dp.addItem("Tahi");
dp.addItem("Rua");
dp.addItem("Toru");
}
<s:DataGroup dataProvider="{dp}" />
<s:Label text="Length: {dp.length}" />
Try to use:
[Bindable("__NoChangeEvent__")]
private var dp:ArrayCollection = new ArrayCollection(arrayData);
What about adding elements in list see the code of ListBase:
public function set dataProvider(value:Object):void
{
if (collection)
{
collection.removeEventListener(CollectionEvent.COLLECTION_CHANGE, collectionChangeHandler);
}
if (value is Array)
{
collection = new ArrayCollection(value as Array);
}
else if (value is ICollectionView)
{
collection = ICollectionView(value);
}
else if (value is IList)
{
collection = new ListCollectionView(IList(value));
}
else if (value is XMLList)
{
collection = new XMLListCollection(value as XMLList);
}
else if (value is XML)
{
var xl:XMLList = new XMLList();
xl += value;
collection = new XMLListCollection(xl);
}
else
{
// convert it to an array containing this one item
var tmp:Array = [];
if (value != null)
tmp.push(value);
collection = new ArrayCollection(tmp);
}
// get an iterator for the displaying rows. The CollectionView's
// main iterator is left unchanged so folks can use old DataSelector
// methods if they want to
iterator = collection.createCursor();
collectionIterator = collection.createCursor(); //IViewCursor(collection);
// trace("ListBase added change listener");
collection.addEventListener(CollectionEvent.COLLECTION_CHANGE, collectionChangeHandler, false, 0, true);
clearSelectionData();
var event:CollectionEvent = new CollectionEvent(CollectionEvent.COLLECTION_CHANGE);
event.kind = CollectionEventKind.RESET;
collectionChangeHandler(event);
dispatchEvent(event);
itemsNeedMeasurement = true;
invalidateProperties();
invalidateSize();
invalidateDisplayList();
}
So take a look at line:
collection.addEventListener(CollectionEvent.COLLECTION_CHANGE, collectionChangeHandler, false, 0, true);

Flex: Read bytearray

I use the following to upload a file to Flex:
private var filer:FileReference;
protected function button1_clickHandler(event:MouseEvent):void
{
var fd:String = "Files (*)";
var fe:String = "*";
var ff:FileFilter = new FileFilter(fd, fe);
filer = new FileReference();
filer.addEventListener(Event.SELECT, onFileSelect);
filer.browse(new Array(ff));
filer.addEventListener(Event.COMPLETE,
function (e:Event):void {
e.currentTarget.data.toString();
}
);
}
private function onFileSelect(e:Event):void {
filer.load();
}
And my file looks like this:
Here is the original file: http://sesija.com/up/1.txt
I need to read the uploaded file and parse it. The problem is that in my e.currentTarget.data.toString(); I get only '1' and not the rest of the String.
Any idea on how to successfully read this entire txt file?
The data property is a ByteArray. Instead of using the toString method (which apparently treats NULL byte as end of string), use specific read methods of the ByteArray class like readByte, readInt etc.
var array:Array = [];
var ba:ByteArray = e.currentTarget.data as ByteArray;
while(ba.bytesAvailable != 0){
array.push(ba.readByte());
}
trace(array.join(", "));
You might want to read Working with byte arrays

add Dynamic children to an array or arraycollection

I'm a bit new to flex and cannot get my head around this problem. can someone help. thanks in advance.
I have a string list path.
path 1 - "one/two/three"
path 2 - "one/two/four"
path 3 - "five/six"
i need an advanced datagrid to show a tree structure like so
one/
...two/
........three/
............four
five/
.......six
but i want to achieve this dynamicall with arrays, objects, or arraycollection (as applicable)
I need to loop through each string path using string methods which isnt a problem but how do i create "DYNAMIC" (depth) children? please help as i'm about to pull my hair out.
You can try something like:
var paths:Array = ['one/two/three','one/two/four','five/six'];
var pathsCollection:ArrayCollection = new ArrayCollection();
for(var i:int = 0 ; i < paths.length ; i++){
var folderArr:Array = paths[i].split('/');
var folderNum:int = folderArr.length;
var folderLabel:String = '';
for(var j:int = 0 ; j < folderNum; j++){
trace(folderLabel+folderArr[j]);
pathsCollection.addItem({label:folderLabel+folderArr[j],level:j,path:folderArr});
folderLabel += '...';
}
}
and as sharvey says, do have a look at recursion.
Recursion is the way to go when you are operating on objects which contain an unknown number of elements. Try this sample application:
<mx:Application xmlns:mx="http://www.adobe.com/2006/mxml"
creationComplete="{init();}"
layout="vertical"
verticalAlign="middle">
<mx:Script>
<![CDATA[
import mx.utils.ObjectUtil;
import mx.collections.HierarchicalData;
private var paths:Array = ['one/two/three','one/two/four','five/six'];
private static const DELIMITER:String = "/";
private function init():void {
var test:Array = buildHierarchy(paths);
dg_test.dataProvider = new HierarchicalData(test);
trace(ObjectUtil.toString(test));
}
private function buildHierarchy(arr:Array):Array {
var ret:Array = new Array();
var o:Object = new Object();
/* Loop over the paths array */
for (var i:int = 0; i < arr.length; i++) {
/* Split the string block according to the delimiter */
var parts:Array = String(arr[i]).split(DELIMITER);
if (parts.length) {
/* Make a new object with a label equal to the first string element */
o = new Object();
o.label = parts[0];
/* Remove the first item in the string list */
parts.splice(0, 1);
/* Important - If the string has remaining members, call this
function again with the remaining members. Assign this to
the 'children' property of the newly created object */
if (parts.length > 0)
o.children = buildHierarchy([parts.join(DELIMITER)]);
/* Add the object to the new array */
ret.push(o);
}
}
return ret;
}
]]>
</mx:Script>
<mx:AdvancedDataGrid id="dg_test" height="200" width="400">
<mx:columns>
<mx:AdvancedDataGridColumn id="col_label" dataField="label"/>
</mx:columns>
</mx:AdvancedDataGrid>
This function will call itself once for every element contained in the 'string/string/string' block. The key to getting this structure displayed in the ADG is to set the adg.dataProvider = new HierarchicalData(myArray);
Hope this works! I can't get the code formatted 100% but you should get the idea. Don't forget to add the closing application tag.

Resources