/* AbiWord
 * Copyright (C) 2003 Tomas Frydrych
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#ifndef UT_ITERATOR_H
#define UT_ITERATOR_H

//////////////////////////////////////////////////////////////////////////////
//
//  UT_TextIterator class is an abstraction of a text iterator, making
//  it possible to iterate sequentially over textual data without
//  having to know anything about how that data might be stored.
//
//  This class is pure virtual; its sole purpose is to define a generic
//  interface so that we can pass a generic type into and out of
//  functions. For an example of an implementation see pd_Iterator.h/cpp
//
//  Notes on implementation
//  -----------------------
//  Any derived classes should implement the individual functions to
//  conform to the behaviour outlined in the comments in the class
//  definition below.
//
//  In addition, the actual iterator implementations should provide a
//  mechanism allowing to restrict upper and lower bounds (either at
//  construction or subsequently), so that when passing iterators into
//  functions it is not necessary to pass with them a length
//  parameter. For example, PD_StruxIterator can provide access to the
//  entire document from the start of the strux onwards; we might want
//  to restrict this to the part that only belongs to a particular
//  TextRun, etc.
//
//  Notes on use
//  ------------
//  When passing iterators into functions, the iterator should be set
//  at the position where processing is to start, i.e., the user is
//  not expected to reposition the iterator before commencing
//  processing. Also, the upper boundary should be restricted
//  appropriately to indicate where the processing is to stop; this is
//  preferable to passing an extra length parameter.
//
//  Tomas, November, 2003
//

//////////////////////////////////////////////////////////////////////
// the following are values that the getChar() function can fall back
// on when things are not entirely right ...
//
// UT_IT_NOT_CHARACTER: when at the current position we have
//                      something else than text (image, etc)
//
// UT_IT_ERROR: when things are really not going as they should
//              NB: this is just to have something to fall back on,
//              not an error reporting mechanism; for that see
//              getStatus() below
//
// NOTE(review): UCS_SPACE is not defined in this header; presumably it
// is provided by ut_types.h, which is included just below -- confirm.
// Being a macro body, it is only expanded where UT_IT_NOT_CHARACTER is
// used, so the definition may legitimately come after this #define.
//
#define UT_IT_NOT_CHARACTER UCS_SPACE
#define UT_IT_ERROR 0xffffffff

#include "ut_types.h"

// Forward declarations only; none of these types is used by the
// declarations visible in this header.
// NOTE(review): presumably kept for the benefit of iterator
// implementations that include this file -- confirm before removing.
class PD_Document;
class pt_PieceTable;
class pf_Frag;

/////////////////////////////////////////////////////////////
//
// The following enum defines possible iterator states:
//
// OK: need I say more?
//
// OutOfBounds: last positioning operation took the iterator
//              out of bounds; this error state is recoverable
//              by using the indexing operator [], or calling
//              setPosition() but the use of relative increment
//              operators (++, --, +=, -=) in this state will
//              lead to undefined results.
//
// Error: any other error; this state is irrecoverable, clean up
//        and go home
//
enum UTIterStatus
{
    UTIter_OK,
    UTIter_OutOfBounds,
    UTIter_Error
};

/////////////////////////////////////////////////////////////////////////
// Abstract interface of a sequential text iterator. Every operation is
// pure virtual; the behaviour described in the comments below is the
// contract that concrete iterators are expected to honour.
//
class ABI_EXPORT UT_TextIterator
{
  public:
    // virtual, so that a concrete iterator can be destroyed through a
    // pointer to this interface (e.g. one obtained from makeCopy())
    virtual ~UT_TextIterator() {}

    /////////////////////////////////////////////////////////////////////////
    // data accessor; retrieves character at present position
    //
    // NB: I.getChar() is functionally equivalent to I[I.getPosition()]
    //
    // When things are not entirely right, the function can fall back on
    // UT_IT_NOT_CHARACTER or UT_IT_ERROR (defined earlier in this
    // header); use getStatus() for actual error reporting.
    //
    virtual UT_UCS4Char getChar() = 0;

    /////////////////////////////////////////////////////////////////////////
    // position accessor; returns a value representing current position
    //
    // NB: The position can be expressed in an arbitrary coordinate
    // system, typically one that makes sense to the actual
    // implementation; when an iterator is passed into a function, the
    // starting position might not be 0.
    //
    virtual UT_uint32 getPosition() const = 0;

    ////////////////////////////////////////////////////////////////////
    // moves iterator to position pos
    //
    virtual void setPosition(UT_uint32 pos) = 0;

    ///////////////////////////////////////////////////////////////////
    // set and retrieve upper bounds
    //
    virtual void      setUpperLimit(UT_uint32 maxpos) = 0;
    virtual UT_uint32 getUpperLimit() const = 0;

    ///////////////////////////////////////////////////////////////////
    // returns the current state of the iterator (see definition of
    // UTIterStatus above)
    //
    virtual UTIterStatus getStatus() const = 0;

    ///////////////////////////////////////////////////////////////////
    // finds first occurrence of given string, looking in direction
    // indicated by bForward
    // failure is indicated through getStatus() == UTIter_OutOfBounds;
    //
    // NOTE(review): the meaning of the returned UT_uint32 is not stated
    // in this header -- presumably the position of the match; confirm
    // against a concrete implementation.
    //
    // NB: default arguments of virtual functions are selected from the
    // static type of the expression used for the call, so overrides
    // should repeat bForward = true to avoid surprises.
    //
    virtual UT_uint32 find(UT_UCS4Char * what, UT_uint32 iLen, bool bForward = true) = 0;
    virtual UT_uint32 find(UT_TextIterator & text, UT_uint32 iLen, bool bForward = true) = 0;

    ///////////////////////////////////////////////////////////////////
    // makes a copy of the iterator in its present state
    //
    // NOTE(review): ownership of the returned object is not stated
    // here -- presumably the caller is responsible for deleting it;
    // confirm against the implementations.
    //
    virtual UT_TextIterator * makeCopy() const = 0;

    ///////////////////////////////////////////////////////////////////
    // increment operators
    //
    // NB: We intentionally define prefix operators only, as post-fix
    // versions provide no real advantage, and are less efficient
    //
    virtual UT_TextIterator & operator ++ () = 0;
    virtual UT_TextIterator & operator -- () = 0;
    virtual UT_TextIterator & operator += (UT_sint32 i) = 0;
    virtual UT_TextIterator & operator -= (UT_sint32 i) = 0;

    ////////////////////////////////////////////////////////////////////
    // subscript operator []; repositions iterator and returns
    // character at new position
    //
    // NB(1): the operator physically advances the iterator to position
    // pos before returning, i.e.,
    //
    //     UT_UCS4Char c = I[p];
    //
    // and
    //
    //     I.setPosition(p);
    //     UT_UCS4Char c = I.getChar();
    //
    // are exactly equivalent, leaving the iterator in the same state
    //
    // NB(2): if passed an iterator as an argument in a function, you
    // need to know the initial position to use this operator for
    // processing which is relative to the state of the iterator when
    // passed to you, i.e., f1() and f2() below process the same
    // characters, f3() does not.
    //
    //     void f1(UT_TextIterator & I, UT_uint32 len)
    //     {
    //         UT_uint32 pos = I.getPosition();
    //
    //         for(UT_uint32 i = pos; i < len + pos; i++)
    //         {
    //             UT_UCS4Char c = I[i];
    //             // do something with c ...
    //         }
    //     }
    //
    //     void f2(UT_TextIterator & I, UT_uint32 len)
    //     {
    //         for(UT_uint32 i = 0; i < len; ++i, ++I)
    //         {
    //             UT_UCS4Char c = I.getChar();
    //             // do something with c ...
    //         }
    //     }
    //
    // In contrast, f3() will start at the leftmost edge of the
    // theoretical iterator range, which is probably not what you
    // want; the actual implementation of the iterator can in fact
    // restrict valid range of the subscript to an arbitrary range
    // (i.e., I[0] may produce OutOfBounds state).
    //
    //     void f3(UT_TextIterator & I, UT_uint32 len)
    //     {
    //         for(UT_uint32 i = 0; i < len; i++)
    //         {
    //             UT_UCS4Char c = I[i];
    //             // do something with c ...
    //         }
    //     }
    //
    // Bottom Line: unless told otherwise, assume that
    // processing is to start from I.getPosition(), not 0.
    //
    virtual UT_UCS4Char operator [](UT_uint32 pos) = 0;
};

#endif //UT_ITERATOR_H