dirroot}/search/Zend/Search/Lucene/Analysis/Token.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php"; /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php"; /** Zend_Search_Lucene_Analysis_TokenFilter_StopWords */ require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php'; /** Zend_Search_Lucene_Analysis_TokenFilter_ShortWords */ require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php"; /** * An Analyzer is used to analyze text. * It thus represents a policy for extracting index terms from text. * * Note: * Lucene Java implementation is oriented to streams. It provides effective work * with a huge documents (more then 20Mb). * But engine itself is not oriented such documents. 
* Thus Zend_Search_Lucene analysis API works with data strings and sets (arrays).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * Shared default analyzer. Stays null until setDefault() registers an
     * instance or getDefault() creates one on first use.
     *
     * @var Zend_Search_Lucene_Analysis_Analyzer|null
     */
    private static $_defaultImpl = null;

    /**
     * Text currently being analyzed, as handed to setInput().
     *
     * @var string
     */
    protected $_input = null;

    /**
     * Encoding of $_input, as handed to setInput(). Empty string when the
     * caller did not specify one.
     *
     * @var string
     */
    protected $_encoding = '';

    /**
     * Convenience wrapper around the stream API: loads the text with
     * setInput() and drains nextToken() into an array.
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding).
     *
     * @param string $data     Text to split into tokens.
     * @param string $encoding Encoding of $data; defaults to ''.
     * @return array Tokens in the order nextToken() produced them; empty when
     *               the first nextToken() call already returns null.
     */
    public function tokenize($data, $encoding = '')
    {
        $this->setInput($data, $encoding);

        $tokens = array();

        // Only a strict null ends the stream, so falsy tokens are still collected.
        for ($token = $this->nextToken(); $token !== null; $token = $this->nextToken()) {
            $tokens[] = $token;
        }

        return $tokens;
    }

    /**
     * Tokenization stream API: load a new input string.
     *
     * Stores the text and its encoding, then calls reset(). The properties
     * are assigned first, so reset() already sees the new values.
     *
     * @param string $data     Text to analyze.
     * @param string $encoding Encoding of $data; defaults to ''.
     */
    public function setInput($data, $encoding = '')
    {
        $this->_input    = $data;
        $this->_encoding = $encoding;

        $this->reset();
    }

    /**
     * Tokenization stream API: reset the token stream.
     *
     * Called by setInput() every time a new input is loaded.
     */
    abstract public function reset();

    /**
     * Tokenization stream API: fetch the next token.
     *
     * Must return null once the stream is exhausted; tokenize() relies on
     * that to stop.
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding).
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    abstract public function nextToken();

    /**
     * Register the analyzer that getDefault() will hand out from now on.
     *
     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
     */
    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
    {
        self::$_defaultImpl = $analyzer;
    }

    /**
     * Return the default analyzer.
     *
     * When none has been registered yet, a
     * Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive is
     * created, remembered and returned.
     *
     * @return Zend_Search_Lucene_Analysis_Analyzer
     */
    public static function getDefault()
    {
        if (self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
            return self::$_defaultImpl;
        }

        self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();

        return self::$_defaultImpl;
    }
}