ESLA

Embeddable Scripting LAnguage

frantz@pangea.stanford.edu

Stanford University, Rock Fracture Project research group

© 2003

tokenizer.h

Go to the documentation of this file.
00001 /*************************************************************************
00002  *              ESLA: Embeddable Scripting LAnguage
00003  *                  Copyright (C) 2003 Frantz Maerten
00004  *
00005  *   This  program  is  free  software;  you  can  redistribute it and/or
00006  *   modify  it  under  the  terms  of  the GNU General Public License as
00007  *   published  by  the Free Software Foundation; either version 2 of the
00008  *   License, or (at your option) any later version.
00009  *
00010  *   If  you modify this software, you should contact the author, include
00011  *   a  notice giving the name of the person performing the modification,
00012  *   the date of modification, and the reason for such modification.
00013  *
00014  *   Note   that   the   GNU  General  Public  License  does  not  permit
00015  *   incorporating the Software into proprietary programs.
00016  *
00017  *  Contact: Frantz Maerten
00018  *     frantz@pangea.stanford.edu
00019  *
00020  *     Dept. of Geological & Environmental Sciences
00021  *     Stanford University 
00022  *     Stanford, CA 94305-2115
00023  *     USA
00024  *************************************************************************/
00025 
00026 
00027 #ifndef _REMESH_UTILS_TOKENIZER__
00028 #define _REMESH_UTILS_TOKENIZER__
00029 
00030 #include "../esla_namespace.h"
00031 #include <string>
00032 #include <deque>
00033 #include <vector>
00034     
00035 
00036 BEGIN_LIB_NAMESPACE
00037 
// _npos_ is the maximum value representable by size_t (i.e. (size_t)(-1)
// wraps to SIZE_MAX), used as a "no position" sentinel in the same spirit
// as std::string::npos. NOTE(review): the identifier starts with an
// underscore at global scope, which is reserved to the implementation;
// renaming would break existing callers, so it is left as-is.
const size_t _npos_ = (size_t)(-1);
00040 
/**
 * \brief Configurable string tokenizer.
 *
 * Splits an input string into a list of string tokens, driven by a set of
 * separator characters and refined by "ignore", "endline" (stored as
 * terminators_) and "terminals" character sets, plus two boolean flags
 * (allow empty tokens / include separators in the output).
 *
 * NOTE(review): the exact semantics of the four character sets are
 * implemented in the corresponding .cpp file, which is not visible here;
 * the descriptions below are inferred from member names and should be
 * confirmed against the implementation.
 */
class Tokenizer {
 public:
   /// Result type of tokenize(): the ordered list of extracted tokens.
   typedef std::vector< std::string> ListTokens ;
   /// Presumed upper bound on a single token's length -- confirm in .cpp.
   enum { max_token_length = 1000 } ;
   
   /// Default constructor (defined in the .cpp file).
   Tokenizer() ;
   /// Full configuration constructor.
   /// \param seps          characters treated as token separators
   /// \param allowEmpToks  whether empty tokens are kept in the result
   /// \param ignore        characters skipped during tokenization (presumed)
   /// \param endL          end-of-line/terminator characters (presumed)
   /// \param terminals     terminal characters (presumed single-char tokens)
   /// \param useSeps       whether separators appear in the output (presumed)
   Tokenizer( 
        const std::string& seps, 
        bool  allowEmpToks, 
        const std::string& ignore, 
        const std::string& endL, 
        const std::string& terminals, 
        bool  useSeps) ;
   
   /// Copy constructor.
   Tokenizer( const Tokenizer& tokenizer ) ;
   /// Copy assignment.
   Tokenizer& operator=( const Tokenizer& tokenizer ) ;
   /// Tokenizes \p str using the current configuration; if
   /// \p tolowercase is true the tokens are lower-cased (see lower_case()).
   ListTokens tokenize( const std::string& str, const bool tolowercase=false ) ;
   
   /// Sets the separator character set.
   void separators( const std::string& str ) ;
   /// Returns the separator character set.
   const std::string& separators() const ;
   /// Sets whether separators are included in the output (presumed).
   void use_separators( bool flag ) ;
   /// Returns the include-separators flag.
   bool use_separators() const ;
   /// Sets whether empty tokens are kept in the result.
   void allow_empty_tokens( bool flag ) ;
   /// Returns the allow-empty-tokens flag.
   bool allow_empty_tokens() const ;
   /// Sets the ignored character set.
   void ignore( const std::string& str ) ;
   /// Returns the ignored character set.
   const std::string& ignore() const ;
   /// Sets the end-of-line character set (stored in terminators_).
   void endline( const std::string& str ) ;
   /// Returns the end-of-line character set.
   const std::string& endline() const ;
   /// Sets the terminal character set.
   void terminals( const std::string& str ) ;
   /// Returns the terminal character set.
   const std::string& terminals() const ;
   
 protected:
   /// Appends a finished token to \p tokens (implementation in .cpp);
   /// \p token is the raw character buffer, \p index its current length.
   void add_token ( 
        std::vector< std::string>& tokens,
        char* token,
        int& index, 
        const bool tolowercase) ;
   /// Worker used by the public tokenize() overload (implementation in .cpp).
   void tokenize (
      const std::string& str, 
      std::vector< std::string>& 
      tokens, 
      const bool tolowercase ) ;
   
   /// Returns a lower-cased copy of \p s.
   std::string lower_case(const std::string& s) ;
   
 private:
   std::string separators_ ;           // token separator characters
   std::string ignore_ ;               // characters to skip (presumed)
   std::string terminators_ ;          // end-of-line set, exposed as endline()
   std::string terminals_ ;            // terminal characters (presumed)
   bool include_separators_ ;          // exposed as use_separators()
   bool allow_empty_tokens_ ;          // exposed as allow_empty_tokens()
};
00099 
00100 
00101 inline void 
00102 Tokenizer::separators( const std::string& string ) { 
00103   separators_ = string; 
00104 }
00105 
00106 inline const std::string& Tokenizer::separators() const {
00107   return separators_;
00108 }
00109 
00110 inline bool Tokenizer::use_separators() const {
00111   return include_separators_;
00112 }
00113 
00114 inline void Tokenizer::use_separators( bool flag ) {
00115   include_separators_ = flag;
00116 }
00117 
00118 inline bool Tokenizer::allow_empty_tokens() const {
00119   return allow_empty_tokens_;
00120 }
00121 
00122 inline void Tokenizer::allow_empty_tokens( bool flag ) {
00123   allow_empty_tokens_ = flag;
00124 }
00125 
00126 inline void Tokenizer::ignore( const std::string& string ) {
00127   ignore_ = string;
00128 }
00129 
00130 inline const std::string& Tokenizer::ignore() const { 
00131   return ignore_;
00132 }
00133 
00134 inline void Tokenizer::endline( const std::string& string ) {
00135   terminators_ = string;
00136 }
00137 
00138 inline const std::string& Tokenizer::endline() const {
00139   return terminators_;
00140 }
00141 
00142 inline void Tokenizer::terminals( const std::string& string ) {
00143   terminals_ = string;
00144 }
00145 
00146 inline const std::string& Tokenizer::terminals() const {
00147   return terminals_;
00148 }
00149 
00150 END_LIB_NAMESPACE
00151 
00152 #endif
00153 

Generated on Wed May 14 11:42:31 2003 for Esla-lib by doxygen1.3-rc1