ESLA
Embeddable Scripting LAnguage
Stanford University, Rock Fracture Project research group
© 2003
00001 /************************************************************************* 00002 * ESLA: Embeddable Scripting LAnguage 00003 * Copyright (C) 2003 Frantz Maerten 00004 * 00005 * This program is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU General Public License as 00007 * published by the Free Software Foundation; either version 2 of the 00008 * License, or (at your option) any later version. 00009 * 00010 * If you modify this software, you should contact the author, include 00011 * a notice giving the name of the person performing the modification, 00012 * the date of modification, and the reason for such modification. 00013 * 00014 * Note that the GNU General Public License does not permit 00015 * incorporating the Software into proprietary programs. 00016 * 00017 * Contact: Frantz Maerten 00018 * frantz@pangea.stanford.edu 00019 * 00020 * Dept. of Geological & Environmental Sciences 00021 * Stanford University 00022 * Stanford, CA 94305-2115 00023 * USA 00024 *************************************************************************/ 00025 00026 00027 #ifndef _REMESH_UTILS_TOKENIZER__ 00028 #define _REMESH_UTILS_TOKENIZER__ 00029 00030 #include "../esla_namespace.h" 00031 #include <string> 00032 #include <deque> 00033 #include <vector> 00034 00035 00036 BEGIN_LIB_NAMESPACE 00037 00038 // Set _npos_ to max size of size_t-1 00039 const size_t _npos_ = (size_t)(-1); 00040 00046 class Tokenizer { 00047 public: 00048 typedef std::vector< std::string> ListTokens ; 00049 enum { max_token_length = 1000 } ; 00050 00051 Tokenizer() ; 00052 Tokenizer( 00053 const std::string& seps, 00054 bool allowEmpToks, 00055 const std::string& ignore, 00056 const std::string& endL, 00057 const std::string& terminals, 00058 bool useSeps) ; 00059 00060 Tokenizer( const Tokenizer& tokenizer ) ; 00061 Tokenizer& operator=( const Tokenizer& tokenizer ) ; 00062 ListTokens tokenize( const std::string& str, const bool tolowercase=false ) ; 
00063 00064 void separators( const std::string& str ) ; 00065 const std::string& separators() const ; 00066 void use_separators( bool flag ) ; 00067 bool use_separators() const ; 00068 void allow_empty_tokens( bool flag ) ; 00069 bool allow_empty_tokens() const ; 00070 void ignore( const std::string& str ) ; 00071 const std::string& ignore() const ; 00072 void endline( const std::string& str ) ; 00073 const std::string& endline() const ; 00074 void terminals( const std::string& str ) ; 00075 const std::string& terminals() const ; 00076 00077 protected: 00078 void add_token ( 00079 std::vector< std::string>& tokens, 00080 char* token, 00081 int& index, 00082 const bool tolowercase) ; 00083 void tokenize ( 00084 const std::string& str, 00085 std::vector< std::string>& 00086 tokens, 00087 const bool tolowercase ) ; 00088 00089 std::string lower_case(const std::string& s) ; 00090 00091 private: 00092 std::string separators_ ; 00093 std::string ignore_ ; 00094 std::string terminators_ ; 00095 std::string terminals_ ; 00096 bool include_separators_ ; 00097 bool allow_empty_tokens_ ; 00098 }; 00099 00100 00101 inline void 00102 Tokenizer::separators( const std::string& string ) { 00103 separators_ = string; 00104 } 00105 00106 inline const std::string& Tokenizer::separators() const { 00107 return separators_; 00108 } 00109 00110 inline bool Tokenizer::use_separators() const { 00111 return include_separators_; 00112 } 00113 00114 inline void Tokenizer::use_separators( bool flag ) { 00115 include_separators_ = flag; 00116 } 00117 00118 inline bool Tokenizer::allow_empty_tokens() const { 00119 return allow_empty_tokens_; 00120 } 00121 00122 inline void Tokenizer::allow_empty_tokens( bool flag ) { 00123 allow_empty_tokens_ = flag; 00124 } 00125 00126 inline void Tokenizer::ignore( const std::string& string ) { 00127 ignore_ = string; 00128 } 00129 00130 inline const std::string& Tokenizer::ignore() const { 00131 return ignore_; 00132 } 00133 00134 inline void 
Tokenizer::endline( const std::string& string ) { 00135 terminators_ = string; 00136 } 00137 00138 inline const std::string& Tokenizer::endline() const { 00139 return terminators_; 00140 } 00141 00142 inline void Tokenizer::terminals( const std::string& string ) { 00143 terminals_ = string; 00144 } 00145 00146 inline const std::string& Tokenizer::terminals() const { 00147 return terminals_; 00148 } 00149 00150 END_LIB_NAMESPACE 00151 00152 #endif 00153