Choreonoid  1.8
Tokenizer.h
Go to the documentation of this file.
1 /*
2  The classes implemented in this file are based on the boost.tokenizer implementation.
3  The copyright of the original implementation is as follows:
4  (c) Copyright Jeremy Siek and John R. Bandela 2001.
5  Distributed under the Boost Software License, Version 1.0.
6  (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 */
8 
9 #ifndef CNOID_UTIL_TOKENIZER_H
10 #define CNOID_UTIL_TOKENIZER_H
11 
12 #include <string>
13 #include <algorithm>
14 #include <stdexcept>
15 #include <cctype>
16 #include <cassert>
17 
18 namespace cnoid {
19 
// Exception type thrown by EscapedListSeparator when it meets an escape
// character that is either the last character of the input or is followed
// by a character that does not form a recognized escape sequence.
struct escaped_list_error : public std::runtime_error
{
    // what_arg is forwarded unchanged to std::runtime_error and is the
    // text later returned by what().
    escaped_list_error(const std::string& what_arg)
        : std::runtime_error(what_arg)
    {
    }
};
23 
24 
25 template<class Char, class Traits = typename std::basic_string<Char>::traits_type>
27 {
28 private:
29  typedef std::basic_string<Char,Traits> string_type;
30  struct char_eq {
31  Char e_;
32  char_eq(Char e):e_(e) { }
33  bool operator()(Char c) {
34  return Traits::eq(e_,c);
35  }
36  };
37  string_type escape_;
38  string_type c_;
39  string_type quote_;
40  bool last_;
41 
42  bool is_escape(Char e){
43  char_eq f(e);
44  return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
45  }
46 
47  bool is_c(Char e){
48  char_eq f(e);
49  return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
50  }
51 
52  bool is_quote(Char e){
53  char_eq f(e);
54  return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
55  }
56 
57  template <typename iterator, typename Token>
58  void do_escape(iterator& next,iterator end,Token& tok){
59  if(++next == end){
60  throw escaped_list_error("cannot end with escape");
61  }
62  if(Traits::eq(*next,'n')){
63  tok += '\n';
64  return;
65  } else if(is_quote(*next)){
66  tok += *next;
67  return;
68  } else if(is_c(*next)){
69  tok += *next;
70  return;
71  } else if (is_escape(*next)){
72  tok += *next;
73  return;
74  } else {
75  throw escaped_list_error("unknown escape sequence");
76  }
77  }
78 
79 public:
80  explicit EscapedListSeparator(Char e = '\\', Char c = ',',Char q = '\"')
81  : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
82 
83  EscapedListSeparator(string_type e, string_type c, string_type q)
84  : escape_(e), c_(c), quote_(q), last_(false) { }
85 
86  void reset(){ last_=false; }
87 
88  template <typename InputIterator, typename Token>
89  bool operator()(InputIterator& next,InputIterator end,Token& tok) {
90  bool bInQuote = false;
91  tok = Token();
92 
93  if(next == end){
94  if(last_){
95  last_ = false;
96  return true;
97  } else {
98  return false;
99  }
100  }
101  last_ = false;
102  for(; next != end; ++next){
103  if(is_escape(*next)){
104  do_escape(next, end, tok);
105  } else if(is_c(*next)){
106  if(!bInQuote){
107  // If we are not in quote, then we are done
108  ++next;
109  // The last character was a c, that means there is
110  // 1 more blank field
111  last_ = true;
112  return true;
113  }
114  else tok += *next;
115  }
116  else if (is_quote(*next)){
117  bInQuote=!bInQuote;
118  } else {
119  tok += *next;
120  }
121  }
122  return true;
123  }
124 };
125 
126 
128 
// Selects whether CharSeparator reports empty tokens (for example the empty
// field between two adjacent dropped delimiters) or silently skips them.
// NOTE(review): the enumerator order is reconstructed; this file only ever
// compares the values for equality.
enum EmptyTokenPolicy { DROP_EMPTY_TOKENS, KEEP_EMPTY_TOKENS };

/**
   Tokenizer function that splits a character sequence at delimiter
   characters.

   Two kinds of delimiters exist:
   - dropped delimiters separate tokens and are never reported;
   - kept delimiters separate tokens and are themselves reported as
     one-character tokens.

   Based on the boost.tokenizer implementation (see the header comment of
   this file).
*/
template <typename Char, typename Tr = typename std::basic_string<Char>::traits_type>
class CharSeparator
{
    typedef std::basic_string<Char,Tr> string_type;

public:
    /**
       \param dropped_delims  null-terminated set of dropped delimiters;
                              a null pointer is treated as an empty set
       \param kept_delims     null-terminated set of kept delimiters, or 0
                              for none
       \param empty_tokens    whether empty tokens are reported
    */
    explicit
    CharSeparator(const Char* dropped_delims,
                  const Char* kept_delims = 0,
                  EmptyTokenPolicy empty_tokens = DROP_EMPTY_TOKENS)
        : m_use_ispunct(false),
          m_use_isspace(false),
          m_empty_tokens(empty_tokens),
          m_output_done(false)
    {
        // Constructing a string from a null pointer is undefined, so both
        // delimiter sets are only assigned when a pointer was supplied.
        if(dropped_delims){
            m_dropped_delims = dropped_delims;
        }
        if(kept_delims){
            m_kept_delims = kept_delims;
        }
    }

    // use ispunct() for kept delimiters and isspace for dropped.
    explicit
    CharSeparator()
        : m_use_ispunct(true),
          m_use_isspace(true),
          m_empty_tokens(DROP_EMPTY_TOKENS),
          m_output_done(false) { }

    // NOTE(review): this does not clear m_output_done, so a separator that
    // has already been used in KEEP_EMPTY_TOKENS mode carries that flag into
    // the next sequence. Left unchanged here; confirm whether that is intended.
    void reset() { }

    /**
       Extracts the next token starting at next.

       \param next  current position; advanced past the consumed characters
       \param end   end of the input
       \param tok   receives the token via tok.assign(first, last)
       \return true when a token was produced, false when the input is
               exhausted
    */
    template <typename InputIterator, typename Token>
    bool operator()(InputIterator& next, InputIterator end, Token& tok)
    {
        // skip past all dropped_delims
        if(m_empty_tokens == DROP_EMPTY_TOKENS){
            for(; next != end && is_dropped(*next); ++next){ }
        }

        InputIterator start(next);

        if(m_empty_tokens == DROP_EMPTY_TOKENS){
            if(next == end){
                return false;
            }
            // if we are on a kept_delims move past it and stop
            if(is_kept(*next)){
                ++next;
            } else {
                // append all the non delim characters
                for(; next != end && !is_dropped(*next) && !is_kept(*next); ++next);
            }
        } else { // m_empty_tokens == keep_empty_tokens

            // Handle empty token at the end
            if(next == end){
                if(m_output_done == false){
                    m_output_done = true;
                    tok.assign(start, next);
                    return true;
                } else {
                    return false;
                }
            }

            if(is_kept(*next)){
                if(m_output_done == false){
                    // No token was produced since the previous delimiter:
                    // emit the empty token that precedes this kept delimiter
                    // (start == next, so the token assigned below is empty).
                    m_output_done = true;
                } else {
                    // Emit the kept delimiter itself as a token.
                    ++next;
                    m_output_done = false;
                }
            } else if(m_output_done == false && is_dropped(*next)){
                // Empty token in front of a dropped delimiter.
                m_output_done = true;
            } else {
                if(is_dropped(*next)){
                    // Step over the delimiter that ended the previous token.
                    start = ++next;
                }
                for(; next != end && !is_dropped(*next) && !is_kept(*next); ++next);
                m_output_done = true;
            }
        }
        tok.assign(start, next);
        return true;
    }

private:
    string_type m_kept_delims;
    string_type m_dropped_delims;
    bool m_use_ispunct;
    bool m_use_isspace;
    EmptyTokenPolicy m_empty_tokens;
    bool m_output_done;  // KEEP_EMPTY_TOKENS mode: a token has been emitted since the last delimiter

    // Converts a character to a value that may be passed to the <cctype>
    // classification functions. Those functions require a value
    // representable as unsigned char (or EOF), so a one-byte character is
    // routed through unsigned char to keep negative char values defined.
    static int ctype_arg(Char c)
    {
        return (sizeof(Char) == 1)
            ? static_cast<int>(static_cast<unsigned char>(c))
            : static_cast<int>(c);
    }

    // True when E is a kept delimiter: a member of m_kept_delims when that
    // set is non-empty, otherwise punctuation when m_use_ispunct is set.
    bool is_kept(Char E) const
    {
        if(m_kept_delims.length()){
            return m_kept_delims.find(E) != string_type::npos;
        } else if(m_use_ispunct){
            return std::ispunct(ctype_arg(E)) != 0;
        } else {
            return false;
        }
    }

    // True when E is a dropped delimiter: a member of m_dropped_delims when
    // that set is non-empty, otherwise whitespace when m_use_isspace is set.
    bool is_dropped(Char E) const
    {
        if(m_dropped_delims.length()){
            return m_dropped_delims.find(E) != string_type::npos;
        } else if(m_use_isspace){
            return std::isspace(ctype_arg(E)) != 0;
        } else {
            return false;
        }
    }
};
245 
246 
/**
   Iterator that walks over the tokens a tokenizer function extracts from
   the range [begin, end).

   TokenizerFunc must provide reset() and a call operator
   bool f(Iterator& next, Iterator end, Type& tok) that stores the next token
   in tok, advances next, and returns false when no token is left.

   The iterator is "valid" while it holds a token; an invalid iterator plays
   the role of the end iterator.
*/
template <class TokenizerFunc, class Iterator, class Type>
class TokenIterator
{
    friend class iterator_core_access;

    TokenizerFunc f_;
    Iterator begin_;  // position in the input after the current token
    Iterator end_;
    bool valid_;      // true while tok_ holds a token
    Type tok_;

    // Fetches the next token; must only be called on a valid iterator.
    void increment(){
        assert(valid_);
        valid_ = f_(begin_, end_, tok_);
    }

    // Returns the current token; must only be called on a valid iterator.
    const Type& dereference() const {
        assert(valid_);
        return tok_;
    }

    // Two valid iterators are equal when they are at the same position of
    // the same range; otherwise they are equal when both are invalid.
    template<class Other>
    bool equal(const Other& a) const {
        return (a.valid_ && valid_)
            ? ((a.begin_ == begin_) && (a.end_ == end_))
            : (a.valid_ == valid_);
    }

    // Resets the tokenizer function and reads the first token, if any.
    void initialize(){
        if(valid_) return;
        f_.reset();
        valid_ = (begin_ != end_) ? f_(begin_, end_, tok_) : false;
    }

public:
    // Creates an invalid (end) iterator.
    TokenIterator() : begin_(), end_(), valid_(false), tok_() { }

    // Tokenizes [begin, e) with a copy of f and positions on the first token.
    TokenIterator(TokenizerFunc f, Iterator begin, Iterator e = Iterator())
        : f_(f), begin_(begin), end_(e), valid_(false), tok_() { initialize(); }

    // Same as above with a default-constructed tokenizer function.
    TokenIterator(Iterator begin, Iterator e = Iterator())
        : f_(), begin_(begin), end_(e), valid_(false), tok_() { initialize(); }

    // Pre-increment: advances to the next token.
    TokenIterator& operator++(){
        increment();
        return *this;
    }

    // Post-increment: advances and returns a copy of the previous state.
    TokenIterator operator++(int){
        TokenIterator retval = *this;
        increment();
        return retval;
    }

    bool operator==(const TokenIterator& other) const {
        return equal(other);
    }

    bool operator!=(const TokenIterator& other) const {
        return !(*this == other);
    }

    const Type& operator*() const {
        return dereference();
    }

    Type const * operator->() const {
        return &dereference();
    }

    // Current position in the underlying range.
    Iterator base() const { return begin_; }

    // End of the underlying range.
    Iterator end() const { return end_; }

    // Copy of the tokenizer function in its current state.
    TokenizerFunc tokenizer_function() const { return f_; }

    // Copy of the stored token; unlike operator* this does not assert validity.
    Type current_token() const { return tok_; }

    // True when no token is available.
    bool at_end() const { return !valid_; }
};
329 
330 
331 template <
332  class TokenizerFunc = CharSeparator<char>,
333  class Iterator = std::string::const_iterator,
334  class Type = std::string
335  >
337 {
338 public:
340 };
341 
342 
343 // Type has to be first because it needs to be explicitly specified
344 // because there is no way the function can deduce it.
345 template<class Type, class Iterator, class TokenizerFunc>
347 make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun){
348  typedef typename
350  return ret_type(fun,begin,end);
351 }
352 
353 
354 template <
355  typename TokenizerFunc = CharSeparator<char>,
356  typename Iterator = std::string::const_iterator,
357  typename Type = std::string
358  >
360 {
361 private:
363  typedef typename TGen::type iter;
364 
365 public:
366  typedef iter iterator;
368  typedef Type value_type;
370  typedef const value_type& const_reference;
371  typedef value_type* pointer;
372  typedef const pointer const_pointer;
373  typedef void size_type;
374  typedef void difference_type;
375 
376  Tokenizer(const TokenizerFunc& f)
377  : f_(f) { }
378 
379  Tokenizer(Iterator first, Iterator last, const TokenizerFunc& f = TokenizerFunc())
380  : first_(first), last_(last), f_(f) { }
381 
382  template <typename Container>
383  Tokenizer(const Container& c)
384  : first_(c.begin()), last_(c.end()), f_() { }
385 
386  template <typename Container>
387  Tokenizer(const Container& c,const TokenizerFunc& f)
388  : first_(c.begin()), last_(c.end()), f_(f) { }
389 
390  void assign(Iterator first, Iterator last){
391  first_ = first;
392  last_ = last;
393  }
394 
395  void assign(Iterator first, Iterator last, const TokenizerFunc& f){
396  assign(first,last);
397  f_ = f;
398  }
399 
400  template <typename Container>
401  void assign(const Container& c){
402  assign(c.begin(),c.end());
403  }
404 
405  template <typename Container>
406  void assign(const Container& c, const TokenizerFunc& f){
407  assign(c.begin(),c.end(),f);
408  }
409 
410  iter begin() const { return iter(f_,first_,last_); }
411  iter end() const { return iter(f_,last_,last_); }
412 
413 private:
414  Iterator first_;
415  Iterator last_;
416  TokenizerFunc f_;
417 };
418 
419 }
420 
421 #endif
cnoid::CharSeparator
Definition: Tokenizer.h:130
cnoid::EmptyTokenPolicy
EmptyTokenPolicy
Definition: Tokenizer.h:127
cnoid::TokenIterator::operator!=
bool operator!=(const TokenIterator &other) const
Definition: Tokenizer.h:307
cnoid::EscapedListSeparator::operator()
bool operator()(InputIterator &next, InputIterator end, Token &tok)
Definition: Tokenizer.h:89
cnoid::Tokenizer::assign
void assign(Iterator first, Iterator last)
Definition: Tokenizer.h:390
cnoid::Tokenizer::const_iterator
iter const_iterator
Definition: Tokenizer.h:367
cnoid::TokenIterator::at_end
bool at_end() const
Definition: Tokenizer.h:327
cnoid::Tokenizer
Definition: Tokenizer.h:359
cnoid::KEEP_EMPTY_TOKENS
@ KEEP_EMPTY_TOKENS
Definition: Tokenizer.h:127
cnoid::Tokenizer::Tokenizer
Tokenizer(Iterator first, Iterator last, const TokenizerFunc &f=TokenizerFunc())
Definition: Tokenizer.h:379
cnoid::TokenIterator::TokenIterator
TokenIterator(Iterator begin, Iterator e=Iterator())
Definition: Tokenizer.h:289
cnoid::CharSeparator::CharSeparator
CharSeparator(const Char *dropped_delims, const Char *kept_delims=0, EmptyTokenPolicy empty_tokens=DROP_EMPTY_TOKENS)
Definition: Tokenizer.h:136
cnoid::CharSeparator::operator()
bool operator()(InputIterator &next, InputIterator end, Token &tok)
Definition: Tokenizer.h:162
cnoid::EscapedListSeparator::reset
void reset()
Definition: Tokenizer.h:86
cnoid::TokenIterator::base
Iterator base() const
Definition: Tokenizer.h:319
cnoid::Tokenizer::const_pointer
const typedef pointer const_pointer
Definition: Tokenizer.h:372
cnoid::TokenIterator::TokenIterator
TokenIterator(TokenizerFunc f, Iterator begin, Iterator e=Iterator())
Definition: Tokenizer.h:286
cnoid::Tokenizer::const_reference
const typedef value_type & const_reference
Definition: Tokenizer.h:370
cnoid::make_token_iterator
TokenIteratorGenerator< TokenizerFunc, Iterator, Type >::type make_token_iterator(Iterator begin, Iterator end, const TokenizerFunc &fun)
Definition: Tokenizer.h:347
cnoid::DROP_EMPTY_TOKENS
@ DROP_EMPTY_TOKENS
Definition: Tokenizer.h:127
cnoid::Tokenizer::Tokenizer
Tokenizer(const TokenizerFunc &f)
Definition: Tokenizer.h:376
cnoid::TokenIterator::operator*
const Type & operator*() const
Definition: Tokenizer.h:311
cnoid::TokenIterator::tokenizer_function
TokenizerFunc tokenizer_function() const
Definition: Tokenizer.h:323
cnoid::TokenIteratorGenerator
Definition: Tokenizer.h:336
cnoid::escaped_list_error
Definition: Tokenizer.h:20
cnoid::TokenIterator::iterator_core_access
friend class iterator_core_access
Definition: Tokenizer.h:250
cnoid::Tokenizer::size_type
void size_type
Definition: Tokenizer.h:373
cnoid::Tokenizer::assign
void assign(const Container &c, const TokenizerFunc &f)
Definition: Tokenizer.h:406
cnoid
Definition: AbstractSceneLoader.h:11
cnoid::TokenIterator
Definition: Tokenizer.h:248
cnoid::TokenIterator::operator++
TokenIterator & operator++()
Definition: Tokenizer.h:292
cnoid::Tokenizer::difference_type
void difference_type
Definition: Tokenizer.h:374
cnoid::TokenIterator::end
Iterator end() const
Definition: Tokenizer.h:321
cnoid::Tokenizer::Tokenizer
Tokenizer(const Container &c)
Definition: Tokenizer.h:383
cnoid::Tokenizer::assign
void assign(const Container &c)
Definition: Tokenizer.h:401
cnoid::Tokenizer::end
iter end() const
Definition: Tokenizer.h:411
cnoid::Tokenizer::assign
void assign(Iterator first, Iterator last, const TokenizerFunc &f)
Definition: Tokenizer.h:395
cnoid::TokenIterator::current_token
Type current_token() const
Definition: Tokenizer.h:325
cnoid::TokenIterator::operator==
bool operator==(const TokenIterator &other) const
Definition: Tokenizer.h:303
cnoid::Tokenizer::Tokenizer
Tokenizer(const Container &c, const TokenizerFunc &f)
Definition: Tokenizer.h:387
cnoid::Tokenizer::begin
iter begin() const
Definition: Tokenizer.h:410
cnoid::escaped_list_error::escaped_list_error
escaped_list_error(const std::string &what_arg)
Definition: Tokenizer.h:21
cnoid::Tokenizer::pointer
value_type * pointer
Definition: Tokenizer.h:371
cnoid::Tokenizer::iterator
iter iterator
Definition: Tokenizer.h:366
cnoid::TokenIterator::operator->
const Type * operator->() const
Definition: Tokenizer.h:315
cnoid::TokenIterator::operator++
TokenIterator operator++(int)
Definition: Tokenizer.h:297
cnoid::TokenIteratorGenerator::type
TokenIterator< TokenizerFunc, Iterator, Type > type
Definition: Tokenizer.h:339
cnoid::EscapedListSeparator::EscapedListSeparator
EscapedListSeparator(string_type e, string_type c, string_type q)
Definition: Tokenizer.h:83
cnoid::TokenIterator::TokenIterator
TokenIterator()
Definition: Tokenizer.h:284
cnoid::CharSeparator::CharSeparator
CharSeparator()
Definition: Tokenizer.h:153
cnoid::Tokenizer::reference
value_type & reference
Definition: Tokenizer.h:369
cnoid::CharSeparator::reset
void reset()
Definition: Tokenizer.h:159
cnoid::Tokenizer::value_type
Type value_type
Definition: Tokenizer.h:368
cnoid::EscapedListSeparator
Definition: Tokenizer.h:26
cnoid::EscapedListSeparator::EscapedListSeparator
EscapedListSeparator(Char e='\\', Char c=',', Char q='\"')
Definition: Tokenizer.h:80