Subversion Repositories public

Rev

Rev 55 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
/// string tokenizer header
/**
 * \file strtok.h
 *
 * string tokenizer
 *
 * Copyright (C) 2006, 2007, 2008 Lukas Jelinek, <lukas@aiken.cz>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of one of the following licenses:
 *
 * \li 1. X11-style license (see LICENSE-X11)
 * \li 2. GNU Lesser General Public License, version 2.1 (see LICENSE-LGPL)
 * \li 3. GNU General Public License, version 2  (see LICENSE-GPL)
 *
 * If you want to help with choosing the best license for you,
 * please visit http://www.gnu.org/licenses/license-list.html.
 *
 */
21
 
22
 
23
#ifndef _STRTOK_H_
24
#define _STRTOK_H_
25
 
26
 
27
#include <string>
28
 
55 luk 29
/// Shorthand for the size/index type of std::string.
typedef std::string::size_type SIZE;
30
 
47 luk 31
/// Simple string tokenizer class.
/**
 * Splits a string by a single delimiter character into a sequence
 * of elements (tokens) which are handed out one at a time.
 *
 * All operations are made on a copy of the original string
 * (which may in fact be a copy-on-write instance).
 *
 * The original string is left unchanged. All tokens are returned
 * as newly created strings.
 *
 * A prefix character may be specified: the character that
 * immediately follows the prefix is not considered a delimiter.
 * If no prefix is specified (or the NUL character, 0x00, is given)
 * this feature is disabled. The most commonly used prefix is
 * a backslash ('\').
 *
 * This class is not thread-safe.
 *
 * Performance note: This class is currently not intended
 * to be very fast. Speed optimizations will be done later.
 */
class StringTokenizer
{
public:
  /// Constructor.
  /**
   * Creates a ready-to-use tokenizer.
   *
   * \param[in] rStr string for tokenizing
   * \param[in] cDelim delimiter (separator) character
   * \param[in] cPrefix character which is prepended if a
   *            character must not separate tokens
   *            (NUL, the default, means 'no prefix')
   */
  StringTokenizer(const std::string& rStr, char cDelim = ',', char cPrefix = '\0');

  /// Destructor.
  ~StringTokenizer() {}

  /// Checks whether the tokenizer can provide more tokens.
  /**
   * The check compares the current position with the stored
   * string length.
   *
   * \return true = more tokens available, false = otherwise
   */
  bool HasMoreTokens() const
  {
    return m_pos < m_len;
  }

  /// Returns the next token.
  /**
   * If a prefix is defined it is stripped from the returned
   * string (e.g. 'abc\ def' is transformed to 'abc def'
   * while the prefix is '\').
   *
   * \param[in] fSkipEmpty skip empty strings (caused by several
   *            consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextTokenRaw()
   */
  std::string GetNextToken(bool fSkipEmpty = false);

  /// Returns the next token without stripping prefixes.
  /**
   * This method always returns an unmodified string even
   * if it contains prefix characters.
   *
   * \param[in] fSkipEmpty skip empty strings (caused by several
   *            consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextToken()
   */
  std::string GetNextTokenRaw(bool fSkipEmpty = false);

  /// Returns the remainder of the source string.
  /**
   * This method returns everything that has not been
   * processed (tokenized) yet and moves the current
   * position to the end of the string.
   *
   * If a prefix is defined it is stripped from
   * the returned string.
   *
   * \return remainder string
   */
  std::string GetRemainder();

  /// Sets a delimiter (separator) character.
  /**
   * The new delimiter affects only tokens returned later;
   * the position in the string is not affected.
   *
   * \param[in] cDelim delimiter character
   */
  void SetDelimiter(char cDelim)
  {
    m_cDelim = cDelim;
  }

  /// Returns the delimiter (separator) character.
  /**
   * \return delimiter character
   */
  char GetDelimiter() const
  {
    return m_cDelim;
  }

  /// Sets a prefix character.
  /**
   * The new prefix affects only tokens returned later;
   * the position in the string is not affected.
   *
   * If you specify a NUL character (0x00) here the prefix
   * will not be used.
   *
   * \param[in] cPrefix prefix character
   *
   * \sa SetNoPrefix()
   */
  void SetPrefix(char cPrefix)
  {
    m_cPrefix = cPrefix;
  }

  /// Returns the prefix character.
  /**
   * \return prefix character
   */
  char GetPrefix() const
  {
    return m_cPrefix;
  }

  /// Sets the prefix to 'no prefix'.
  /**
   * Calling this method is equivalent to SetPrefix((char) 0).
   *
   * \sa SetPrefix()
   */
  void SetNoPrefix()
  {
    SetPrefix('\0');
  }

  /// Resets the tokenizer.
  /**
   * Re-initializes tokenizing to the start of the string.
   * The delimiter and the prefix are left as they are.
   */
  void Reset()
  {
    m_pos = 0;
  }

private:
  std::string m_str;            ///< tokenized string
  char m_cDelim;                ///< delimiter character
  char m_cPrefix;               ///< prefix character
  std::string::size_type m_pos; ///< current position
  std::string::size_type m_len; ///< string length

  /// Strips all prefix characters.
  /**
   * \param[in] s source string
   * \param[in] cnt string length
   * \return modified string
   */
  // The count type is spelled out in full (it is the same type as the
  // file-level SIZE typedef) to match m_pos and m_len above.
  std::string StripPrefix(const char* s, std::string::size_type cnt);

  /// Extracts the next token (internal method).
  /**
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   * \param[in] fStripPrefix strip prefix characters yes/no
   */
  void _GetNextToken(std::string& rToken, bool fStripPrefix);

  /// Extracts the next token (internal method).
  /**
   * This method does no checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenNoPrefix(std::string& rToken);

  /// Extracts the next token (internal method).
  /**
   * This method does checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenWithPrefix(std::string& rToken);
};
229
 
230
 
231
#endif //_STRTOK_H_