Subversion Repositories public

Rev

Rev 55 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 55 Rev 68
1
1
2
/// string tokenizer header
/**
 * \file strtok.h
 *
 * string tokenizer
 *
 * Copyright (C) 2006 Lukas Jelinek, <lukas@aiken.cz>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of one of the following licenses:
 *
 * \li 1. X11-style license (see LICENSE-X11)
 * \li 2. GNU Lesser General Public License, version 2.1 (see LICENSE-LGPL)
 * \li 3. GNU General Public License, version 2  (see LICENSE-GPL)
 *
 * If you want to help with choosing the best license for you,
 * please visit http://www.gnu.org/licenses/license-list.html.
 *
 */


// NOTE(review): identifiers that start with an underscore followed by an
// upper-case letter (this guard macro and the private _GetNextToken*
// methods below) are reserved for the implementation in C++. They are kept
// unchanged here because code outside this header refers to them.
#ifndef _STRTOK_H_
#define _STRTOK_H_

#include <string>

/// Shorthand for the size/position type of std::string.
typedef std::string::size_type SIZE;

/// Simple string tokenizer class.
/**
 * This class implements a string tokenizer. It splits a string
 * by a delimiter character into a number of elements (tokens)
 * which are provided sequentially.
 *
 * All operations are made on a copy of the original string
 * (which may in fact be a copy-on-write instance).
 *
 * The original string is left unchanged. All tokens are returned
 * as newly created strings.
 *
 * It is possible to specify a prefix character; the character
 * that immediately follows a prefix is not treated as a
 * delimiter. If you don't specify a prefix (or specify the NUL
 * character, 0x00) this feature is disabled. The most commonly
 * used prefix is a backslash ('\').
 *
 * This class is not thread-safe.
 *
 * Performance note: This class is currently not intended
 * to be very fast. Speed optimizations will be done later.
 */
class StringTokenizer
{
public:
  /// Constructor.
  /**
   * Creates a ready-to-use tokenizer.
   *
   * \param[in] rStr string to be tokenized
   * \param[in] cDelim delimiter (separator) character
   * \param[in] cPrefix character which is prepended to a character
   *            that must not separate tokens
   */
  StringTokenizer(const std::string& rStr, char cDelim = ',', char cPrefix = '\0');

  /// Destructor.
  ~StringTokenizer() {}

  /// Checks whether the tokenizer can provide more tokens.
  /**
   * Compares the current position with the stored string length.
   *
   * \return true = more tokens available, false = otherwise
   */
  bool HasMoreTokens() const
  {
    return m_pos < m_len;
  }

  /// Returns the next token.
  /**
   * If a prefix is defined it is stripped from the returned
   * string (e.g. 'abc\ def' is transformed to 'abc def'
   * when the prefix is '\').
   *
   * \param[in] fSkipEmpty skip empty strings (several consecutive delimiters)
   * \return next token or "" if no more tokens are available
   *
   * \sa GetNextTokenRaw()
   */
  std::string GetNextToken(bool fSkipEmpty = false);

  /// Returns the next token without stripping prefix characters.
  /**
   * This method always returns an unmodified string even
   * if it contains prefix characters.
   *
   * \param[in] fSkipEmpty skip empty strings (several consecutive delimiters)
   * \return next token or "" if no more tokens are available
   *
   * \sa GetNextToken()
   */
  std::string GetNextTokenRaw(bool fSkipEmpty = false);

  /// Returns the remainder of the source string.
  /**
   * This method returns everything that has not been
   * processed (tokenized) yet and moves the current
   * position to the end of the string.
   *
   * If a prefix is defined it is stripped from
   * the returned string.
   *
   * \return remainder string
   */
  std::string GetRemainder();

  /// Sets a delimiter (separator) character.
  /**
   * The new delimiter affects only tokens returned later;
   * the position in the string is not affected.
   *
   * NOTE(review): this comment used to say that passing a NUL
   * character (0x00) here disables the prefix. That sentence appears
   * to describe SetPrefix() rather than this method - confirm against
   * the implementation.
   *
   * \param[in] cDelim delimiter character
   */
  void SetDelimiter(char cDelim)
  {
    m_cDelim = cDelim;
  }

  /// Returns the delimiter (separator) character.
  /**
   * \return delimiter character
   */
  char GetDelimiter() const
  {
    return m_cDelim;
  }

  /// Sets a prefix character.
  /**
   * The new prefix affects only tokens returned later;
   * the position in the string is not affected.
   *
   * If you specify the NUL character (0x00) here the prefix
   * will not be used.
   *
   * \param[in] cPrefix prefix character
   *
   * \sa SetNoPrefix()
   */
  void SetPrefix(char cPrefix)
  {
    m_cPrefix = cPrefix;
  }

  /// Returns the prefix character.
  /**
   * \return prefix character
   */
  char GetPrefix() const
  {
    return m_cPrefix;
  }

  /// Sets the prefix to 'no prefix'.
  /**
   * Calling this method is equivalent to SetPrefix((char) 0).
   *
   * \sa SetPrefix()
   */
  void SetNoPrefix()
  {
    SetPrefix('\0');
  }

  /// Resets the tokenizer.
  /**
   * Re-initializes tokenizing to the start of the string
   * (the current position is set back to 0).
   */
  void Reset()
  {
    m_pos = 0;
  }

private:
  std::string m_str;            ///< tokenized string
  char m_cDelim;                ///< delimiter character
  char m_cPrefix;               ///< prefix character
  std::string::size_type m_pos; ///< current position
  std::string::size_type m_len; ///< string length

  /// Strips all prefix characters.
  /**
   * \param[in] s source string
   * \param[in] cnt string length
   * \return modified string
   */
  std::string StripPrefix(const char* s, SIZE cnt);

  /// Extracts the next token (internal method).
  /**
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   * \param[in] fStripPrefix strip prefix characters yes/no
   */
  void _GetNextToken(std::string& rToken, bool fStripPrefix);

  /// Extracts the next token (internal method).
  /**
   * This method does not check for the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenNoPrefix(std::string& rToken);

  /// Extracts the next token (internal method).
  /**
   * This method checks for the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenWithPrefix(std::string& rToken);
};


#endif //_STRTOK_H_
232
 
232