Rev 55 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
45 | luk | 1 | |
2 | /// string tokenizer header |
||
3 | /** |
||
4 | * \file strtok.h |
||
5 | * |
||
6 | * string tokenizer |
||
7 | * |
||
100 | luk | 8 | * Copyright (C) 2006, 2007, 2008 Lukas Jelinek, <lukas@aiken.cz> |
45 | luk | 9 | * |
10 | * This program is free software; you can redistribute it and/or |
||
11 | * modify it under the terms of one of the following licenses: |
||
12 | * |
||
13 | * \li 1. X11-style license (see LICENSE-X11) |
||
14 | * \li 2. GNU Lesser General Public License, version 2.1 (see LICENSE-LGPL) |
||
15 | * \li 3. GNU General Public License, version 2 (see LICENSE-GPL) |
||
16 | * |
||
17 | * If you want to help with choosing the best license for you, |
||
18 | * please visit http://www.gnu.org/licenses/license-list.html. |
||
19 | * |
||
20 | */ |
||
21 | |||
22 | |||
23 | #ifndef _STRTOK_H_ |
||
24 | #define _STRTOK_H_ |
||
25 | |||
26 | |||
27 | #include <string> |
||
28 | |||
/// Size/index type used by the tokenizer (alias of std::string::size_type).
typedef std::string::size_type SIZE;

/// Simple string tokenizer class.
/**
 * This class implements a string tokenizer. It splits a string
 * at a delimiter character into a number of elements (tokens)
 * which are provided sequentially.
 *
 * All operations are made on a copy of the original string
 * (which may in fact be a copy-on-write instance).
 *
 * The original string is left unchanged. All tokens are returned
 * as newly created strings.
 *
 * It is possible to specify a prefix character: the character
 * immediately following a prefix is not considered to be
 * a delimiter. If you don't specify this character (or specify
 * the NUL character, 0x00) this feature is disabled. The most
 * commonly used prefix is a backslash ('\').
 *
 * This class is not thread-safe.
 *
 * Performance note: This class is currently not intended
 * to be very fast. Speed optimizations will be done later.
 */
class StringTokenizer
{
public:
    /// Constructor.
    /**
     * Creates a ready-to-use tokenizer.
     *
     * Note: the constructor is not declared \c explicit, so
     * a std::string converts implicitly to a StringTokenizer
     * (with the default delimiter and no prefix).
     *
     * \param[in] rStr string for tokenizing
     * \param[in] cDelim delimiter (separator) character
     * \param[in] cPrefix character which is prepended if a
     * character must not separate tokens
     */
    StringTokenizer(const std::string& rStr, char cDelim = ',', char cPrefix = '\0');

    /// Destructor.
    ~StringTokenizer() {}

    /// Checks whether the tokenizer can provide more tokens.
    /**
     * The check compares the current position with the stored
     * string length.
     *
     * \return true = more tokens available, false = otherwise
     */
    bool HasMoreTokens() const
    {
        return m_pos < m_len;
    }

    /// Returns the next token.
    /**
     * If a prefix is defined it is stripped from the returned
     * string (e.g. 'abc\ def' is transformed to 'abc def'
     * while the prefix is '\').
     *
     * \param[in] fSkipEmpty skip empty strings (several consecutive delimiters)
     * \return next token or "" if no more tokens available
     *
     * \sa GetNextTokenRaw()
     */
    std::string GetNextToken(bool fSkipEmpty = false);

    /// Returns the next token without stripping prefix characters.
    /**
     * This method always returns an unmodified string even
     * if it contains prefix characters.
     *
     * \param[in] fSkipEmpty skip empty strings (several consecutive delimiters)
     * \return next token or "" if no more tokens available
     *
     * \sa GetNextToken()
     */
    std::string GetNextTokenRaw(bool fSkipEmpty = false);

    /// Returns the remainder of the source string.
    /**
     * This method returns everything that has not been
     * processed (tokenized) yet and moves the current
     * position to the end of the string.
     *
     * If a prefix is defined it is stripped from
     * the returned string.
     *
     * \return remainder string
     */
    std::string GetRemainder();

    /// Sets a delimiter (separator) character.
    /**
     * The new delimiter affects only tokens returned later;
     * the position in the string is not affected.
     *
     * \param[in] cDelim delimiter character
     */
    void SetDelimiter(char cDelim)
    {
        m_cDelim = cDelim;
    }

    /// Returns the delimiter (separator) character.
    /**
     * \return delimiter character
     */
    char GetDelimiter() const
    {
        return m_cDelim;
    }

    /// Sets a prefix character.
    /**
     * The new prefix affects only tokens returned later;
     * the position in the string is not affected.
     *
     * If you specify a NUL character (0x00) here the prefix
     * will not be used.
     *
     * \param[in] cPrefix prefix character
     *
     * \sa SetNoPrefix()
     */
    void SetPrefix(char cPrefix)
    {
        m_cPrefix = cPrefix;
    }

    /// Returns the prefix character.
    /**
     * \return prefix character
     */
    char GetPrefix() const
    {
        return m_cPrefix;
    }

    /// Sets the prefix to 'no prefix'.
    /**
     * Calling this method is equivalent to SetPrefix((char) 0).
     *
     * \sa SetPrefix()
     */
    void SetNoPrefix()
    {
        SetPrefix('\0');
    }

    /// Resets the tokenizer.
    /**
     * Re-initializes tokenizing to the start of the string.
     * Only the current position is changed; the delimiter and
     * the prefix keep their values.
     */
    void Reset()
    {
        m_pos = 0;
    }

private:
    std::string m_str;              ///< tokenized string
    char m_cDelim;                  ///< delimiter character
    char m_cPrefix;                 ///< prefix character
    std::string::size_type m_pos;   ///< current position
    std::string::size_type m_len;   ///< string length

    /// Strips all prefix characters.
    /**
     * \param[in] s source string
     * \param[in] cnt string length
     * \return modified string
     */
    // Spelled with std::string::size_type (the type SIZE aliases) so the
    // declaration matches m_pos/m_len; the signature itself is unchanged.
    std::string StripPrefix(const char* s, std::string::size_type cnt);

    /// Extracts the next token (internal method).
    /**
     * The extracted token may be empty.
     *
     * \param[out] rToken extracted token
     * \param[in] fStripPrefix strip prefix characters yes/no
     */
    void _GetNextToken(std::string& rToken, bool fStripPrefix);

    /// Extracts the next token (internal method).
    /**
     * This method does no checking for the prefix character.
     *
     * The extracted token may be empty.
     *
     * \param[out] rToken extracted token
     */
    void _GetNextTokenNoPrefix(std::string& rToken);

    /// Extracts the next token (internal method).
    /**
     * This method does check for the prefix character.
     *
     * The extracted token may be empty.
     *
     * \param[out] rToken extracted token
     */
    void _GetNextTokenWithPrefix(std::string& rToken);
};
229 | |||
230 | |||
231 | #endif //_STRTOK_H_ |