1
2
3
4 package jwutil.strings;
5
6 import java.util.Enumeration;
7 import java.util.NoSuchElementException;
8
/**
 * MyStringTokenizer is like {@link java.util.StringTokenizer}, but gives you
 * access to the string being tokenized and the current position, and does not
 * split on delimiter characters that appear between double quotation marks.
 * <p>
 * Quoting rules, as implemented by the token scanner:
 * <ul>
 * <li>A <code>"</code> character toggles "inside quotes" state; the quote
 *     characters themselves are kept in the returned token.</li>
 * <li>While inside quotes, delimiter characters do not end the token.</li>
 * <li>An unterminated quote makes the token run to the end of the string.</li>
 * <li>Quote state is not carried from one token to the next; every token scan
 *     starts outside quotes.</li>
 * </ul>
 */
public class MyStringTokenizer implements Enumeration {

    /** The string being tokenized; never <code>null</code>. */
    private final String str;

    /** Length of {@link #str}; scanning stops at this index. */
    private final int maxPosition;

    /** Whether delimiter characters are themselves returned as tokens. */
    private final boolean retDelims;

    /** Index just past the most recently returned token (0 initially). */
    private int currentPosition;

    /**
     * Start of the next token as computed by {@link #hasMoreTokens()}, or -1
     * when no such cached value is available.
     */
    private int newPosition;

    /** Current delimiter set; may be <code>null</code> only via the constructors. */
    private String delimiters;

    /** Set when the delimiter set changed, which invalidates {@link #newPosition}. */
    private boolean delimsChanged;

    /** Highest character value in {@link #delimiters}; used as a fast reject. */
    private char maxDelimChar;

    /**
     * Constructs a string tokenizer for the specified string. All
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then
     * the delimiter characters are also returned as tokens. Each
     * delimiter is returned as a string of length one. If the flag is
     * <code>false</code>, the delimiter characters are skipped and only
     * serve as separators between tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor
     * does not throw an exception. However, invoking
     * <code>hasMoreTokens</code>, <code>nextToken()</code> or
     * <code>countTokens</code> on the resulting
     * <code>MyStringTokenizer</code> throws a
     * <code>NullPointerException</code>.
     *
     * @param str a string to be parsed.
     * @param delim the delimiters.
     * @param returnDelims flag indicating whether to return the delimiters
     *        as tokens.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String,java.lang.String,boolean)
     */
    public MyStringTokenizer(String str, String delim, boolean returnDelims) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        this.str = str;
        this.maxPosition = str.length();
        this.retDelims = returnDelims;
        this.currentPosition = 0;
        this.newPosition = -1;
        this.delimsChanged = false;
        this.delimiters = delim;
        setMaxDelimChar();
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens. Delimiter characters themselves will not
     * be treated as tokens.
     * <p>
     * A <code>null</code> <code>delim</code> is accepted here; see
     * {@link #MyStringTokenizer(String, String, boolean)} for the consequences.
     *
     * @param str a string to be parsed.
     * @param delim the delimiters.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String,java.lang.String)
     */
    public MyStringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * tokenizer uses the default delimiter set, which is
     * <code>" \t\n\r\f"</code>: the space character, the tab character,
     * the newline character, the carriage-return character, and the
     * form-feed character. Delimiter characters themselves will not be
     * treated as tokens.
     *
     * @param str a string to be parsed.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String)
     */
    public MyStringTokenizer(String str) {
        this(str, " \t\n\r\f");
    }

    /**
     * Returns the string that is being tokenized.
     *
     * @return string that is being tokenized
     */
    public String getString() {
        return str;
    }

    /**
     * Returns the current position of the tokenizer in the string: 0 before
     * any token has been returned, afterwards the index just past the token
     * most recently returned by <code>nextToken</code>. Calling
     * <code>hasMoreTokens</code> or <code>countTokens</code> does not change it.
     *
     * @return current position of the tokenizer in the string
     */
    public int getPosition() {
        return currentPosition;
    }

    /**
     * Sets maxDelimChar to the highest char in the delimiter set, or to 0
     * when there is no delimiter set.
     *
     * see java.util.StringTokenizer#setMaxDelimChar()
     */
    private void setMaxDelimChar() {
        char highest = 0;
        if (delimiters != null) {
            for (int i = 0; i < delimiters.length(); i++) {
                char c = delimiters.charAt(i);
                if (c > highest) {
                    highest = c;
                }
            }
        }
        maxDelimChar = highest;
    }

    /**
     * Tells whether <code>c</code> belongs to the current delimiter set.
     * The comparison against maxDelimChar rejects most non-delimiters
     * without searching the delimiter string.
     */
    private boolean isDelimiter(char c) {
        return c <= maxDelimChar && delimiters.indexOf(c) >= 0;
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims
     * is false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     *
     * see java.util.StringTokenizer#skipDelimiters(int)
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) {
            throw new NullPointerException("delimiters");
        }
        if (retDelims) {
            // Delimiters are tokens in their own right, so nothing is skipped.
            return startPos;
        }
        int position = startPos;
        while (position < maxPosition && isDelimiter(str.charAt(position))) {
            position++;
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered outside double quotes, or maxPosition if no such
     * delimiter is found. A <code>"</code> toggles the quoted state and is
     * never treated as the end of a token by this method. When retDelims is
     * set and the character at startPos is itself a delimiter, the index just
     * past that single delimiter is returned.
     *
     * see java.util.StringTokenizer#scanToken(int)
     */
    private int scanToken(int startPos) {
        int position = startPos;
        boolean inQuotes = false;
        while (position < maxPosition) {
            char c = str.charAt(position);
            if (c == '"') {
                inQuotes = !inQuotes;
            } else if (!inQuotes && isDelimiter(c)) {
                break;
            }
            position++;
        }
        // If the scan did not advance although input remained, it stopped on a
        // delimiter at startPos; with retDelims that delimiter is the token.
        if (retDelims && position == startPos && position < maxPosition) {
            position++;
        }
        return position;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns <code>true</code>, then a subsequent call to
     * <code>nextToken</code> with no argument will successfully return a token.
     *
     * @return <code>true</code> if and only if there is at least one token
     *         in the string after the current position; <code>false</code>
     *         otherwise.
     * @exception NullPointerException if the delimiter set is <code>null</code>
     *
     * @see java.util.StringTokenizer#hasMoreTokens()
     */
    public boolean hasMoreTokens() {
        // Remember where the next token starts so nextToken() need not rescan.
        newPosition = skipDelimiters(currentPosition);
        return newPosition < maxPosition;
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token from this string tokenizer.
     * @exception NoSuchElementException if there are no more tokens in this
     *            tokenizer's string.
     * @exception NullPointerException if the delimiter set is <code>null</code>
     *
     * @see java.util.StringTokenizer#nextToken()
     */
    public String nextToken() {
        // The position cached by hasMoreTokens() is only valid while the
        // delimiter set it was computed with is still in effect.
        if (newPosition >= 0 && !delimsChanged) {
            currentPosition = newPosition;
        } else {
            currentPosition = skipDelimiters(currentPosition);
        }
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(start);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First,
     * the set of characters considered to be delimiters by this
     * <code>MyStringTokenizer</code> object is changed to be the characters
     * in the string <code>delim</code>. Then the next token in the string
     * after the current position is returned. The current position is
     * advanced beyond the recognized token. The new delimiter set stays in
     * effect for subsequent calls.
     * <p>
     * If <code>delim</code> is <code>null</code> the tokenizer is left
     * unchanged and can continue to be used with its previous delimiter set.
     *
     * @param delim the new delimiters.
     * @return the next token, after switching to the new delimiter set.
     * @exception NoSuchElementException if there are no more tokens in this
     *            tokenizer's string.
     * @exception NullPointerException if delim is <code>null</code>
     *
     * @see java.util.StringTokenizer#nextToken(String)
     */
    public String nextToken(String delim) {
        // Validate before mutating: a rejected argument must not wipe out the
        // delimiter set that is currently in use.
        if (delim == null) {
            throw new NullPointerException("delim");
        }
        delimiters = delim;
        delimsChanged = true;
        setMaxDelimChar();
        return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code> method. It
     * exists so that this class can implement the <code>Enumeration</code>
     * interface.
     *
     * @return <code>true</code> if there are more tokens;
     *         <code>false</code> otherwise.
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method, except
     * that its declared return value is <code>Object</code> rather than
     * <code>String</code>. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return the next token in the string.
     * @exception NoSuchElementException if there are no more tokens in this
     *            tokenizer's string.
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * <code>nextToken</code> method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return the number of tokens remaining in the string using the current
     *         delimiter set.
     * @exception NullPointerException if the delimiter set is
     *            <code>null</code> and the current position is before the end
     *            of the string
     *
     * @see #nextToken()
     * @see java.util.StringTokenizer#countTokens()
     */
    public int countTokens() {
        int count = 0;
        int position = currentPosition;
        while (position < maxPosition) {
            position = skipDelimiters(position);
            if (position >= maxPosition) {
                break;
            }
            position = scanToken(position);
            count++;
        }
        return count;
    }
}