View Javadoc

1   // MyStringTokenizer.java, created Apr 21, 2004 7:06:28 PM 2004 by jwhaley
2   // Copyright (C) 2004 John Whaley <jwhaley@alum.mit.edu>
3   // Licensed under the terms of the GNU LGPL; see COPYING for details.
4   package jwutil.strings;
5   
6   import java.util.Enumeration;
7   import java.util.NoSuchElementException;
8   
/**
 * MyStringTokenizer is like <code>java.util.StringTokenizer</code>, but gives
 * you access to the string and position, and also ignores delimiters that
 * appear between quotation marks, so a quoted section is never split.
 * <p>
 * Only the double-quote character (<code>"</code>) is recognized as a
 * quotation mark and there is no escape mechanism. Quotation marks are kept
 * as part of the returned token, and an unterminated quotation extends to the
 * end of the string. Every token scan starts in the "not quoted" state.
 */
public class MyStringTokenizer implements Enumeration {

    /** Index at which the next scan starts; advanced past each returned token. */
    private int currentPosition;

    /**
     * Start of the next token as computed by the last call to
     * {@link #hasMoreTokens()}, or -1 if no cached value is available.
     */
    private int newPosition;

    /** Length of {@link #str}; scanning never goes to or beyond this index. */
    private final int maxPosition;

    /** The string being tokenized. */
    private final String str;

    /** Current delimiter set; may be <code>null</code> (see the constructors). */
    private String delimiters;

    /** Whether delimiter characters are themselves returned as tokens. */
    private final boolean retDelims;

    /**
     * Set when the delimiter set has been replaced, which invalidates the
     * position cached in {@link #newPosition}.
     */
    private boolean delimsChanged;

    /**
     * Highest char value in the delimiter set. Lets the scanners reject most
     * characters with one comparison instead of an <code>indexOf</code> call.
     */
    private char maxDelimChar;

    /**
     * Constructs a string tokenizer for the specified string. All
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then
     * the delimiter characters are also returned as tokens. Each
     * delimiter is returned as a string of length one. If the flag is
     * <code>false</code>, the delimiter characters are skipped and only
     * serve as separators between tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor
     * does not throw an exception. However, trying to invoke other methods on
     * the resulting <code>MyStringTokenizer</code> may result in a
     * <code>NullPointerException</code>.
     *
     * @param   str            a string to be parsed.
     * @param   delim          the delimiters.
     * @param   returnDelims   flag indicating whether to return the delimiters
     *                         as tokens.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String,java.lang.String,boolean)
     */
    public MyStringTokenizer(String str, String delim, boolean returnDelims) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        this.str = str;
        this.maxPosition = str.length();
        this.delimiters = delim;
        this.retDelims = returnDelims;
        this.currentPosition = 0;
        this.newPosition = -1;
        this.delimsChanged = false;
        setMaxDelimChar();
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens. Delimiter characters themselves will not
     * be treated as tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor
     * does not throw an exception. However, trying to invoke other methods on
     * the resulting <code>MyStringTokenizer</code> may result in a
     * <code>NullPointerException</code>.
     *
     * @param   str     a string to be parsed.
     * @param   delim   the delimiters.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String,java.lang.String)
     */
    public MyStringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * tokenizer uses the default delimiter set, which is
     * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,
     * the tab character, the newline character, the carriage-return character,
     * and the form-feed character. Delimiter characters themselves will
     * not be treated as tokens.
     *
     * @param   str   a string to be parsed.
     * @exception NullPointerException if str is <code>null</code>
     *
     * @see java.util.StringTokenizer#StringTokenizer(java.lang.String)
     */
    public MyStringTokenizer(String str) {
        this(str, " \t\n\r\f");
    }

    /**
     * Returns the string that is being tokenized.
     *
     * @return  string that is being tokenized
     */
    public String getString() {
        return str;
    }

    /**
     * Returns the current position of the tokenizer in the string. This is
     * the index just past the most recently returned token (0 before any
     * token has been returned); calling {@link #hasMoreTokens()} does not
     * change it.
     *
     * @return  current position of the tokenizer in the string
     */
    public int getPosition() {
        return currentPosition;
    }

    /**
     * Sets maxDelimChar to the highest char in the delimiter set, or to 0 if
     * the delimiter set is <code>null</code> or empty.
     * <p>
     * Modeled on the private method of the same name in
     * <code>java.util.StringTokenizer</code>.
     */
    private void setMaxDelimChar() {
        if (delimiters == null) {
            maxDelimChar = 0;
            return;
        }
        char m = 0;
        for (int i = 0; i < delimiters.length(); i++) {
            char c = delimiters.charAt(i);
            if (m < c) m = c;
        }
        maxDelimChar = m;
    }

    /**
     * Tests whether <code>c</code> belongs to the current delimiter set.
     * The comparison against maxDelimChar is a cheap pre-filter that avoids
     * the <code>indexOf</code> search for characters that cannot match.
     */
    private boolean isDelimiter(char c) {
        return (c <= maxDelimChar) && (delimiters.indexOf(c) >= 0);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims
     * is false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     * <p>
     * Quotation marks get no special treatment here; only
     * {@link #scanToken(int)} tracks quoting.
     * <p>
     * Modeled on the private method of the same name in
     * <code>java.util.StringTokenizer</code>.
     *
     * @exception NullPointerException if the delimiter set is <code>null</code>
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) throw new NullPointerException();
        int position = startPos;
        while (!retDelims && position < maxPosition) {
            if (!isDelimiter(str.charAt(position))) break;
            position++;
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered outside quotation marks, or maxPosition if no
     * such delimiter is found.
     * <p>
     * Each <code>"</code> toggles the quoted state, and delimiters seen while
     * quoted do not end the token. If retDelims is true and the character at
     * startPos is itself a delimiter, the index just past it is returned so
     * that the delimiter forms a one-character token.
     * <p>
     * Modeled on the private method of the same name in
     * <code>java.util.StringTokenizer</code>.
     */
    private int scanToken(int startPos) {
        int position = startPos;
        boolean inString = false;
        while (position < maxPosition) {
            char c = str.charAt(position);
            if (c == '"') {
                // The quote test comes first, so a quotation mark never ends
                // a token here even if it is listed as a delimiter.
                inString = !inString;
            } else if (!inString && isDelimiter(c)) {
                break;
            }
            position++;
        }
        if (retDelims && (startPos == position)) {
            // The scan stopped immediately, so startPos holds a delimiter:
            // consume it so it is returned as a token of length one.
            if (isDelimiter(str.charAt(position))) position++;
        }
        return position;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns <code>true</code>, then a subsequent call to
     * <code>nextToken</code> with no argument will successfully return a token.
     *
     * @return  <code>true</code> if and only if there is at least one token
     *          in the string after the current position; <code>false</code>
     *          otherwise.
     *
     * @see java.util.StringTokenizer#hasMoreTokens()
     */
    public boolean hasMoreTokens() {
        // Cache the start of the next token so nextToken() need not rescan.
        newPosition = skipDelimiters(currentPosition);
        return (newPosition < maxPosition);
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return     the next token from this string tokenizer.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     *
     * @see java.util.StringTokenizer#nextToken()
     */
    public String nextToken() {
        // Reuse the position cached by hasMoreTokens() unless the delimiter
        // set has changed since it was computed.
        currentPosition = (newPosition >= 0 && !delimsChanged)
            ? newPosition
            : skipDelimiters(currentPosition);
        delimsChanged = false;
        newPosition = -1;
        if (currentPosition >= maxPosition) throw new NoSuchElementException();
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First,
     * the set of characters considered to be delimiters by this
     * <code>MyStringTokenizer</code> object is changed to be the characters in
     * the string <code>delim</code>. Then the next token in the string
     * after the current position is returned. The current position is
     * advanced beyond the recognized token.  The new delimiter set
     * remains the default after this call.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, after switching to the new delimiter set.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception NullPointerException if delim is <code>null</code>
     *
     * @see java.util.StringTokenizer#nextToken(String)
     */
    public String nextToken(String delim) {
        delimiters = delim;
        delimsChanged = true;
        setMaxDelimChar();
        return nextToken();
    }

    /**
     * Returns the same value as {@link #hasMoreTokens()}.
     *
     * @see java.util.Enumeration#hasMoreElements()
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as {@link #nextToken()}, typed as
     * <code>Object</code>.
     *
     * @see java.util.Enumeration#nextElement()
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * <code>nextToken</code> method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     *
     * @see #nextToken()
     * @see java.util.StringTokenizer#countTokens()
     */
    public int countTokens() {
        int count = 0;
        int currpos = currentPosition;
        while (currpos < maxPosition) {
            currpos = skipDelimiters(currpos);
            if (currpos >= maxPosition) break;
            currpos = scanToken(currpos);
            count++;
        }
        return count;
    }
}