View Javadoc

1   
2   package org.paneris.util;
3    
4   
/**
 * A simple tokenizer for delimiter-separated lines, such as the rows of a
 * .csv file. It started life as a copy of <code>java.util.StringTokenizer</code>
 * but differs from it in one important way: delimiters are <em>not</em>
 * collapsed. Every delimiter separates two fields, so consecutive, leading
 * and trailing delimiters all yield empty-string tokens, e.g.
 * <code>",a,,b,"</code> split on <code>","</code> yields
 * <code>""</code>, <code>"a"</code>, <code>""</code>, <code>"b"</code>,
 * <code>""</code>.
 * <p>
 * Quoting is not interpreted: a delimiter inside a quoted field still ends
 * the token.
 * <p>
 * An empty input string yields no tokens at all.
 */
public class CSVStringTokenizer implements java.util.Enumeration {
    /**
     * Cursor into <code>str</code>. Before the first token is read this is
     * the start of the string; afterwards it rests on the delimiter that
     * ended the most recently returned token (or at/after the end).
     */
    private int currentPosition;
    private final int maxPosition;
    private final String str;
    private String delimiters;
    /**
     * Kept only so the three-argument constructor remains available;
     * this tokenizer never returns delimiters as tokens.
     */
    private final boolean retTokens;
    /**
     * Whether at least one token has been read. A separate flag is required
     * because the cursor is still at position 0 after reading an empty first
     * field, so the cursor alone cannot tell "not started" from "started".
     */
    private boolean started;

    /**
     * Constructs a tokenizer for the specified string. The characters in
     * the <code>delim</code> argument are the delimiters separating tokens.
     *
     * @param   str            a string to be parsed; must not be
     *                         <code>null</code>.
     * @param   delim          the delimiters.
     * @param   returnTokens   accepted for compatibility with
     *                         <code>java.util.StringTokenizer</code>; it is
     *                         stored but has no effect, delimiters are never
     *                         returned as tokens.
     */
    public CSVStringTokenizer(String str, String delim, boolean returnTokens) {
      currentPosition = 0;
      started = false;
      this.str = str;
      maxPosition = str.length();
      delimiters = delim;
      retTokens = returnTokens;
    }

    /**
     * Constructs a tokenizer for the specified string. The characters in
     * the <code>delim</code> argument are the delimiters separating tokens.
     *
     * @param   str     a string to be parsed; must not be <code>null</code>.
     * @param   delim   the delimiters.
     */
    public CSVStringTokenizer(String str, String delim) {
      this(str, delim, false);
    }

    /**
     * Constructs a tokenizer for the specified string using the default
     * delimiter set <code>"&nbsp;&#92;t&#92;n&#92;r"</code>: the space
     * character, the tab character, the newline character, and the
     * carriage-return character.
     *
     * @param   str   a string to be parsed; must not be <code>null</code>.
     */
    public CSVStringTokenizer(String str) {
      this(str, " \t\n\r", false);
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * A trailing delimiter counts as being followed by one more (empty)
     * token.
     *
     * @return  <code>true</code> if there are more tokens available from this
     *          tokenizer's string; <code>false</code> otherwise.
     */
    public boolean hasMoreTokens() {
      return (currentPosition < maxPosition);
    }

    /**
     * Returns the next token from this tokenizer. Empty fields are returned
     * as empty strings.
     * <p>
     * Unlike <code>java.util.StringTokenizer</code> this method does not
     * throw when the input is exhausted; it returns the empty string.
     *
     * @return     the next token, or <code>""</code> if there are no more
     *             tokens.
     */
    public String nextToken() {
      // After the first token the cursor rests on the delimiter that ended
      // the previous token: step over it. The bound check keeps the cursor
      // from growing without limit on repeated calls past the end.
      if (started && currentPosition < maxPosition) {
        currentPosition++;
      }
      started = true;
      if (currentPosition >= maxPosition) {
        return "";
      }

      int start = currentPosition;
      currentPosition = findTokenEnd(start);
      return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token, after switching to a new delimiter set. The
     * new delimiter set remains in force after this call.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, or <code>""</code> if there are no more
     *             tokens.
     */
    public String nextToken(String delim) {
      delimiters = delim;
      return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code>
     * method. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return  <code>true</code> if there are more tokens;
     *          <code>false</code> otherwise.
     * @see     java.util.Enumeration
     */
    public boolean hasMoreElements() {
      return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method,
     * except that its declared return value is <code>Object</code> rather than
     * <code>String</code>. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return     the next token in the string, or <code>""</code> if there
     *             are no more tokens.
     * @see        java.util.Enumeration
     */
    public Object nextElement() {
      return nextToken();
    }

    /**
     * Calculates how many tokens remain, i.e. how many times
     * <code>nextToken</code> can be called while <code>hasMoreTokens</code>
     * still returns <code>true</code>. Empty fields are counted. The state
     * of the tokenizer is not changed.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     */
    public int countTokens() {
      int count = 0;
      int pos = currentPosition;
      boolean begun = started;

      // Replays the hasMoreTokens()/nextToken() loop on local copies of the
      // cursor state so the answer always agrees with actual iteration.
      while (pos < maxPosition) {
        if (begun) {
          pos++;
        }
        begun = true;
        pos = findTokenEnd(pos);
        count++;
      }
      return count;
    }

    /**
     * Finds the end of the token starting at <code>from</code>.
     *
     * @param   from   index at which the token starts.
     * @return  the index of the first delimiter at or after
     *          <code>from</code>, or the end of the string if there is none;
     *          <code>from</code> itself if it is already at or past the end.
     */
    private int findTokenEnd(int from) {
      int pos = from;
      while ((pos < maxPosition) &&
             (delimiters.indexOf(str.charAt(pos)) < 0)) {
        pos++;
      }
      return pos;
    }
}
221