View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.vfs.provider;
18  
19  import org.apache.commons.vfs.FileName;
20  import org.apache.commons.vfs.FileSystemException;
21  import org.apache.commons.vfs.FileType;
22  import org.apache.commons.vfs.VFS;
23  
24  /***
25   * Utilities for dealing with URIs. See RFC 2396 for details.
26   * 
27   * @author <a href="mailto:adammurdoch@apache.org">Adam Murdoch</a>
28   * @version $Revision: 480428 $ $Date: 2005-10-13 21:11:33 +0200 (Do, 13 Okt
29   *          2005) $
30   */
31  public final class UriParser
32  {
33  	/***
34  	 * The normalised separator to use.
35  	 */
36  	private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
37  
38  	/***
39  	 * The set of valid separators. These are all converted to the normalised
40  	 * one. Does <i>not</i> contain the normalised separator
41  	 */
42  	// public static final char[] separators = {'//'};
43  	public static final char TRANS_SEPARATOR = '//';
44  
/**
 * Private constructor: the class cannot be instantiated from outside.
 */
private UriParser()
{
	// nothing to initialise
}
48  
49  	/***
50  	 * Extracts the first element of a path.
51  	 */
52  	public static String extractFirstElement(final StringBuffer name)
53  	{
54  		final int len = name.length();
55  		if (len < 1)
56  		{
57  			return null;
58  		}
59  		int startPos = 0;
60  		if (name.charAt(0) == SEPARATOR_CHAR)
61  		{
62  			startPos = 1;
63  		}
64  		for (int pos = startPos; pos < len; pos++)
65  		{
66  			if (name.charAt(pos) == SEPARATOR_CHAR)
67  			{
68  				// Found a separator
69  				final String elem = name.substring(startPos, pos);
70  				name.delete(startPos, pos + 1);
71  				return elem;
72  			}
73  		}
74  
75  		// No separator
76  		final String elem = name.substring(startPos);
77  		name.setLength(0);
78  		return elem;
79  	}
80  
81  	/***
82  	 * Normalises a path. Does the following:
83  	 * <ul>
84  	 * <li>Removes empty path elements.
85  	 * <li>Handles '.' and '..' elements.
86  	 * <li>Removes trailing separator.
87  	 * </ul>
88  	 * 
89  	 * Its assumed that the separators are already fixed.
90  	 * 
91  	 *  @see #fixSeparators
92  	 */
93  	public static FileType normalisePath(final StringBuffer path)
94  			throws FileSystemException
95  	{
96  		FileType fileType = FileType.FOLDER;
97  		if (path.length() == 0)
98  		{
99  			return fileType;
100 		}
101 
102 		if (path.charAt(path.length() - 1) != '/')
103 		{
104 			fileType = FileType.FILE;
105 		}
106 
107 		// Adjust separators
108 		// fixSeparators(path);
109 
110 		// Determine the start of the first element
111 		int startFirstElem = 0;
112 		if (path.charAt(0) == SEPARATOR_CHAR)
113 		{
114 			if (path.length() == 1)
115 			{
116 				return fileType;
117 			}
118 			startFirstElem = 1;
119 		}
120 
121 		// Iterate over each element
122 		int startElem = startFirstElem;
123 		int maxlen = path.length();
124 		while (startElem < maxlen)
125 		{
126 			// Find the end of the element
127 			int endElem = startElem;
128 			for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++)
129 			{
130 			}
131 
132 			final int elemLen = endElem - startElem;
133 			if (elemLen == 0)
134 			{
135 				// An empty element - axe it
136 				path.delete(endElem, endElem + 1);
137 				maxlen = path.length();
138 				continue;
139 			}
140 			if (elemLen == 1 && path.charAt(startElem) == '.')
141 			{
142 				// A '.' element - axe it
143 				path.delete(startElem, endElem + 1);
144 				maxlen = path.length();
145 				continue;
146 			}
147 			if (elemLen == 2 && path.charAt(startElem) == '.'
148 					&& path.charAt(startElem + 1) == '.')
149 			{
150 				// A '..' element - remove the previous element
151 				if (startElem == startFirstElem)
152 				{
153 					// Previous element is missing
154 					throw new FileSystemException(
155 							"vfs.provider/invalid-relative-path.error");
156 				}
157 
158 				// Find start of previous element
159 				int pos = startElem - 2;
160 				for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--)
161 				{
162 				}
163 				startElem = pos + 1;
164 
165 				path.delete(startElem, endElem + 1);
166 				maxlen = path.length();
167 				continue;
168 			}
169 
170 			// A regular element
171 			startElem = endElem + 1;
172 		}
173 
174 		// Remove trailing separator
175 		if (!VFS.isUriStyle())
176 		{
177 			if (maxlen > 0 && path.charAt(maxlen - 1) == SEPARATOR_CHAR
178 					&& maxlen > 1)
179 			{
180 				path.delete(maxlen - 1, maxlen);
181 			}
182 		}
183 
184 		return fileType;
185 	}
186 
187 	/***
188 	 * Normalises the separators in a name.
189 	 */
190 	public static boolean fixSeparators(final StringBuffer name)
191 	{
192 		boolean changed = false;
193 		final int maxlen = name.length();
194 		for (int i = 0; i < maxlen; i++)
195 		{
196 			final char ch = name.charAt(i);
197 			if (ch == TRANS_SEPARATOR)
198 			{
199 				name.setCharAt(i, SEPARATOR_CHAR);
200 				changed = true;
201 			}
202 		}
203 		return changed;
204 	}
205 
206 	/***
207 	 * Extracts the scheme from a URI.
208 	 * 
209 	 * @param uri
210 	 *            The URI.
211 	 * @return The scheme name. Returns null if there is no scheme.
212 	 */
213 	public static String extractScheme(final String uri)
214 	{
215 		return extractScheme(uri, null);
216 	}
217 
218 	/***
219 	 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from
220 	 * the front of the URI.
221 	 * 
222 	 * @param uri
223 	 *            The URI.
224 	 * @param buffer
225 	 *            Returns the remainder of the URI.
226 	 * @return The scheme name. Returns null if there is no scheme.
227 	 */
228 	public static String extractScheme(final String uri,
229 			final StringBuffer buffer)
230 	{
231 		if (buffer != null)
232 		{
233 			buffer.setLength(0);
234 			buffer.append(uri);
235 		}
236 
237 		final int maxPos = uri.length();
238 		for (int pos = 0; pos < maxPos; pos++)
239 		{
240 			final char ch = uri.charAt(pos);
241 
242 			if (ch == ':')
243 			{
244 				// Found the end of the scheme
245 				final String scheme = uri.substring(0, pos);
246 				if (buffer != null)
247 				{
248 					buffer.delete(0, pos + 1);
249 				}
250 				return scheme.intern();
251 			}
252 
253 			if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
254 			{
255 				// A scheme character
256 				continue;
257 			}
258 			if (pos > 0
259 					&& ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.'))
260 			{
261 				// A scheme character (these are not allowed as the first
262 				// character of the scheme, but can be used as subsequent
263 				// characters.
264 				continue;
265 			}
266 
267 			// Not a scheme character
268 			break;
269 		}
270 
271 		// No scheme in URI
272 		return null;
273 	}
274 
275 	/***
276 	 * Removes %nn encodings from a string.
277 	 */
278 	public static String decode(final String encodedStr)
279 			throws FileSystemException
280 	{
281 		if (encodedStr == null)
282 		{
283 			return null;
284 		}
285 		if (encodedStr.indexOf('%') < 0)
286 		{
287 			return encodedStr;
288 		}
289 		final StringBuffer buffer = new StringBuffer(encodedStr);
290 		decode(buffer, 0, buffer.length());
291 		return buffer.toString();
292 	}
293 
294 	/***
295 	 * Removes %nn encodings from a string.
296 	 */
297 	public static void decode(final StringBuffer buffer, final int offset,
298 			final int length) throws FileSystemException
299 	{
300 		int index = offset;
301 		int count = length;
302 		for (; count > 0; count--, index++)
303 		{
304 			final char ch = buffer.charAt(index);
305 			if (ch != '%')
306 			{
307 				continue;
308 			}
309 			if (count < 3)
310 			{
311 				throw new FileSystemException(
312 						"vfs.provider/invalid-escape-sequence.error", buffer
313 								.substring(index, index + count));
314 			}
315 
316 			// Decode
317 			int dig1 = Character.digit(buffer.charAt(index + 1), 16);
318 			int dig2 = Character.digit(buffer.charAt(index + 2), 16);
319 			if (dig1 == -1 || dig2 == -1)
320 			{
321 				throw new FileSystemException(
322 						"vfs.provider/invalid-escape-sequence.error", buffer
323 								.substring(index, index + 3));
324 			}
325 			char value = (char) (dig1 << 4 | dig2);
326 
327 			// Replace
328 			buffer.setCharAt(index, value);
329 			buffer.delete(index + 1, index + 3);
330 			count -= 2;
331 		}
332 	}
333 
334 	/***
335 	 * Encodes and appends a string to a StringBuffer.
336 	 */
337 	public static void appendEncoded(final StringBuffer buffer,
338 			final String unencodedValue, final char[] reserved)
339 	{
340 		final int offset = buffer.length();
341 		buffer.append(unencodedValue);
342 		encode(buffer, offset, unencodedValue.length(), reserved);
343 	}
344 
345 	/***
346 	 * Encodes a set of reserved characters in a StringBuffer, using the URI %nn
347 	 * encoding. Always encodes % characters.
348 	 */
349 	public static void encode(final StringBuffer buffer, final int offset,
350 			final int length, final char[] reserved)
351 	{
352 		int index = offset;
353 		int count = length;
354 		for (; count > 0; index++, count--)
355 		{
356 			final char ch = buffer.charAt(index);
357 			boolean match = (ch == '%');
358 			if (reserved != null)
359 			{
360 				for (int i = 0; !match && i < reserved.length; i++)
361 				{
362 					if (ch == reserved[i])
363 					{
364 						match = true;
365 					}
366 				}
367 			}
368 			if (match)
369 			{
370 				// Encode
371 				char[] digits =
372 				{ Character.forDigit(((ch >> 4) & 0xF), 16),
373 						Character.forDigit((ch & 0xF), 16) };
374 				buffer.setCharAt(index, '%');
375 				buffer.insert(index + 1, digits);
376 				index += 2;
377 			}
378 		}
379 	}
380 
381 	/***
382 	 * Removes %nn encodings from a string.
383 	 */
384 	public static String encode(final String decodedStr)
385 	{
386 		return encode(decodedStr, null);
387 	}
388 
389 	public static String encode(final String decodedStr, final char[] reserved)
390 	{
391 		if (decodedStr == null)
392 		{
393 			return null;
394 		}
395 		final StringBuffer buffer = new StringBuffer(decodedStr);
396 		encode(buffer, 0, buffer.length(), reserved);
397 		return buffer.toString();
398 	}
399 
400 	public static String[] encode(String[] strings)
401 	{
402 		if (strings == null)
403 		{
404 			return null;
405 		}
406 		for (int i = 0; i < strings.length; i++)
407 		{
408 			strings[i] = encode(strings[i]);
409 		}
410 		return strings;
411 	}
412 
413 	public static void checkUriEncoding(String uri) throws FileSystemException
414 	{
415 		decode(uri);
416 	}
417 
418 	public static void canonicalizePath(StringBuffer buffer, int offset,
419 			int length, FileNameParser fileNameParser)
420 			throws FileSystemException
421 	{
422 		int index = offset;
423 		int count = length;
424 		for (; count > 0; count--, index++)
425 		{
426 			final char ch = buffer.charAt(index);
427 			if (ch == '%')
428 			{
429 				if (count < 3)
430 				{
431 					throw new FileSystemException(
432 							"vfs.provider/invalid-escape-sequence.error",
433 							buffer.substring(index, index + count));
434 				}
435 
436 				// Decode
437 				int dig1 = Character.digit(buffer.charAt(index + 1), 16);
438 				int dig2 = Character.digit(buffer.charAt(index + 2), 16);
439 				if (dig1 == -1 || dig2 == -1)
440 				{
441 					throw new FileSystemException(
442 							"vfs.provider/invalid-escape-sequence.error",
443 							buffer.substring(index, index + 3));
444 				}
445 				char value = (char) (dig1 << 4 | dig2);
446 
447 				boolean match = (value == '%')
448 						|| (fileNameParser != null && fileNameParser
449 								.encodeCharacter(value));
450 
451 				if (match)
452 				{
453 					// this is a reserved character, not allowed to decode
454 					index += 2;
455 					count -= 2;
456 					continue;
457 				}
458 
459 				// Replace
460 				buffer.setCharAt(index, value);
461 				buffer.delete(index + 1, index + 3);
462 				count -= 2;
463 			}
464 			else if (fileNameParser.encodeCharacter(ch))
465 			{
466 				// Encode
467 				char[] digits =
468 				{ Character.forDigit(((ch >> 4) & 0xF), 16),
469 						Character.forDigit((ch & 0xF), 16) };
470 				buffer.setCharAt(index, '%');
471 				buffer.insert(index + 1, digits);
472 				index += 2;
473 			}
474 		}
475 	}
476 
477 	public static String extractQueryString(StringBuffer name)
478 	{
479 		for (int pos = 0; pos < name.length(); pos++)
480 		{
481 			if (name.charAt(pos) == '?')
482 			{
483 				String queryString = name.substring(pos + 1);
484 				name.delete(pos, name.length());
485 				return queryString;
486 			}
487 		}
488 
489 		return null;
490 	}
491 }