View Javadoc
1   /*
2    * Copyright 2002-2014 the original author or authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.springframework.web.util;
18  
19  import java.io.UnsupportedEncodingException;
20  import java.net.URLDecoder;
21  import java.util.LinkedHashMap;
22  import java.util.Map;
23  import java.util.Map.Entry;
24  import java.util.Properties;
25  import javax.servlet.http.HttpServletRequest;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  
30  import org.springframework.util.LinkedMultiValueMap;
31  import org.springframework.util.MultiValueMap;
32  import org.springframework.util.StringUtils;
33  
34  /**
35   * Helper class for URL path matching. Provides support for URL paths in
36   * RequestDispatcher includes and support for consistent URL decoding.
37   *
38   * <p>Used by {@link org.springframework.web.servlet.handler.AbstractUrlHandlerMapping},
39   * {@link org.springframework.web.servlet.mvc.multiaction.AbstractUrlMethodNameResolver}
40   * and {@link org.springframework.web.servlet.support.RequestContext} for path matching
41   * and/or URI determination.
42   *
43   * @author Juergen Hoeller
44   * @author Rob Harrop
45   * @author Rossen Stoyanchev
46   * @since 14.01.2004
47   */
48  public class UrlPathHelper {
49  
50  	/**
51  	 * Special WebSphere request attribute, indicating the original request URI.
52  	 * Preferable over the standard Servlet 2.4 forward attribute on WebSphere,
53  	 * simply because we need the very first URI in the request forwarding chain.
54  	 */
55  	private static final String WEBSPHERE_URI_ATTRIBUTE = "com.ibm.websphere.servlet.uri_non_decoded";
56  
57  	private static final Log logger = LogFactory.getLog(UrlPathHelper.class);
58  
59  	static volatile Boolean websphereComplianceFlag;
60  
61  
62  	private boolean alwaysUseFullPath = false;
63  
64  	private boolean urlDecode = true;
65  
66  	private boolean removeSemicolonContent = true;
67  
68  	private String defaultEncoding = WebUtils.DEFAULT_CHARACTER_ENCODING;
69  
70  
71  	/**
72  	 * Set if URL lookup should always use full path within current servlet
73  	 * context. Else, the path within the current servlet mapping is used
74  	 * if applicable (i.e. in the case of a ".../*" servlet mapping in web.xml).
75  	 * Default is "false".
76  	 */
77  	public void setAlwaysUseFullPath(boolean alwaysUseFullPath) {
78  		this.alwaysUseFullPath = alwaysUseFullPath;
79  	}
80  
81  	/**
82  	 * Set if context path and request URI should be URL-decoded.
83  	 * Both are returned <i>undecoded</i> by the Servlet API,
84  	 * in contrast to the servlet path.
85  	 * <p>Uses either the request encoding or the default encoding according
86  	 * to the Servlet spec (ISO-8859-1).
87  	 * <p>Default is "true", as of Spring 2.5.
88  	 * @see #getServletPath
89  	 * @see #getContextPath
90  	 * @see #getRequestUri
91  	 * @see WebUtils#DEFAULT_CHARACTER_ENCODING
92  	 * @see javax.servlet.ServletRequest#getCharacterEncoding()
93  	 * @see java.net.URLDecoder#decode(String, String)
94  	 */
95  	public void setUrlDecode(boolean urlDecode) {
96  		this.urlDecode = urlDecode;
97  	}
98  
99  	/**
100 	 * Set if ";" (semicolon) content should be stripped from the request URI.
101 	 * <p>Default is "true".
102 	 */
103 	public void setRemoveSemicolonContent(boolean removeSemicolonContent) {
104 		this.removeSemicolonContent = removeSemicolonContent;
105 	}
106 
107 	/**
108 	 * Whether configured to remove ";" (semicolon) content from the request URI.
109 	 */
110 	public boolean shouldRemoveSemicolonContent() {
111 		return this.removeSemicolonContent;
112 	}
113 
114 	/**
115 	 * Set the default character encoding to use for URL decoding.
116 	 * Default is ISO-8859-1, according to the Servlet spec.
117 	 * <p>If the request specifies a character encoding itself, the request
118 	 * encoding will override this setting. This also allows for generically
119 	 * overriding the character encoding in a filter that invokes the
120 	 * {@code ServletRequest.setCharacterEncoding} method.
121 	 * @param defaultEncoding the character encoding to use
122 	 * @see #determineEncoding
123 	 * @see javax.servlet.ServletRequest#getCharacterEncoding()
124 	 * @see javax.servlet.ServletRequest#setCharacterEncoding(String)
125 	 * @see WebUtils#DEFAULT_CHARACTER_ENCODING
126 	 */
127 	public void setDefaultEncoding(String defaultEncoding) {
128 		this.defaultEncoding = defaultEncoding;
129 	}
130 
131 	/**
132 	 * Return the default character encoding to use for URL decoding.
133 	 */
134 	protected String getDefaultEncoding() {
135 		return this.defaultEncoding;
136 	}
137 
138 
139 	/**
140 	 * Return the mapping lookup path for the given request, within the current
141 	 * servlet mapping if applicable, else within the web application.
142 	 * <p>Detects include request URL if called within a RequestDispatcher include.
143 	 * @param request current HTTP request
144 	 * @return the lookup path
145 	 * @see #getPathWithinApplication
146 	 * @see #getPathWithinServletMapping
147 	 */
148 	public String getLookupPathForRequest(HttpServletRequest request) {
149 		// Always use full path within current servlet context?
150 		if (this.alwaysUseFullPath) {
151 			return getPathWithinApplication(request);
152 		}
153 		// Else, use path within current servlet mapping if applicable
154 		String rest = getPathWithinServletMapping(request);
155 		if (!"".equals(rest)) {
156 			return rest;
157 		}
158 		else {
159 			return getPathWithinApplication(request);
160 		}
161 	}
162 
163 	/**
164 	 * Return the path within the servlet mapping for the given request,
165 	 * i.e. the part of the request's URL beyond the part that called the servlet,
166 	 * or "" if the whole URL has been used to identify the servlet.
167 	 * <p>Detects include request URL if called within a RequestDispatcher include.
168 	 * <p>E.g.: servlet mapping = "/test/*"; request URI = "/test/a" -> "/a".
169 	 * <p>E.g.: servlet mapping = "/test"; request URI = "/test" -> "".
170 	 * <p>E.g.: servlet mapping = "/*.test"; request URI = "/a.test" -> "".
171 	 * @param request current HTTP request
172 	 * @return the path within the servlet mapping, or ""
173 	 */
174 	public String getPathWithinServletMapping(HttpServletRequest request) {
175 		String pathWithinApp = getPathWithinApplication(request);
176 		String servletPath = getServletPath(request);
177 		String path = getRemainingPath(pathWithinApp, servletPath, false);
178 		if (path != null) {
179 			// Normal case: URI contains servlet path.
180 			return path;
181 		}
182 		else {
183 			// Special case: URI is different from servlet path.
184 			String pathInfo = request.getPathInfo();
185 			if (pathInfo != null) {
186 				// Use path info if available. Indicates index page within a servlet mapping?
187 				// e.g. with index page: URI="/", servletPath="/index.html"
188 				return pathInfo;
189 			}
190 			if (!this.urlDecode) {
191 				// No path info... (not mapped by prefix, nor by extension, nor "/*")
192 				// For the default servlet mapping (i.e. "/"), urlDecode=false can
193 				// cause issues since getServletPath() returns a decoded path.
194 				// If decoding pathWithinApp yields a match just use pathWithinApp.
195 				path = getRemainingPath(decodeInternal(request, pathWithinApp), servletPath, false);
196 				if (path != null) {
197 					return pathWithinApp;
198 				}
199 			}
200 			// Otherwise, use the full servlet path.
201 			return servletPath;
202 		}
203 	}
204 
205 	/**
206 	 * Return the path within the web application for the given request.
207 	 * <p>Detects include request URL if called within a RequestDispatcher include.
208 	 * @param request current HTTP request
209 	 * @return the path within the web application
210 	 */
211 	public String getPathWithinApplication(HttpServletRequest request) {
212 		String contextPath = getContextPath(request);
213 		String requestUri = getRequestUri(request);
214 		String path = getRemainingPath(requestUri, contextPath, true);
215 		if (path != null) {
216 			// Normal case: URI contains context path.
217 			return (StringUtils.hasText(path) ? path : "/");
218 		}
219 		else {
220 			return requestUri;
221 		}
222 	}
223 
224 	/**
225 	 * Match the given "mapping" to the start of the "requestUri" and if there
226 	 * is a match return the extra part. This method is needed because the
227 	 * context path and the servlet path returned by the HttpServletRequest are
228 	 * stripped of semicolon content unlike the requesUri.
229 	 */
230 	private String getRemainingPath(String requestUri, String mapping, boolean ignoreCase) {
231 		int index1 = 0;
232 		int index2 = 0;
233 		for (; (index1 < requestUri.length()) && (index2 < mapping.length()); index1++, index2++) {
234 			char c1 = requestUri.charAt(index1);
235 			char c2 = mapping.charAt(index2);
236 			if (c1 == ';') {
237 				index1 = requestUri.indexOf('/', index1);
238 				if (index1 == -1) {
239 					return null;
240 				}
241 				c1 = requestUri.charAt(index1);
242 			}
243 			if (c1 == c2) {
244 				continue;
245 			}
246 			if (ignoreCase && (Character.toLowerCase(c1) == Character.toLowerCase(c2))) {
247 				continue;
248 			}
249 			return null;
250 		}
251 		if (index2 != mapping.length()) {
252 			return null;
253 		}
254 		if (index1 == requestUri.length()) {
255 			return "";
256 		}
257 		else if (requestUri.charAt(index1) == ';') {
258 			index1 = requestUri.indexOf('/', index1);
259 		}
260 		return (index1 != -1 ? requestUri.substring(index1) : "");
261 	}
262 
263 	/**
264 	 * Return the request URI for the given request, detecting an include request
265 	 * URL if called within a RequestDispatcher include.
266 	 * <p>As the value returned by {@code request.getRequestURI()} is <i>not</i>
267 	 * decoded by the servlet container, this method will decode it.
268 	 * <p>The URI that the web container resolves <i>should</i> be correct, but some
269 	 * containers like JBoss/Jetty incorrectly include ";" strings like ";jsessionid"
270 	 * in the URI. This method cuts off such incorrect appendices.
271 	 * @param request current HTTP request
272 	 * @return the request URI
273 	 */
274 	public String getRequestUri(HttpServletRequest request) {
275 		String uri = (String) request.getAttribute(WebUtils.INCLUDE_REQUEST_URI_ATTRIBUTE);
276 		if (uri == null) {
277 			uri = request.getRequestURI();
278 		}
279 		return decodeAndCleanUriString(request, uri);
280 	}
281 
282 	/**
283 	 * Return the context path for the given request, detecting an include request
284 	 * URL if called within a RequestDispatcher include.
285 	 * <p>As the value returned by {@code request.getContextPath()} is <i>not</i>
286 	 * decoded by the servlet container, this method will decode it.
287 	 * @param request current HTTP request
288 	 * @return the context path
289 	 */
290 	public String getContextPath(HttpServletRequest request) {
291 		String contextPath = (String) request.getAttribute(WebUtils.INCLUDE_CONTEXT_PATH_ATTRIBUTE);
292 		if (contextPath == null) {
293 			contextPath = request.getContextPath();
294 		}
295 		if ("/".equals(contextPath)) {
296 			// Invalid case, but happens for includes on Jetty: silently adapt it.
297 			contextPath = "";
298 		}
299 		return decodeRequestString(request, contextPath);
300 	}
301 
302 	/**
303 	 * Return the servlet path for the given request, regarding an include request
304 	 * URL if called within a RequestDispatcher include.
305 	 * <p>As the value returned by {@code request.getServletPath()} is already
306 	 * decoded by the servlet container, this method will not attempt to decode it.
307 	 * @param request current HTTP request
308 	 * @return the servlet path
309 	 */
310 	public String getServletPath(HttpServletRequest request) {
311 		String servletPath = (String) request.getAttribute(WebUtils.INCLUDE_SERVLET_PATH_ATTRIBUTE);
312 		if (servletPath == null) {
313 			servletPath = request.getServletPath();
314 		}
315 		if (servletPath.length() > 1 && servletPath.endsWith("/") && shouldRemoveTrailingServletPathSlash(request)) {
316 			// On WebSphere, in non-compliant mode, for a "/foo/" case that would be "/foo"
317 			// on all other servlet containers: removing trailing slash, proceeding with
318 			// that remaining slash as final lookup path...
319 			servletPath = servletPath.substring(0, servletPath.length() - 1);
320 		}
321 		return servletPath;
322 	}
323 
324 
325 	/**
326 	 * Return the request URI for the given request. If this is a forwarded request,
327 	 * correctly resolves to the request URI of the original request.
328 	 */
329 	public String getOriginatingRequestUri(HttpServletRequest request) {
330 		String uri = (String) request.getAttribute(WEBSPHERE_URI_ATTRIBUTE);
331 		if (uri == null) {
332 			uri = (String) request.getAttribute(WebUtils.FORWARD_REQUEST_URI_ATTRIBUTE);
333 			if (uri == null) {
334 				uri = request.getRequestURI();
335 			}
336 		}
337 		return decodeAndCleanUriString(request, uri);
338 	}
339 
340 	/**
341 	 * Return the context path for the given request, detecting an include request
342 	 * URL if called within a RequestDispatcher include.
343 	 * <p>As the value returned by {@code request.getContextPath()} is <i>not</i>
344 	 * decoded by the servlet container, this method will decode it.
345 	 * @param request current HTTP request
346 	 * @return the context path
347 	 */
348 	public String getOriginatingContextPath(HttpServletRequest request) {
349 		String contextPath = (String) request.getAttribute(WebUtils.FORWARD_CONTEXT_PATH_ATTRIBUTE);
350 		if (contextPath == null) {
351 			contextPath = request.getContextPath();
352 		}
353 		return decodeRequestString(request, contextPath);
354 	}
355 
356 	/**
357 	 * Return the servlet path for the given request, detecting an include request
358 	 * URL if called within a RequestDispatcher include.
359 	 * @param request current HTTP request
360 	 * @return the servlet path
361 	 */
362 	public String getOriginatingServletPath(HttpServletRequest request) {
363 		String servletPath = (String) request.getAttribute(WebUtils.FORWARD_SERVLET_PATH_ATTRIBUTE);
364 		if (servletPath == null) {
365 			servletPath = request.getServletPath();
366 		}
367 		return servletPath;
368 	}
369 
370 	/**
371 	 * Return the query string part of the given request's URL. If this is a forwarded request,
372 	 * correctly resolves to the query string of the original request.
373 	 * @param request current HTTP request
374 	 * @return the query string
375 	 */
376 	public String getOriginatingQueryString(HttpServletRequest request) {
377 		if ((request.getAttribute(WebUtils.FORWARD_REQUEST_URI_ATTRIBUTE) != null) ||
378 			(request.getAttribute(WebUtils.ERROR_REQUEST_URI_ATTRIBUTE) != null)) {
379 			return (String) request.getAttribute(WebUtils.FORWARD_QUERY_STRING_ATTRIBUTE);
380 		}
381 		else {
382 			return request.getQueryString();
383 		}
384 	}
385 
386 	/**
387 	 * Decode the supplied URI string and strips any extraneous portion after a ';'.
388 	 */
389 	private String decodeAndCleanUriString(HttpServletRequest request, String uri) {
390 		uri = removeSemicolonContent(uri);
391 		uri = decodeRequestString(request, uri);
392 		return uri;
393 	}
394 
395 	/**
396 	 * Decode the given source string with a URLDecoder. The encoding will be taken
397 	 * from the request, falling back to the default "ISO-8859-1".
398 	 * <p>The default implementation uses {@code URLDecoder.decode(input, enc)}.
399 	 * @param request current HTTP request
400 	 * @param source the String to decode
401 	 * @return the decoded String
402 	 * @see WebUtils#DEFAULT_CHARACTER_ENCODING
403 	 * @see javax.servlet.ServletRequest#getCharacterEncoding
404 	 * @see java.net.URLDecoder#decode(String, String)
405 	 * @see java.net.URLDecoder#decode(String)
406 	 */
407 	public String decodeRequestString(HttpServletRequest request, String source) {
408 		if (this.urlDecode) {
409 			return decodeInternal(request, source);
410 		}
411 		return source;
412 	}
413 
414 	@SuppressWarnings("deprecation")
415 	private String decodeInternal(HttpServletRequest request, String source) {
416 		String enc = determineEncoding(request);
417 		try {
418 			return UriUtils.decode(source, enc);
419 		}
420 		catch (UnsupportedEncodingException ex) {
421 			if (logger.isWarnEnabled()) {
422 				logger.warn("Could not decode request string [" + source + "] with encoding '" + enc +
423 						"': falling back to platform default encoding; exception message: " + ex.getMessage());
424 			}
425 			return URLDecoder.decode(source);
426 		}
427 	}
428 
429 	/**
430 	 * Determine the encoding for the given request.
431 	 * Can be overridden in subclasses.
432 	 * <p>The default implementation checks the request encoding,
433 	 * falling back to the default encoding specified for this resolver.
434 	 * @param request current HTTP request
435 	 * @return the encoding for the request (never {@code null})
436 	 * @see javax.servlet.ServletRequest#getCharacterEncoding()
437 	 * @see #setDefaultEncoding
438 	 */
439 	protected String determineEncoding(HttpServletRequest request) {
440 		String enc = request.getCharacterEncoding();
441 		if (enc == null) {
442 			enc = getDefaultEncoding();
443 		}
444 		return enc;
445 	}
446 
447 	/**
448 	 * Remove ";" (semicolon) content from the given request URI if the
449 	 * {@linkplain #setRemoveSemicolonContent(boolean) removeSemicolonContent}
450 	 * property is set to "true". Note that "jssessionid" is always removed.
451 	 * @param requestUri the request URI string to remove ";" content from
452 	 * @return the updated URI string
453 	 */
454 	public String removeSemicolonContent(String requestUri) {
455 		return this.removeSemicolonContent ?
456 				removeSemicolonContentInternal(requestUri) : removeJsessionid(requestUri);
457 	}
458 
459 	private String removeSemicolonContentInternal(String requestUri) {
460 		int semicolonIndex = requestUri.indexOf(';');
461 		while (semicolonIndex != -1) {
462 			int slashIndex = requestUri.indexOf('/', semicolonIndex);
463 			String start = requestUri.substring(0, semicolonIndex);
464 			requestUri = (slashIndex != -1) ? start + requestUri.substring(slashIndex) : start;
465 			semicolonIndex = requestUri.indexOf(';', semicolonIndex);
466 		}
467 		return requestUri;
468 	}
469 
470 	private String removeJsessionid(String requestUri) {
471 		int startIndex = requestUri.toLowerCase().indexOf(";jsessionid=");
472 		if (startIndex != -1) {
473 			int endIndex = requestUri.indexOf(';', startIndex + 12);
474 			String start = requestUri.substring(0, startIndex);
475 			requestUri = (endIndex != -1) ? start + requestUri.substring(endIndex) : start;
476 		}
477 		return requestUri;
478 	}
479 
480 	/**
481 	 * Decode the given URI path variables via
482 	 * {@link #decodeRequestString(HttpServletRequest, String)} unless
483 	 * {@link #setUrlDecode(boolean)} is set to {@code true} in which case it is
484 	 * assumed the URL path from which the variables were extracted is already
485 	 * decoded through a call to
486 	 * {@link #getLookupPathForRequest(HttpServletRequest)}.
487 	 * @param request current HTTP request
488 	 * @param vars URI variables extracted from the URL path
489 	 * @return the same Map or a new Map instance
490 	 */
491 	public Map<String, String> decodePathVariables(HttpServletRequest request, Map<String, String> vars) {
492 		if (this.urlDecode) {
493 			return vars;
494 		}
495 		else {
496 			Map<String, String> decodedVars = new LinkedHashMap<String, String>(vars.size());
497 			for (Entry<String, String> entry : vars.entrySet()) {
498 				decodedVars.put(entry.getKey(), decodeInternal(request, entry.getValue()));
499 			}
500 			return decodedVars;
501 		}
502 	}
503 
504 	/**
505 	 * Decode the given matrix variables via
506 	 * {@link #decodeRequestString(HttpServletRequest, String)} unless
507 	 * {@link #setUrlDecode(boolean)} is set to {@code true} in which case it is
508 	 * assumed the URL path from which the variables were extracted is already
509 	 * decoded through a call to
510 	 * {@link #getLookupPathForRequest(HttpServletRequest)}.
511 	 * @param request current HTTP request
512 	 * @param vars URI variables extracted from the URL path
513 	 * @return the same Map or a new Map instance
514 	 */
515 	public MultiValueMap<String, String> decodeMatrixVariables(HttpServletRequest request, MultiValueMap<String, String> vars) {
516 		if (this.urlDecode) {
517 			return vars;
518 		}
519 		else {
520 			MultiValueMap<String, String> decodedVars = new LinkedMultiValueMap	<String, String>(vars.size());
521 			for (String key : vars.keySet()) {
522 				for (String value : vars.get(key)) {
523 					decodedVars.add(key, decodeInternal(request, value));
524 				}
525 			}
526 			return decodedVars;
527 		}
528 	}
529 
530 	private boolean shouldRemoveTrailingServletPathSlash(HttpServletRequest request) {
531 		if (request.getAttribute(WEBSPHERE_URI_ATTRIBUTE) == null) {
532 			// Regular servlet container: behaves as expected in any case,
533 			// so the trailing slash is the result of a "/" url-pattern mapping.
534 			// Don't remove that slash.
535 			return false;
536 		}
537 		if (websphereComplianceFlag == null) {
538 			ClassLoader classLoader = UrlPathHelper.class.getClassLoader();
539 			String className = "com.ibm.ws.webcontainer.WebContainer";
540 			String methodName = "getWebContainerProperties";
541 			String propName = "com.ibm.ws.webcontainer.removetrailingservletpathslash";
542 			boolean flag = false;
543 			try {
544 				Class<?> cl = classLoader.loadClass(className);
545 				Properties prop = (Properties) cl.getMethod(methodName).invoke(null);
546 				flag = Boolean.parseBoolean(prop.getProperty(propName));
547 			}
548 			catch (Throwable ex) {
549 				if (logger.isDebugEnabled()) {
550 					logger.debug("Could not introspect WebSphere web container properties: " + ex);
551 				}
552 			}
553 			websphereComplianceFlag = flag;
554 		}
555 		// Don't bother if WebSphere is configured to be fully Servlet compliant.
556 		// However, if it is not compliant, do remove the improper trailing slash!
557 		return !websphereComplianceFlag;
558 	}
559 
560 }