1 /* 2 * Copyright (C) 2006 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.escape; 18 19 import static com.google.common.base.Preconditions.checkNotNull; 20 21 import com.google.common.annotations.Beta; 22 import com.google.common.annotations.GwtCompatible; 23 24 /** 25 * An object that converts literal text into a format safe for inclusion in a particular context 26 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 27 * text is performed automatically by the relevant parser. 28 * 29 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 30 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 31 * resulting XML document is parsed, the parser API will return this text as the original literal 32 * string {@code "Foo<Bar>"}. 33 * 34 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by 35 * multiple threads. 36 * 37 * <p>Several popular escapers are defined as constants in classes like {@link 38 * com.google.common.html.HtmlEscapers}, {@link com.google.common.xml.XmlEscapers}, and {@link 39 * SourceCodeEscapers}. To create your own escapers extend this class and implement the {@link 40 * #escape(char)} method. 41 * 42 * @author Sven Mawson 43 * @since 15.0 44 */ 45 @Beta 46 @GwtCompatible 47 public abstract class CharEscaper extends Escaper { 48 /** Constructor for use by subclasses. */ 49 protected CharEscaper() {} 50 51 /** 52 * Returns the escaped form of a given literal string. 53 * 54 * @param string the literal string to be escaped 55 * @return the escaped form of {@code string} 56 * @throws NullPointerException if {@code string} is null 57 */ 58 @Override public String escape(String string) { 59 checkNotNull(string); // GWT specific check (do not optimize) 60 // Inlineable fast-path loop which hands off to escapeSlow() only if needed 61 int length = string.length(); 62 for (int index = 0; index < length; index++) { 63 if (escape(string.charAt(index)) != null) { 64 return escapeSlow(string, index); 65 } 66 } 67 return string; 68 } 69 70 /** 71 * Returns the escaped form of a given literal string, starting at the given index. This method is 72 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is 73 * protected to allow subclasses to override the fastpath escaping function to inline their 74 * escaping test. See {@link CharEscaperBuilder} for an example usage. 75 * 76 * @param s the literal string to be escaped 77 * @param index the index to start escaping from 78 * @return the escaped form of {@code string} 79 * @throws NullPointerException if {@code string} is null 80 */ 81 protected final String escapeSlow(String s, int index) { 82 int slen = s.length(); 83 84 // Get a destination buffer and setup some loop variables. 85 char[] dest = Platform.charBufferFromThreadLocal(); 86 int destSize = dest.length; 87 int destIndex = 0; 88 int lastEscape = 0; 89 90 // Loop through the rest of the string, replacing when needed into the 91 // destination buffer, which gets grown as needed as well. 92 for (; index < slen; index++) { 93 94 // Get a replacement for the current character. 95 char[] r = escape(s.charAt(index)); 96 97 // If no replacement is needed, just continue. 98 if (r == null) continue; 99 100 int rlen = r.length; 101 int charsSkipped = index - lastEscape; 102 103 // This is the size needed to add the replacement, not the full size 104 // needed by the string. We only regrow when we absolutely must, and 105 // when we do grow, grow enough to avoid excessive growing. Grow. 106 int sizeNeeded = destIndex + charsSkipped + rlen; 107 if (destSize < sizeNeeded) { 108 destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index); 109 dest = growBuffer(dest, destIndex, destSize); 110 } 111 112 // If we have skipped any characters, we need to copy them now. 113 if (charsSkipped > 0) { 114 s.getChars(lastEscape, index, dest, destIndex); 115 destIndex += charsSkipped; 116 } 117 118 // Copy the replacement string into the dest buffer as needed. 119 if (rlen > 0) { 120 System.arraycopy(r, 0, dest, destIndex, rlen); 121 destIndex += rlen; 122 } 123 lastEscape = index + 1; 124 } 125 126 // Copy leftover characters if there are any. 127 int charsLeft = slen - lastEscape; 128 if (charsLeft > 0) { 129 int sizeNeeded = destIndex + charsLeft; 130 if (destSize < sizeNeeded) { 131 132 // Regrow and copy, expensive! No padding as this is the final copy. 133 dest = growBuffer(dest, destIndex, sizeNeeded); 134 } 135 s.getChars(lastEscape, slen, dest, destIndex); 136 destIndex = sizeNeeded; 137 } 138 return new String(dest, 0, destIndex); 139 } 140 141 /** 142 * Returns the escaped form of the given character, or {@code null} if this character does not 143 * need to be escaped. If an empty array is returned, this effectively strips the input character 144 * from the resulting text. 145 * 146 * <p>If the character does not need to be escaped, this method should return {@code null}, rather 147 * than a one-character array containing the character itself. This enables the escaping algorithm 148 * to perform more efficiently. 149 * 150 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should 151 * not throw any exceptions. 152 * 153 * @param c the character to escape if necessary 154 * @return the replacement characters, or {@code null} if no escaping was needed 155 */ 156 protected abstract char[] escape(char c); 157 158 /** 159 * Helper method to grow the character buffer as needed, this only happens once in a while so it's 160 * ok if it's in a method call. If the index passed in is 0 then no copying will be done. 161 */ 162 private static char[] growBuffer(char[] dest, int index, int size) { 163 char[] copy = new char[size]; 164 if (index > 0) { 165 System.arraycopy(dest, 0, copy, 0, index); 166 } 167 return copy; 168 } 169 170 /** 171 * The multiplier for padding to use when growing the escape buffer. 172 */ 173 private static final int DEST_PAD_MULTIPLIER = 2; 174 }