View Javadoc
1   /*
2    * Copyright (C) 2009 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.escape;
18  
19  import com.google.common.annotations.GwtCompatible;
20  import com.google.common.collect.ImmutableMap;
21  import com.google.common.escape.testing.EscaperAsserts;
22  
23  import junit.framework.TestCase;
24  
25  import java.io.IOException;
26  import java.util.Map;
27  
28  /**
29   * @author David Beaumont
30   */
31  @GwtCompatible
32  public class ArrayBasedUnicodeEscaperTest extends TestCase {
33    private static final Map<Character, String> NO_REPLACEMENTS =
34        ImmutableMap.of();
35    private static final Map<Character, String> SIMPLE_REPLACEMENTS =
36        ImmutableMap.of(
37            '\n', "<newline>",
38            '\t', "<tab>",
39            '&', "<and>");
40    private static final char[] NO_CHARS = new char[0];
41  
42    public void testReplacements() throws IOException {
43      // In reality this is not a very sensible escaper to have (if you are only
44      // escaping elements from a map you would use a ArrayBasedCharEscaper).
45      UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
46          Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
47            @Override protected char[] escapeUnsafe(int c) {
48              return NO_CHARS;
49            }
50      };
51      EscaperAsserts.assertBasic(escaper);
52      assertEquals("<tab>Fish <and> Chips<newline>",
53          escaper.escape("\tFish & Chips\n"));
54  
55      // Verify that everything else is left unescaped.
56      String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
57      assertEquals(safeChars, escaper.escape(safeChars));
58  
59      // Ensure that Unicode escapers behave correctly wrt badly formed input.
60      String badUnicode = "\uDC00\uD800";
61      try {
62        escaper.escape(badUnicode);
63        fail("should fail for bad Unicode");
64      } catch (IllegalArgumentException e) {
65        // Pass
66      }
67    }
68  
69    public void testSafeRange() throws IOException {
70      // Basic escaping of unsafe chars (wrap them in {,}'s)
71      UnicodeEscaper wrappingEscaper =
72          new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
73            @Override protected char[] escapeUnsafe(int c) {
74              return ("{" + (char) c + "}").toCharArray();
75            }
76          };
77      EscaperAsserts.assertBasic(wrappingEscaper);
78      // '[' and '@' lie either side of [A-Z].
79      assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
80    }
81  
82    public void testDeleteUnsafeChars() throws IOException {
83      UnicodeEscaper deletingEscaper =
84          new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
85            @Override protected char[] escapeUnsafe(int c) {
86              return NO_CHARS;
87            }
88          };
89      EscaperAsserts.assertBasic(deletingEscaper);
90      assertEquals("Everything outside the printable ASCII range is deleted.",
91          deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
92              "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
93    }
94  
95    public void testReplacementPriority() throws IOException {
96      UnicodeEscaper replacingEscaper =
97          new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
98            private final char[] unknown = new char[] { '?' };
99            @Override protected char[] escapeUnsafe(int c) {
100             return unknown;
101           }
102         };
103     EscaperAsserts.assertBasic(replacingEscaper);
104 
105     // Replacements are applied first regardless of whether the character is in
106     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
107     assertEquals("<tab>Fish <and>? Chips?<newline>",
108         replacingEscaper.escape("\tFish &\0 Chips\r\n"));
109   }
110 
111   public void testCodePointsFromSurrogatePairs() throws IOException {
112     UnicodeEscaper surrogateEscaper =
113         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
114           private final char[] escaped = new char[] { 'X' };
115           @Override protected char[] escapeUnsafe(int c) {
116             return escaped;
117           }
118         };
119     EscaperAsserts.assertBasic(surrogateEscaper);
120 
121     // A surrogate pair defining a code point within the safe range.
122     String safeInput = "\uD800\uDC00";  // 0x10000
123     assertEquals(safeInput, surrogateEscaper.escape(safeInput));
124 
125     // A surrogate pair defining a code point outside the safe range (but both
126     // of the surrogate characters lie within the safe range). It is important
127     // not to accidentally treat this as a sequence of safe characters.
128     String unsafeInput = "\uDBFF\uDFFF";  // 0x10FFFF
129     assertEquals("X", surrogateEscaper.escape(unsafeInput));
130   }
131 }