View Javadoc
1   /*
2    * Copyright (C) 2011 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.hash;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  import static com.google.common.base.Preconditions.checkState;
20  
21  import com.google.common.annotations.Beta;
22  import com.google.common.base.Preconditions;
23  import com.google.common.primitives.Ints;
24  import com.google.common.primitives.UnsignedInts;
25  
26  import java.io.Serializable;
27  import java.security.MessageDigest;
28  
29  import javax.annotation.Nullable;
30  
31  /**
32   * An immutable hash code of arbitrary bit length.
33   *
34   * @author Dimitris Andreou
35   * @author Kurt Alfred Kluever
36   * @since 11.0
37   */
38  @Beta
39  public abstract class HashCode {
40    HashCode() {}
41  
42    /**
43     * Returns the number of bits in this hash code; a positive multiple of 8.
44     */
45    public abstract int bits();
46  
47    /**
48     * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
49     * an {@code int} value in little-endian order.
50     *
51     * @throws IllegalStateException if {@code bits() < 32}
52     */
53    public abstract int asInt();
54  
55    /**
56     * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
57     * a {@code long} value in little-endian order.
58     *
59     * @throws IllegalStateException if {@code bits() < 64}
60     */
61    public abstract long asLong();
62  
63    /**
64     * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
65     * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
66     * most-significant bytes.
67     *
68     * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
69     */
70    public abstract long padToLong();
71  
72    /**
73     * Returns the value of this hash code as a byte array. The caller may modify the byte array;
74     * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
75     * returned by this method.
76     */
77    // TODO(user): consider ByteString here, when that is available
78    public abstract byte[] asBytes();
79  
80    /**
81     * Copies bytes from this hash code into {@code dest}.
82     *
83     * @param dest the byte array into which the hash code will be written
84     * @param offset the start offset in the data
85     * @param maxLength the maximum number of bytes to write
86     * @return the number of bytes written to {@code dest}
87     * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
88     */
89    public int writeBytesTo(byte[] dest, int offset, int maxLength) {
90      maxLength = Ints.min(maxLength, bits() / 8);
91      Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
92      writeBytesToImpl(dest, offset, maxLength);
93      return maxLength;
94    }
95  
96    abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
97  
98    /**
99     * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
100    * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
101    * array or else you will break the immutability contract of {@code HashCode}.
102    */
103   byte[] getBytesInternal() {
104     return asBytes();
105   }
106 
107   /**
108    * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that
109    * they have the same number of bits.
110    */
111   abstract boolean equalsSameBits(HashCode that);
112 
113   /**
114    * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
115    * are interpreted in little endian order.
116    *
117    * @since 15.0 (since 12.0 in HashCodes)
118    */
119   public static HashCode fromInt(int hash) {
120     return new IntHashCode(hash);
121   }
122 
123   private static final class IntHashCode extends HashCode implements Serializable {
124     final int hash;
125 
126     IntHashCode(int hash) {
127       this.hash = hash;
128     }
129 
130     @Override
131     public int bits() {
132       return 32;
133     }
134 
135     @Override
136     public byte[] asBytes() {
137       return new byte[] {
138           (byte) hash,
139           (byte) (hash >> 8),
140           (byte) (hash >> 16),
141           (byte) (hash >> 24)};
142     }
143 
144     @Override
145     public int asInt() {
146       return hash;
147     }
148 
149     @Override
150     public long asLong() {
151       throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
152     }
153 
154     @Override
155     public long padToLong() {
156       return UnsignedInts.toLong(hash);
157     }
158 
159     @Override
160     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
161       for (int i = 0; i < maxLength; i++) {
162         dest[offset + i] = (byte) (hash >> (i * 8));
163       }
164     }
165 
166     @Override boolean equalsSameBits(HashCode that) {
167       return hash == that.asInt();
168     }
169 
170     private static final long serialVersionUID = 0;
171   }
172 
173   /**
174    * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
175    * are interpreted in little endian order.
176    *
177    * @since 15.0 (since 12.0 in HashCodes)
178    */
179   public static HashCode fromLong(long hash) {
180     return new LongHashCode(hash);
181   }
182 
183   private static final class LongHashCode extends HashCode implements Serializable {
184     final long hash;
185 
186     LongHashCode(long hash) {
187       this.hash = hash;
188     }
189 
190     @Override
191     public int bits() {
192       return 64;
193     }
194 
195     @Override
196     public byte[] asBytes() {
197       return new byte[] {
198           (byte) hash,
199           (byte) (hash >> 8),
200           (byte) (hash >> 16),
201           (byte) (hash >> 24),
202           (byte) (hash >> 32),
203           (byte) (hash >> 40),
204           (byte) (hash >> 48),
205           (byte) (hash >> 56)};
206     }
207 
208     @Override
209     public int asInt() {
210       return (int) hash;
211     }
212 
213     @Override
214     public long asLong() {
215       return hash;
216     }
217 
218     @Override
219     public long padToLong() {
220       return hash;
221     }
222 
223     @Override
224     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
225       for (int i = 0; i < maxLength; i++) {
226         dest[offset + i] = (byte) (hash >> (i * 8));
227       }
228     }
229 
230     @Override
231     boolean equalsSameBits(HashCode that) {
232       return hash == that.asLong();
233     }
234 
235     private static final long serialVersionUID = 0;
236   }
237 
238   /**
239    * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve
240    * the immutability contract of {@code HashCode}. The array cannot be empty.
241    *
242    * @since 15.0 (since 12.0 in HashCodes)
243    */
244   public static HashCode fromBytes(byte[] bytes) {
245     checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
246     return fromBytesNoCopy(bytes.clone());
247   }
248 
249   /**
250    * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively,
251    * so it must be handed-off so as to preserve the immutability contract of {@code HashCode}.
252    */
253   static HashCode fromBytesNoCopy(byte[] bytes) {
254     return new BytesHashCode(bytes);
255   }
256 
257   private static final class BytesHashCode extends HashCode implements Serializable {
258     final byte[] bytes;
259 
260     BytesHashCode(byte[] bytes) {
261       this.bytes = checkNotNull(bytes);
262     }
263 
264     @Override
265     public int bits() {
266       return bytes.length * 8;
267     }
268 
269     @Override
270     public byte[] asBytes() {
271       return bytes.clone();
272     }
273 
274     @Override
275     public int asInt() {
276       checkState(bytes.length >= 4,
277           "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
278       return (bytes[0] & 0xFF)
279           | ((bytes[1] & 0xFF) << 8)
280           | ((bytes[2] & 0xFF) << 16)
281           | ((bytes[3] & 0xFF) << 24);
282     }
283 
284     @Override
285     public long asLong() {
286       checkState(bytes.length >= 8,
287           "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).", bytes.length);
288       return padToLong();
289     }
290 
291     @Override
292     public long padToLong() {
293       long retVal = (bytes[0] & 0xFF);
294       for (int i = 1; i < Math.min(bytes.length, 8); i++) {
295         retVal |= (bytes[i] & 0xFFL) << (i * 8);
296       }
297       return retVal;
298     }
299 
300     @Override
301     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
302       System.arraycopy(bytes, 0, dest, offset, maxLength);
303     }
304 
305     @Override
306     byte[] getBytesInternal() {
307       return bytes;
308     }
309 
310     @Override
311     boolean equalsSameBits(HashCode that) {
312       return MessageDigest.isEqual(bytes, that.getBytesInternal());
313     }
314 
315     private static final long serialVersionUID = 0;
316   }
317 
318   /**
319    * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
320    * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
321    *
322    * <p>This method accepts the exact format generated by {@link #toString}. If you require more
323    * lenient {@code base 16} decoding, please use
324    * {@link com.google.common.io.BaseEncoding#decode} (and pass the result to {@link #fromBytes}).
325    *
326    * @since 15.0
327    */
328   public static HashCode fromString(String string) {
329     checkArgument(string.length() >= 2,
330         "input string (%s) must have at least 2 characters", string);
331     checkArgument(string.length() % 2 == 0,
332         "input string (%s) must have an even number of characters", string);
333 
334     byte[] bytes = new byte[string.length() / 2];
335     for (int i = 0; i < string.length(); i += 2) {
336       int ch1 = decode(string.charAt(i)) << 4;
337       int ch2 = decode(string.charAt(i + 1));
338       bytes[i / 2] = (byte) (ch1 + ch2);
339     }
340     return fromBytesNoCopy(bytes);
341   }
342 
343   private static int decode(char ch) {
344     if (ch >= '0' && ch <= '9') {
345       return ch - '0';
346     }
347     if (ch >= 'a' && ch <= 'f') {
348       return ch - 'a' + 10;
349     }
350     throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
351   }
352 
353   @Override
354   public final boolean equals(@Nullable Object object) {
355     if (object instanceof HashCode) {
356       HashCode that = (HashCode) object;
357       return bits() == that.bits() && equalsSameBits(that);
358     }
359     return false;
360   }
361 
362   /**
363    * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined
364    * (so, for example, you can safely put {@code HashCode} instances into a {@code
365    * HashSet}) but is otherwise probably not what you want to use.
366    */
367   @Override
368   public final int hashCode() {
369     // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
370     // already a (presumably) high-quality hash code, any four bytes of it will do.
371     if (bits() >= 32) {
372       return asInt();
373     }
374     // If we have less than 4 bytes, use them all.
375     byte[] bytes = asBytes();
376     int val = (bytes[0] & 0xFF);
377     for (int i = 1; i < bytes.length; i++) {
378       val |= ((bytes[i] & 0xFF) << (i * 8));
379     }
380     return val;
381   }
382 
383   /**
384    * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
385    * hexadecimal number in lower case.
386    *
387    * <p>Note that if the output is considered to be a single hexadecimal number, this hash code's
388    * bytes are the <i>big-endian</i> representation of that number. This may be surprising since
389    * everything else in the hashing API uniformly treats multibyte values as little-endian. But
390    * this format conveniently matches that of utilities such as the UNIX {@code md5sum} command.
391    *
392    * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
393    */
394   @Override
395   public final String toString() {
396     byte[] bytes = asBytes();
397     StringBuilder sb = new StringBuilder(2 * bytes.length);
398     for (byte b : bytes) {
399       sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
400     }
401     return sb.toString();
402   }
403 
404   private static final char[] hexDigits = "0123456789abcdef".toCharArray();
405 }