001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.classloader;
014    
015    import static org.jikesrvm.classloader.ClassLoaderConstants.ArrayTypeCode;
016    import static org.jikesrvm.classloader.ClassLoaderConstants.BooleanTypeCode;
017    import static org.jikesrvm.classloader.ClassLoaderConstants.ByteTypeCode;
018    import static org.jikesrvm.classloader.ClassLoaderConstants.CharTypeCode;
019    import static org.jikesrvm.classloader.ClassLoaderConstants.ClassTypeCode;
020    import static org.jikesrvm.classloader.ClassLoaderConstants.DoubleTypeCode;
021    import static org.jikesrvm.classloader.ClassLoaderConstants.FloatTypeCode;
022    import static org.jikesrvm.classloader.ClassLoaderConstants.IntTypeCode;
023    import static org.jikesrvm.classloader.ClassLoaderConstants.LongTypeCode;
024    import static org.jikesrvm.classloader.ClassLoaderConstants.ShortTypeCode;
025    import static org.jikesrvm.classloader.ClassLoaderConstants.VoidTypeCode;
026    
027    import java.io.UTFDataFormatException;
028    import java.lang.ref.WeakReference;
029    import java.util.WeakHashMap;
030    
031    import org.jikesrvm.VM;
032    import org.jikesrvm.runtime.Statics;
033    import org.jikesrvm.util.ImmutableEntryHashMapRVM;
034    import org.jikesrvm.util.StringUtilities;
035    import org.vmmagic.pragma.Pure;
036    import org.vmmagic.pragma.Uninterruptible;
037    import org.vmmagic.unboxed.Offset;
038    
039    /**
040     * A utf8-encoded byte string.
041     * <p>
042     * Atoms are interned (canonicalized)
043     * so they may be compared for equality using the "==" operator.
044     * <p>
045     * Atoms are used to represent names, descriptors, and string literals
046     * appearing in a class's constant pool.
047     * <p>
048     * There is almost always a zero-length Atom, since any class which
049     * contains statements like:
050     * <pre>
051     *          return "";
052     * </pre>
053     * will have one in its constant pool.
054     */
055    public final class Atom {
056    
057      /**
058       * Used to canonicalize Atoms: possibly non-canonical Atom => Atom
059       */
060      private static final ImmutableEntryHashMapRVM<Atom, Atom> dictionary =
061        new ImmutableEntryHashMapRVM<Atom, Atom>(12000);
062    
063      /**
064       * 2^LOG_ROW_SIZE is the number of elements per row
065       */
066      private static final int LOG_ROW_SIZE = 10;
067      /**
068       * Mask to ascertain row from id number
069       */
070      private static final int ROW_MASK = (1 << LOG_ROW_SIZE)-1;
071      /**
072       * Dictionary of all Atom instances.
073       */
074      private static Atom[][] atoms = new Atom[36][1 << LOG_ROW_SIZE];
075    
076      /**
077       * Used to assign ids. Don't use id 0 to allow clients to use id 0 as a 'null'.
078       */
079      private static int nextId = 1;
080    
081      /**
082       * A reference to either a unicode String encoding the atom, an offset in the
083       * JTOC holding a unicode string encoding the atom or null.
084       */
085      private Object unicodeStringOrJTOCoffset;
086    
087      /**
088       * The utf8 value this atom represents
089       */
090      private final byte[] val;
091    
092      /**
093       * The id of this atom
094       */
095      private final int id;
096    
097      /**
098       *@return the id of this atom.
099       */
100      int getId() { return id; }
101    
102      /**
103       * Find or create an atom.
104       * @param str atom value, as string literal whose characters are unicode
105       * @return atom
106       */
107      @Pure
108      public static Atom findOrCreateUnicodeAtom(String str) {
109        return findOrCreate(null, true, str);
110      }
111    
112      /**
113       * Find an atom.
114       * @param str atom value, as string literal whose characters are unicode
115       * @return atom or null if it doesn't already exist
116       */
117      public static Atom findUnicodeAtom(String str) {
118        return findOrCreate(null, false, str);
119      }
120    
121      /**
122       * Find or create an atom.
123       * @param str atom value, as string literal whose characters are from
124       *            ascii subset of unicode (not including null)
125       * @return atom
126       */
127      @Pure
128      public static Atom findOrCreateAsciiAtom(String str) {
129        return findOrCreate(null, true, str);
130      }
131    
132      /**
133       * Find an atom.
134       * @param str atom value, as string literal whose characters are from
135       *            ascii subset of unicode (not including null)
136       * @return atom or null if it doesn't already exist
137       */
138      public static Atom findAsciiAtom(String str) {
139        return findOrCreate(null, false, str);
140      }
141    
142      /**
143       * Find or create an atom.
144       * @param utf8 atom value, as utf8 encoded bytes
145       * @return atom
146       */
147      @Pure
148      public static Atom findOrCreateUtf8Atom(byte[] utf8) {
149        return findOrCreate(utf8, true, null);
150      }
151    
152      /**
153       * Find an atom.
154       * @param utf8 atom value, as utf8 encoded bytes
155       * @return atom or null it it doesn't already exist
156       */
157      public static Atom findUtf8Atom(byte[] utf8) {
158        return findOrCreate(utf8, false, null);
159      }
160    
161      /**
162       * Find an atom from the subsequence of another
163       * @param utf8 byte backing of atom
164       * @param off offset of new atom
165       * @param len length of new atom
166       * @param str possible string encoding of atom or null
167       * @return atom
168       */
169      private static Atom findOrCreate(byte[] utf8, int off, int len, String str) {
170        if (str != null) {
171          // string substring is cheap, so try to find using this if possible
172          Atom val = new Atom(null, -1, str.substring(off, off+len));
173          val = dictionary.get(val);
174          if (val != null) return val;
175        }
176        byte[] val = new byte[len];
177        for (int i = 0; i < len; ++i) {
178          val[i] = utf8[off++];
179        }
180        return findOrCreate(val, true, null);
181      }
182    
183      /**
184       * This is the findOrCreate() method through which all Atoms are
185       * ultimately created.   The constructor for Atom is a private method, so
186       * someone has to call one of the public findOrCreate() methods to get a new
187       * one.  And they all feed through here.
188       */
189      private static Atom findOrCreate(byte[] bytes, boolean create, String str) {
190        Atom val = new Atom(bytes, -1, str);
191        val = dictionary.get(val);
192        if (val != null || !create) return val;
193        synchronized(Atom.class) {
194          val = new Atom(bytes, nextId++, str);
195          int column = val.id >> LOG_ROW_SIZE;
196          if (column == atoms.length) {
197            Atom[][] tmp = new Atom[column+1][];
198            for (int i=0; i < column; i++) {
199              tmp[i] = atoms[i];
200            }
201            atoms = tmp;
202            atoms[column] = new Atom[1 << LOG_ROW_SIZE];
203          }
204          atoms[column][val.id & ROW_MASK] = val;
205          dictionary.put(val, val);
206        }
207        return val;
208      }
209    
210      /**
211       * @param id the id of an Atom
212       * @return the Atom whose id was given
213       */
214      @Pure
215      @Uninterruptible
216      public static Atom getAtom(int id) {
217        return atoms[id >> LOG_ROW_SIZE][id & ROW_MASK];
218      }
219    
220      //-------------//
221      // conversions //
222      //-------------//
223    
224      /**
225       * Return printable representation of "this" atom.
226       * Does not correctly handle UTF8 translation.
227       */
228      @Override
229      @Pure
230      public String toString() {
231        return StringUtilities.asciiBytesToString(val);
232      }
233    
234      /**
235       * Get at a string-like representation without doing any heap allocation.
236       * Hideous but necessary.  We will use it in the PrintContainer class.
237       */
238      @Uninterruptible
239      public byte[] toByteArray() {
240        return val;
241      }
242    
243      /**
244       * Return atom as a string literal
245       */
246      @Pure
247      public synchronized String toUnicodeString() throws java.io.UTFDataFormatException {
248        if (unicodeStringOrJTOCoffset == null) {
249          String s = UTF8Convert.fromUTF8(val);
250          if (VM.runningVM) {
251            s = InternedStrings.internUnfoundString(s);
252            unicodeStringOrJTOCoffset = s;
253          } else if (!VM.writingImage) {
254            s = s.intern();
255            int offset = Statics.findOrCreateObjectLiteral(s);
256            unicodeStringOrJTOCoffset = offset;
257          }
258          return s;
259        } else if (unicodeStringOrJTOCoffset instanceof String) {
260          return (String)unicodeStringOrJTOCoffset;
261        } else {
262          if (VM.runningVM) {
263            return (String)Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
264          } else {
265            return UTF8Convert.fromUTF8(val).intern();
266          }
267        }
268      }
269    
270      /**
271       * Atom as string literal or null if atom hasn't been converted
272       */
273      private synchronized String toUnicodeStringInternal() {
274        if (unicodeStringOrJTOCoffset == null) {
275          return null;
276        } else if (unicodeStringOrJTOCoffset instanceof String) {
277          return (String)unicodeStringOrJTOCoffset;
278        } else {
279          if (VM.runningVM) {
280            Object result = Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
281            return (String)result;
282          } else {
283            try {
284              return UTF8Convert.fromUTF8(val).intern();
285            } catch (UTFDataFormatException e) {
286              throw new Error("Error in UTF data encoding: ", e);
287            }
288          }
289        }
290      }
291    
292      /**
293       * Offset of an atom's string in the JTOC, for string literals
294       * @return Offset of string literal in JTOC
295       * @throws java.io.UTFDataFormatException
296       */
297      public synchronized int getStringLiteralOffset() throws java.io.UTFDataFormatException {
298        if (unicodeStringOrJTOCoffset == null) {
299          String s = UTF8Convert.fromUTF8(val);
300          if (VM.runningVM) {
301            s = InternedStrings.internUnfoundString(s);
302          } else {
303            s = s.intern();
304          }
305          int offset = Statics.findOrCreateObjectLiteral(s);
306          unicodeStringOrJTOCoffset = offset;
307          return offset;
308        } else if (unicodeStringOrJTOCoffset instanceof String) {
309          int offset = Statics.findOrCreateObjectLiteral(unicodeStringOrJTOCoffset);
310          unicodeStringOrJTOCoffset = offset;
311          return offset;
312        } else {
313          return (Integer)unicodeStringOrJTOCoffset;
314        }
315      }
316    
317      /**
318       * Return array descriptor corresponding to "this" array-element descriptor.
319       * this: array-element descriptor - something like "I" or "Ljava/lang/Object;"
320       * @return array descriptor - something like "[I" or "[Ljava/lang/Object;"
321       */
322      @Pure
323      Atom arrayDescriptorFromElementDescriptor() {
324        if (VM.VerifyAssertions) {
325          VM._assert(val.length > 0);
326        }
327        byte[] sig = new byte[1 + val.length];
328        sig[0] = (byte) '[';
329        for (int i = 0, n = val.length; i < n; ++i) {
330          sig[i + 1] = val[i];
331        }
332        return findOrCreate(sig, true, null);
333      }
334    
335      /**
336       * Return class descriptor corresponding to "this" class name.
337       * this: class name       - something like "java.lang.Object"
338       * @return class descriptor - something like "Ljava/lang/Object;"
339       */
340      @Pure
341      public Atom descriptorFromClassName() {
342        if (VM.VerifyAssertions) {
343          VM._assert(val.length > 0);
344        }
345        if (val[0] == '[') return this;
346        byte[] sig = new byte[1 + val.length + 1];
347        sig[0] = (byte) 'L';
348        for (int i = 0, n = val.length; i < n; ++i) {
349          byte b = val[i];
350          if (b == '.') b = '/';
351          sig[i + 1] = b;
352        }
353        sig[sig.length - 1] = (byte) ';';
354        return findOrCreate(sig, true, null);
355      }
356    
357      /**
358       * Return class name corresponding to "this" class descriptor.
359       * this: class descriptor - something like "Ljava/lang/String;"
360       * @return class name - something like "java.lang.String"
361       */
362      @Pure
363      public String classNameFromDescriptor() {
364        if (VM.VerifyAssertions) {
365          VM._assert(val.length > 0);
366          VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
367        }
368        if (unicodeStringOrJTOCoffset == null) {
369          return StringUtilities.asciiBytesToString(val, 1, val.length - 2).replace('/', '.');
370        } else {
371          return toUnicodeStringInternal().substring(1, val.length-1).replace('/','.');
372        }
373      }
374    
375      /**
376       * Return name of class file corresponding to "this" class descriptor.
377       * this: class descriptor - something like "Ljava/lang/String;"
378       * @return class file name  - something like "java/lang/String.class"
379       */
380      @Pure
381      public String classFileNameFromDescriptor() {
382        if (VM.VerifyAssertions) {
383          VM._assert(val.length > 0);
384          VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
385        }
386        if (unicodeStringOrJTOCoffset == null) {
387          return StringUtilities.asciiBytesToString(val, 1, val.length - 2) + ".class";
388        } else {
389          return toUnicodeStringInternal().substring(1, val.length-1) + ".class";
390        }
391      }
392    
393      //----------------//
394      // classification //
395      //----------------//
396    
397      /**
398       * Is "this" atom a reserved member name?
399       * Note: Sun has reserved all member names starting with '<' for future use.
400       *       At present, only <init> and <clinit> are used.
401       */
402      @Uninterruptible
403      @Pure
404      public boolean isReservedMemberName() {
405        if (VM.VerifyAssertions) VM._assert(val.length > 0);
406        return val[0] == '<';
407      }
408    
409      /**
410       * Is "this" atom a class descriptor?
411       */
412      @Uninterruptible
413      @Pure
414      public boolean isClassDescriptor() {
415        if (VM.VerifyAssertions) VM._assert(val.length > 0);
416        return val[0] == 'L';
417      }
418    
419      /**
420       * Is "this" atom an array descriptor?
421       */
422      @Uninterruptible
423      @Pure
424      public boolean isArrayDescriptor() {
425        if (VM.VerifyAssertions) VM._assert(val.length > 0);
426        return val[0] == '[';
427      }
428    
429      /**
430       * Is "this" atom a method descriptor?
431       */
432      @Uninterruptible
433      @Pure
434      public boolean isMethodDescriptor() {
435        if (VM.VerifyAssertions) VM._assert(val.length > 0);
436        return val[0] == '(';
437      }
438    
439      //--------------------//
440      // descriptor parsing //
441      //--------------------//
442    
443      /**
444       * Parse "this" method descriptor to obtain description of method's
445       * return type.
446       * this: method descriptor - something like "(III)V"
447       * @return type description
448       */
449      @Pure
450      public TypeReference parseForReturnType(ClassLoader cl) {
451        if (VM.VerifyAssertions) {
452          VM._assert(val.length > 0);
453          VM._assert(val[0] == '(', "Method descriptors start with `(`");
454        }
455        int i = 0;
456        while (val[i++] != ')') {
457          if (VM.VerifyAssertions) {
458            VM._assert(i < val.length, "Method descriptor missing closing ')'");
459          }
460        }
461        if (VM.VerifyAssertions) {
462          VM._assert(i < val.length, "Method descriptor missing type after closing ')'");
463        }
464        switch (val[i]) {
465          case VoidTypeCode:
466            return TypeReference.Void;
467          case BooleanTypeCode:
468            return TypeReference.Boolean;
469          case ByteTypeCode:
470            return TypeReference.Byte;
471          case ShortTypeCode:
472            return TypeReference.Short;
473          case IntTypeCode:
474            return TypeReference.Int;
475          case LongTypeCode:
476            return TypeReference.Long;
477          case FloatTypeCode:
478            return TypeReference.Float;
479          case DoubleTypeCode:
480            return TypeReference.Double;
481          case CharTypeCode:
482            return TypeReference.Char;
483          case ClassTypeCode:   // fall through
484          case ArrayTypeCode:
485            return TypeReference.findOrCreate(cl, findOrCreate(val, i, val.length - i, toUnicodeStringInternal()));
486          default:
487            if (VM.VerifyAssertions) {
488              VM._assert(VM.NOT_REACHED,
489                         "Need a valid method descriptor; got \"" +
490                         this +
491                         "\"; can't parse the character '" +
492                         ((char)val[i]) +
493                         "'");
494            }
495            return null;            // NOTREACHED
496        }
497      }
498    
499      /**
500       * Parse "this" method descriptor to obtain descriptions of method's
501       * parameters.
502       * this: method descriptor     - something like "(III)V"
503       * @return parameter descriptions
504       */
505      @Pure
506      public TypeReference[] parseForParameterTypes(ClassLoader cl) {
507        if (VM.VerifyAssertions) {
508          VM._assert(val.length > 0);
509          VM._assert(val[0] == '(', "Method descriptors start with `(`");
510        }
511        TypeReferenceVector sigs = new TypeReferenceVector();
512        int i = 1;
513        while (true) {
514          if (VM.VerifyAssertions) {
515            VM._assert(i < val.length, "Method descriptor missing closing `)`");
516          }
517    
518          switch (val[i++]) {
519            case VoidTypeCode:
520              sigs.addElement(TypeReference.Void);
521              continue;
522            case BooleanTypeCode:
523              sigs.addElement(TypeReference.Boolean);
524              continue;
525            case ByteTypeCode:
526              sigs.addElement(TypeReference.Byte);
527              continue;
528            case ShortTypeCode:
529              sigs.addElement(TypeReference.Short);
530              continue;
531            case IntTypeCode:
532              sigs.addElement(TypeReference.Int);
533              continue;
534            case LongTypeCode:
535              sigs.addElement(TypeReference.Long);
536              continue;
537            case FloatTypeCode:
538              sigs.addElement(TypeReference.Float);
539              continue;
540            case DoubleTypeCode:
541              sigs.addElement(TypeReference.Double);
542              continue;
543            case CharTypeCode:
544              sigs.addElement(TypeReference.Char);
545              continue;
546            case ClassTypeCode: {
547              int off = i - 1;
548              while (val[i++] != ';') {
549                if (VM.VerifyAssertions) {
550                  VM._assert(i < val.length, "class descriptor missing a final ';'");
551                }
552              }
553              sigs.addElement(TypeReference
554                  .findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
555              continue;
556            }
557            case ArrayTypeCode: {
558              int off = i - 1;
559              while (val[i] == ArrayTypeCode) {
560                if (VM.VerifyAssertions) {
561                  VM._assert(i < val.length, "malformed array descriptor");
562                }
563                ++i;
564              }
565              if (val[i++] == ClassTypeCode) while (val[i++] != ';') ;
566              sigs.addElement(TypeReference.findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
567              continue;
568            }
569            case(byte) ')': // end of parameter list
570              return sigs.finish();
571    
572            default:
573              if (VM.VerifyAssertions) {
574                VM._assert(VM.NOT_REACHED,
575                           "The class descriptor \"" +
576                           this +
577                           "\" contains the illegal" +
578                           " character '" +
579                           ((char)val[i]) +
580                           "'");
581              }
582          }
583        }
584      }
585    
586      /**
587       * Parse "this" method descriptor to obtain descriptions of method's
588       * parameters as classes.
589       * this: method descriptor     - something like "(III)V"
590       * @return parameter classes
591       */
592      @Pure
593      public Class<?>[] parseForParameterClasses(ClassLoader cl) {
594        TypeReference[] typeRefs = this.parseForParameterTypes(cl);
595        Class<?>[] classes = new Class<?>[typeRefs.length];
596        for (int i=0; i < typeRefs.length; i++) {
597          TypeReference t = typeRefs[i];
598          classes[i] = t.resolve().getClassForType();
599        }
600        return classes;
601      }
602    
603      /**
604       * Return the underlying set of bytes for the Atom.  This can be used
605       * to perform comparisons without requiring the allocation of a string.
606       */
607      @Uninterruptible
608      public byte[] getBytes() {
609        return val;
610      }
611    
612      /**
613       * Parse "this" field, parameter, or return descriptor to obtain its
614       * type code.
615       * this: descriptor - something like "Ljava/lang/String;" or "[I" or "I"
616       * @return type code  - something like ObjectTypeCode, ArrayTypeCode, or
617       * IntTypeCode
618       *
619       * The type code will be one of the following constants:
620       *
621       * <pre>
622       *               constant         value
623       *           ----------------     -----
624       *            ClassTypeCode        'L'
625       *            ArrayTypeCode        '['
626       *            VoidTypeCode         'V'
627       *            BooleanTypeCode      'Z'
628       *            ByteTypeCode         'B'
629       *            ShortTypeCode        'S'
630       *            IntTypeCode          'I'
631       *            LongTypeCode         'J'
632       *            FloatTypeCode        'F'
633       *            DoubleTypeCode       'D'
634       *            CharTypeCode         'C'
635       * </pre>
636       */
637      @Pure
638      public byte parseForTypeCode() throws IllegalArgumentException {
639        if (VM.VerifyAssertions) {
640          VM._assert(val.length > 0);
641        }
642        return val[0];
643      }
644    
645      /**
646       * Parse "this" array descriptor to obtain number of dimensions in
647       * corresponding array type.
648       * this: descriptor     - something like "[Ljava/lang/String;" or "[[I"
649       * @return dimensionality - something like "1" or "2"
650       */
651      @Pure
652      public int parseForArrayDimensionality() {
653        if (VM.VerifyAssertions) {
654          VM._assert(val.length > 1, "An array descriptor has at least two characters");
655          VM._assert(val[0] == '[', "An array descriptor must start with '['");
656        }
657        for (int i = 0; ; ++i) {
658          if (VM.VerifyAssertions) {
659            VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
660          }
661          if (val[i] != '[') {
662            return i;
663          }
664        }
665      }
666    
667      /**
668       * Parse "this" array descriptor to obtain type code for its element type.
669       * this: descriptor - something like "[Ljava/lang/String;" or "[I"
670       * @return type code  - something like VM.ObjectTypeCode or VM.IntTypeCode
671       * The type code will be one of the constants appearing in the table above.
672       *
673       * Implementation note: This is supposed to be uninterruptible, since another
674       * allegedly uninterruptible method (RVMArray.getLogElementSize()) calls it.
675       */
676      @Uninterruptible
677      @Pure
678      public byte parseForArrayElementTypeCode() {
679        if (VM.VerifyAssertions) {
680          VM._assert(val.length > 1, "An array descriptor has at least two characters");
681          VM._assert(val[0] == '[', "An array descriptor must start with '['");
682        }
683        return val[1];
684      }
685    
686      /**
687       * Return the innermost element type reference for an array
688       */
689      @Pure
690      public Atom parseForInnermostArrayElementDescriptor() {
691        if (VM.VerifyAssertions) {
692          VM._assert(val.length > 1, "An array descriptor has at least two characters");
693          VM._assert(val[0] == '[', "An array descriptor must start with '['");
694        }
695        int i = 0;
696        while (val[i] == '[') {
697          if (VM.VerifyAssertions) {
698            VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
699          }
700          i++;
701        }
702        return findOrCreate(val, i, val.length - i, toUnicodeStringInternal());
703      }
704    
705      /**
706       * Parse "this" array descriptor to obtain descriptor for array's element
707       * type.
708       * this: array descriptor         - something like "[I"
709       * @return array element descriptor - something like "I"
710       */
711      @Pure
712      public Atom parseForArrayElementDescriptor() {
713        if (VM.VerifyAssertions) {
714          VM._assert(val.length > 1, "An array descriptor has at least two characters");
715          VM._assert(val[0] == '[', "An array descriptor must start with '['");
716        }
717        return findOrCreate(val, 1, val.length - 1, toUnicodeStringInternal());
718      }
719    
720      /**
721       * The set of class prefixes that MUST be loaded by bootstrap classloader.
722       * @see #isBootstrapClassDescriptor()
723       */
724      private static final byte[][] BOOTSTRAP_CLASS_PREFIX_SET =
725          {"Ljava/".getBytes(),
726           "Lorg/jikesrvm/".getBytes(),
727           "Lgnu/java/".getBytes(),
728           "Lgnu/classpath/debug/".getBytes(),
729           "Lgnu/classpath/jdwp/".getBytes(),
730           "Lgnu/classpath/NotImplementedException".getBytes(),
731           "Lgnu/classpath/Pair".getBytes(),
732           "Lgnu/classpath/Pointer".getBytes(),
733           "Lgnu/classpath/Pointer32".getBytes(),
734           "Lgnu/classpath/Pointer64".getBytes(),
735           "Lgnu/classpath/ServiceFactory".getBytes(),
736           "Lgnu/classpath/ServiceProviderLoadingAction".getBytes(),
737           "Lgnu/classpath/SystemProperties".getBytes(),
738           "Lorg/vmmagic/".getBytes(),
739           "Lorg/mmtk/".getBytes()};
740    
741      /**
742       * The set of class prefixes that MUST NOT be loaded by bootstrap classloader.
743       * @see #isBootstrapClassDescriptor()
744       */
745      private static final byte[][] NON_BOOTSTRAP_CLASS_PREFIX_SET =
746          {"Lorg/jikesrvm/tools/ant/".getBytes(),
747           "Lorg/jikesrvm/tools/apt/".getBytes(),
748           "Lorg/jikesrvm/tools/template/".getBytes()};
749    
750      /**
751       * The set of class prefixes for core RVM classes.
752       * @see #isRVMDescriptor()
753       */
754      private static final byte[][] RVM_CLASS_PREFIXES =
755          {"Lorg/jikesrvm/".getBytes(), "Lorg/vmmagic/".getBytes(), "Lorg/mmtk/".getBytes()};
756    
757      /**
758       * @return true if this is a class descriptor of a bootstrap class
759       * (ie a class that must be loaded by the bootstrap class loader)
760       */
761      @Pure
762      public boolean isBootstrapClassDescriptor() {
763        non_bootstrap_outer:
764        for (final byte[] test : NON_BOOTSTRAP_CLASS_PREFIX_SET) {
765          if (test.length > val.length) continue;
766          for (int j = 0; j < test.length; j++) {
767            if (val[j] != test[j]) {
768              continue non_bootstrap_outer;
769            }
770          }
771          return false;
772        }
773        bootstrap_outer:
774        for (final byte[] test : BOOTSTRAP_CLASS_PREFIX_SET) {
775          if (test.length > val.length) continue;
776          for (int j = 0; j < test.length; j++) {
777            if (val[j] != test[j]) {
778              continue bootstrap_outer;
779            }
780          }
781          return true;
782        }
783        return false;
784      }
785    
786      /**
787       * @return true if this is a class descriptor of a RVM core class.  This is
788       * defined as one that it would be unwise to invalidate, since invalidating
789       * it might make it impossible to recompile.
790       */
791      @Pure
792      public boolean isRVMDescriptor() {
793        outer:
794        for (final byte[] test : RVM_CLASS_PREFIXES) {
795          if (test.length > val.length) continue;
796          for (int j = 0; j < test.length; j++) {
797            if (val[j] != test[j]) {
798              continue outer;
799            }
800          }
801          return true;
802        }
803        return false;
804      }
805    
806      //-------------//
807      // annotations //
808      //-------------//
809    
810      /**
811       * Create an annotation name from a class name. For example
812       * Lfoo.bar; becomes Lfoo.bar$$; NB in Sun VMs the annotation name
813       * of the first annotation is $Proxy1. Classpath may later rely on
814       * this to implement serialization correctly.
815       */
816      @Pure
817      public Atom annotationInterfaceToAnnotationClass() {
818        byte[] annotationClassName_tmp = new byte[val.length + 2];
819        System.arraycopy(val, 0, annotationClassName_tmp, 0, val.length - 1);
820        annotationClassName_tmp[val.length - 1] = '$';
821        annotationClassName_tmp[val.length] = '$';
822        annotationClassName_tmp[val.length + 1] = ';';
823        return Atom.findOrCreateUtf8Atom(annotationClassName_tmp);
824      }
825    
826      /**
827       * Create a class name from a type name. For example Lfoo.bar$$;
828       * becomes the string foo.bar
829       */
830      @Pure
831      public String annotationClassToAnnotationInterface() {
832        if (VM.VerifyAssertions) {
833          VM._assert(val.length > 0);
834          VM._assert(val[0] == 'L' && val[val.length - 1] == ';', toString());
835        }
836        return StringUtilities.asciiBytesToString(val, 1, val.length - 4).replace('/', '.');
837      }
838    
839      /**
840       * Is this an annotation class name of the form Lfoo.bar$$;
841       */
842      @Pure
843      public boolean isAnnotationClass() {
844        return (val.length > 4) && (val[val.length - 3] == '$') && (val[val.length - 2] == '$');
845      }
846    
847      //-----------//
848      // debugging //
849      //-----------//
850    
851      @Uninterruptible
852      public void sysWrite() {
853        for (int i = 0, n = val.length; i < n; ++i) {
854          VM.sysWrite((char) val[i]);
855        }
856      }
857    
858      @Uninterruptible
859      public int length() {
860        return val.length;
861      }
862    
863      /**
864       * Create atom from the key that maps to it.
865       */
866      private Atom(byte[] val, int id, String str) {
867        this.id = id;
868        this.unicodeStringOrJTOCoffset = str;
869        if ((val == null) && (id != -1)) {
870          this.val = UTF8Convert.toUTF8(str);
871        } else {
872          this.val = val;
873        }
874      }
875    
876      /*
877       * Hash table utilities
878       */
879      /**
880       * Return the hashCode of an atom, this equals the unicode string encoding of
881       * the atom
882       */
883      @Override
884      public int hashCode() {
885        try {
886          if (unicodeStringOrJTOCoffset != null) {
887            return toUnicodeStringInternal().hashCode();
888          } else {
889            return UTF8Convert.computeStringHashCode(val);
890          }
891        } catch (UTFDataFormatException e) {
892          return 0;
893        }
894      }
895    
896      /**
897       * Outside of this class atoms are canonical and should be compared using ==.
898       * This method is used to maintain atoms in internal hash tables and shouldn't
899       * be used externally.
900       */
901      @Override
902      @Pure
903      public boolean equals(Object other) {
904        // quick test as atoms are generally canonical
905        if (this == other) {
906          return true;
907        } else {
908          if (other instanceof Atom) {
909            Atom that = (Atom)other;
910            // if the atoms are well formed then their identifiers are unique
911            if ((that.id != -1) && (this.id != -1)) {
912              return that.id == this.id;
913            }
914            // one atom isn't well formed, can we do a string comparison to work out equality?
915            if ((this.unicodeStringOrJTOCoffset != null) && (that.unicodeStringOrJTOCoffset != null)) {
916              return toUnicodeStringInternal().equals(that.toUnicodeStringInternal());
917            }
918            try {
919              // perform byte by byte comparison
920              byte[] val1;
921              if (that.val != null) {
922                val1 = that.val;
923              } else {
924                val1 = UTF8Convert.toUTF8(that.toUnicodeString());
925              }
926              byte[] val2;
927              if (this.val != null) {
928                val2 = this.val;
929              } else {
930                val2 = UTF8Convert.toUTF8(toUnicodeString());
931              }
932              if (val1.length == val2.length) {
933                for (int i = 0; i < val1.length; i++) {
934                  if (val1[i] != val2[i]) return false;
935                }
936                return true;
937              }
938            } catch (UTFDataFormatException e) {
939              throw new Error("Error in UTF data encoding: ",e);
940            }
941          }
942          return false;
943        }
944      }
945    
946    
947      /**
948       * Inner class responsible for string interning. This class' initializer is
949       * run during booting.
950       */
951      private static class InternedStrings {
952        /**
953         * Look up for interned strings.
954         */
955        private static final WeakHashMap<String,WeakReference<String>> internedStrings =
956          new WeakHashMap<String,WeakReference<String>>();
957    
958        /**
959         * Find an interned string but don't create it if not found
960         * @param str string to lookup
961         * @return the interned string or null if it isn't interned
962         */
963        static synchronized String findInternedString(String str) {
964          WeakReference<String> ref;
965          ref = internedStrings.get(str);
966          if (ref != null) {
967            String s = ref.get();
968            if (s != null) {
969              return s;
970            }
971          }
972          return null;
973        }
974    
975        /**
976         * Find a string literal from an atom
977         * @param str string to find
978         * @return the string literal or null
979         */
980        static String findAtomString(String str) {
981          Atom atom = findUnicodeAtom(str);
982          if (atom != null) {
983            try {
984              return atom.toUnicodeString();
985            } catch (UTFDataFormatException e) {
986              throw new Error("Error in UTF data encoding: ", e);
987            }
988          }
989          return null;
990        }
991    
992        /**
993         * Intern a string that is not an atom or already interned string
994         * @param str string to intern
995         * @return interned string
996         */
997        static synchronized String internUnfoundString(String str) {
998          // double check string isn't found as we're holding the lock on the class
999          String s = findInternedString(str);
1000          if (s != null) return s;
1001          // If we get to here, then there is no interned version of the String.
1002          // So we make one.
1003          WeakReference<String> ref = new WeakReference<String>(str);
1004          internedStrings.put(str, ref);
1005          return str;
1006        }
1007      }
1008    
1009      /**
1010       * External string intern method called from String.intern. This method should
1011       * return a canonical string encoding for the given string and this string
1012       * should also be canonical with string literals.
1013       * @param str string to intern
1014       * @return interned version of string
1015       */
1016      public static String internString(String str) {
1017        // Has the string already been interned
1018        String s = InternedStrings.findInternedString(str);
1019        if (s != null) return s;
1020    
1021        // Check to see if this is a StringLiteral:
1022        s = InternedStrings.findAtomString(str);
1023        if (s != null) return s;
1024    
1025        // Intern this string
1026        return InternedStrings.internUnfoundString(str);
1027      }
1028    }