libdex/DexFile.c - platform/dalvik - Git at Google

 /*
  * Copyright (C) 2008 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * Access the contents of a .dex file.
  */

 #include "DexFile.h"
 #include "DexProto.h"
 #include "DexCatch.h"
 #include "Leb128.h"
 #include "sha1.h"
 #include "ZipArchive.h"

 #include <zlib.h>

 #include <stdlib.h>
 #include <stddef.h>
 #include <string.h>
 #include <fcntl.h>
 #include <errno.h>

 // fwd
 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader);


 /*
  * Verifying checksums is good, but it slows things down and causes us to
  * touch every page.  In the "optimized" world, it doesn't work at all,
  * because we rewrite the contents.
  *
  * kVerifySignature gates the SHA-1 digest comparison in dexFileParse().
  *
  * NOTE(review): kVerifyChecksum is not referenced in the part of this file
  * that was reviewed; the Adler-32 checks in dexFileParse() are driven by
  * the kDexParseVerifyChecksum bit of the "flags" argument instead.
  */
 static const bool kVerifyChecksum = false;
 static const bool kVerifySignature = false;


 /*
  * Order two '\0'-terminated modified UTF-8 strings by the values that
  * dexGetUtf16FromUtf8() decodes from them, one unit from each string per
  * step. Only a literal '\0' byte ends a string.
  *
  * Returns 0 when both strings end together with no difference found, a
  * negative value when s1 sorts first (or is a proper prefix of s2), and a
  * positive value otherwise, in the manner of strcmp().
  */
 int dexUtf8Cmp(const char* s1, const char* s2) {
     /* Walk both strings in lock step while neither has hit its terminator. */
     while (*s1 != '\0' && *s2 != '\0') {
         int left = dexGetUtf16FromUtf8(&s1);
         int right = dexGetUtf16FromUtf8(&s2);

         if (left != right) {
             return left - right;
         }
     }

     if (*s1 != '\0') {
         /* s2 ran out first, so s1 is the longer string. */
         return 1;
     }

     /* s1 is exhausted: equal if s2 is too, otherwise s1 is a prefix. */
     return (*s2 == '\0') ? 0 : -1;
 }

 /*
  * for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii
  *
  * Each word covers 32 consecutive character values; going by the per-word
  * comments below, bit N of word W stands for character (W * 32 + N), and a
  * set bit marks that character as acceptable in a member name.
  */
 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
     0x00000000, // 00..1f low control characters; nothing valid
     0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
     0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
     0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
 };

 /*
  * Helper for dexIsValidMemberNameUtf8(); do not call directly.
  *
  * Decodes the multibyte character at *pUtf8Ptr (advancing the pointer) and
  * decides whether it may appear in a member name. Everything is accepted
  * except:
  *   (a) values at or below U+00a0 (improperly encoded low values, an
  *       encoded '\0', high control characters, the ISO-8859-1 high space),
  *   (b) a leading surrogate not followed by a trailing surrogate, or a
  *       trailing surrogate on its own,
  *   (c) U+2000..U+200f, U+2028..U+202f and U+fff0..U+ffff (spaces, layout
  *       controls and specials).
  */
 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
     u2 codeUnit = dexGetUtf16FromUtf8(pUtf8Ptr);
     u2 highByte = codeUnit >> 8;

     if (highByte == 0x00) {
         // Only values above the ISO-8859-1 high space (0xa0) are valid.
         return (codeUnit > 0x00a0);
     }

     if (highByte >= 0xd8 && highByte <= 0xdb) {
         // Leading surrogate: the next unit must be a trailing surrogate.
         u2 trailing = dexGetUtf16FromUtf8(pUtf8Ptr);
         return (trailing >= 0xdc00) && (trailing <= 0xdfff);
     }

     if (highByte >= 0xdc && highByte <= 0xdf) {
         // A trailing surrogate with no leading one before it.
         return false;
     }

     if (highByte == 0x20 || highByte == 0xff) {
         // These pages hold the space/control/special ranges; test the
         // aligned group of eight code points the value falls in.
         u2 group = codeUnit & 0xfff8;

         if (group == 0x2000 || group == 0x2008 || group == 0x2028 ||
             group == 0xfff0 || group == 0xfff8) {
             return false;
         }
     }

     return true;
 }

 /*
  * Return whether the given string is a valid field or method name.
  *
  * The empty string is rejected. A name may start with '<', in which case
  * it must end with '>' as its final character; '>' is not accepted
  * anywhere else. Every other character is checked with
  * dexIsValidMemberNameUtf8().
  */
 bool dexIsValidMemberName(const char* s) {
     if (*s == '\0') {
         // The empty string is not a valid name.
         return false;
     }

     // '<' is allowed only as the very first character.
     const bool bracketed = (*s == '<');
     if (bracketed) {
         s++;
     }

     // Consume ordinary name characters up to the terminator or a '>'.
     while (*s != '\0' && *s != '>') {
         if (!dexIsValidMemberNameUtf8(&s)) {
             return false;
         }
     }

     if (*s == '>') {
         // A closing bracket needs an opening one and must end the string.
         return bracketed && s[1] == '\0';
     }

     // Reached the end: fine only if no closing bracket was owed.
     return !bracketed;
 }

 /*
  * Return whether the given string is a valid type descriptor.
  *
  * Accepted forms: zero to 255 leading '[' characters followed by either a
  * single primitive letter (B C D F I J S Z), 'V' (only with no '['), or a
  * class name of the form "Lcomponent/component;" where every component is
  * non-empty and made of characters accepted by dexIsValidMemberNameUtf8().
  */
 bool dexIsValidTypeDescriptor(const char* s) {
     int dimensions = 0;

     for (; *s == '['; s++) {
         dimensions++;
     }

     if (dimensions > 255) {
         // Arrays may have no more than 255 dimensions.
         return false;
     }

     const char kind = *s++;

     if (kind == 'V') {
         // You can't have an array of void.
         return (dimensions == 0) && (*s == '\0');
     }

     if (kind != 'L') {
         switch (kind) {
             case 'B':
             case 'C':
             case 'D':
             case 'F':
             case 'I':
             case 'J':
             case 'S':
             case 'Z':
                 // Primitive descriptors are exactly one character long.
                 return (*s == '\0');
             default:
                 // Oddball descriptor character (including end of string).
                 return false;
         }
     }

     // The 'L' introducing a class name has been consumed.

     bool componentEmpty = true; // nothing seen yet since the 'L' or last '/'
     while (*s != ';') {
         if (*s == '\0') {
             // Premature end.
             return false;
         }

         if (*s == '/') {
             if (componentEmpty) {
                 // Slash at start or two slashes in a row.
                 return false;
             }
             componentEmpty = true;
             s++;
         } else {
             if (!dexIsValidMemberNameUtf8(&s)) {
                 return false;
             }
             componentEmpty = false;
         }
     }

     /*
      * The ';' must be the last character and must not follow an empty
      * component (which also rules out the degenerate "L;").
      */
     return (s[1] == '\0') && !componentEmpty;
 }

 /* Return whether the given string is a valid reference descriptor, i.e.
  * dexIsValidTypeDescriptor() accepts it and it starts with 'L' (class)
  * or '[' (array) rather than a primitive type letter. */
 bool dexIsReferenceDescriptor(const char* s) {
     return dexIsValidTypeDescriptor(s) && ((s[0] == 'L') || (s[0] == '['));
 }

 /* Return whether the given string is a valid class descriptor, i.e.
  * dexIsValidTypeDescriptor() accepts it and it starts with 'L', so it is
  * neither an array nor a primitive type. */
 bool dexIsClassDescriptor(const char* s) {
     return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
 }

 /* Return whether the given string is a valid field type descriptor, i.e.
  * dexIsValidTypeDescriptor() accepts it and it does not start with 'V'
  * ("void"). */
 bool dexIsFieldDescriptor(const char* s) {
     return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
 }

 /*
  * Return the UTF-8 encoded string with the specified string_id index.
  *
  * The string data starts with an unsigned LEB128 value, which is decoded
  * and stored in *utf16Size (the string's UTF-16 size); the returned
  * pointer addresses the first byte after that prefix.
  */
 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
         u4* utf16Size) {
     const u1* cursor =
         pDexFile->baseAddr + dexGetStringId(pDexFile, idx)->stringDataOff;

     /* readUnsignedLeb128() moves the cursor past the length prefix. */
     *utf16Size = readUnsignedLeb128(&cursor);
     return (const char*) cursor;
 }

 /*
  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
  * least kSHA1DigestOutputLen bytes.
  *
  * NOTE(review): this is only a declaration. No definition or call of
  * dvmSHA1DigestToStr() appears in the part of this file that was reviewed;
  * the formatting done locally goes through the static dexSHA1DigestToStr().
  */
 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);

 /*
  * Compute a SHA-1 digest over "length" bytes starting at "data" and store
  * the result in "digest".
  */
 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
     unsigned char digest[])
 {
     SHA1_CTX sha;

     /* one-shot hash: initialise, feed the whole range, finalise */
     SHA1Init(&sha);
     SHA1Update(&sha, data, length);
     SHA1Final(digest, &sha);
 }

 /*
  * Format the SHA-1 digest as lowercase hex into tmpBuf, which must be able
  * to hold at least kSHA1DigestOutputLen bytes (two characters per digest
  * byte plus the terminating '\0').  Returns a pointer to the buffer.
  */
 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
 {
     static const char kHexChars[] = "0123456789abcdef";
     int idx;

     for (idx = 0; idx < kSHA1DigestLen; idx++) {
         const unsigned char byte = digest[idx];

         /* high nibble first, then low nibble */
         tmpBuf[2 * idx] = kHexChars[byte >> 4];
         tmpBuf[2 * idx + 1] = kHexChars[byte & 0x0f];
     }
     tmpBuf[2 * kSHA1DigestLen] = '\0';

     /* everything written, terminator included, must exactly fill the
      * documented output length */
     assert((2 * kSHA1DigestLen) + 1 == kSHA1DigestOutputLen);

     return tmpBuf;
 }

 /*
  * Compute a hash code on a '\0'-terminated UTF-8 string, for use with the
  * internal class lookup hash table.
  *
  * This may or may not be compatible with UTF-8 hash functions used inside
  * the Dalvik VM.
  *
  * The hash starts at 1 and, for each byte, is multiplied by 31 before the
  * byte is added.  That simple scheme does better on class names than most
  * other things tried (e.g. adler32).
  */
 static u4 classDescriptorHash(const char* str)
 {
     u4 accum = 1;
     const char* cursor;

     for (cursor = str; *cursor != '\0'; cursor++) {
         accum = accum * 31 + *cursor;
     }

     return accum;
 }

 /*
  * Add an entry to the class lookup table.  We hash the descriptor string
  * and probe linearly until we find an open slot.
  *
  * pDexFile     supplies baseAddr, against which stringOff is resolved
  * pLookup      table to insert into; numEntries must be a power of two
  *              because (numEntries - 1) is used as the index mask
  * stringOff    offset from baseAddr of the class descriptor string; a
  *              stored offset of 0 is what marks a slot as empty
  * classDefOff  offset from baseAddr of the class definition; stored as-is
  * pNumProbes   receives the number of occupied slots skipped
  */
 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
     int stringOff, int classDefOff, int* pNumProbes)
 {
     const char* classDescriptor =
         (const char*) (pDexFile->baseAddr + stringOff);
     u4 hash = classDescriptorHash(classDescriptor);
     int mask = pLookup->numEntries-1;
     int idx = hash & mask;

     /*
      * Find the first empty slot.  We oversized the table, so this is
      * guaranteed to finish.
      */
     int probes = 0;
     while (pLookup->table[idx].classDescriptorOffset != 0) {
         idx = (idx + 1) & mask;
         probes++;
     }
     //if (probes > 1)
     //    LOGW("classLookupAdd: probes=%d\n", probes);

     pLookup->table[idx].classDescriptorHash = hash;
     pLookup->table[idx].classDescriptorOffset = stringOff;
     pLookup->table[idx].classDefOffset = classDefOff;
     *pNumProbes = probes;
 }

 /*
  * Round up to the next highest power of 2; a value that already is a
  * power of 2 is returned unchanged.  An input of 0 yields 0.
  *
  * Technique from http://graphics.stanford.edu/~seander/bithacks.html:
  * after subtracting one, smear the highest set bit into every lower
  * position, then add one.
  */
 u4 dexRoundUpPower2(u4 val)
 {
     u4 shift;

     val--;
     /* shifts of 1, 2, 4, 8 and 16 cover all 32 bits */
     for (shift = 1; shift < 32; shift <<= 1) {
         val |= val >> shift;
     }

     return val + 1;
 }

 /*
  * Create the class lookup hash table.
  *
  * The table gets dexRoundUpPower2(classDefsSize * 2) slots, and every
  * class definition in the DEX file is inserted with classLookupAdd().
  *
  * Returns newly-allocated storage (obtained with calloc), or NULL if the
  * allocation fails.
  */
 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
 {
     DexClassLookup* pLookup;
     int allocSize;
     int i, numEntries;
     int numProbes, totalProbes, maxProbes;

     numProbes = totalProbes = maxProbes = 0;

     assert(pDexFile != NULL);

     /*
      * Using a factor of 3 results in far less probing than a factor of 2,
      * but almost doubles the flash storage requirements for the bootstrap
      * DEX files.  The overall impact on class loading performance seems
      * to be minor.  We could probably get some performance improvement by
      * using a secondary hash.
      *
      * (The factor actually applied below is 2.)
      */
     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
     allocSize = offsetof(DexClassLookup, table)
                     + numEntries * sizeof(pLookup->table[0]);

     /* calloc zeroes the table; a zero descriptor offset marks a free slot */
     pLookup = (DexClassLookup*) calloc(1, allocSize);
     if (pLookup == NULL)
         return NULL;
     pLookup->size = allocSize;
     pLookup->numEntries = numEntries;

     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
         const DexClassDef* pClassDef;
         const char* pString;

         pClassDef = dexGetClassDef(pDexFile, i);
         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);

         /* entries are stored as offsets from the start of the DEX data */
         classLookupAdd(pDexFile, pLookup,
             (u1*)pString - pDexFile->baseAddr,
             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);

         if (numProbes > maxProbes)
             maxProbes = numProbes;
         totalProbes += numProbes;
     }

     /*
      * numEntries is 0 when there are no class definitions (and when the
      * doubled count wraps), so the occupancy percentage must not divide
      * by it unconditionally.
      */
     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
          " total=%d max=%d\n",
         pDexFile->pHeader->classDefsSize, numEntries,
         (numEntries != 0)
             ? (100 * pDexFile->pHeader->classDefsSize) / numEntries : 0,
         allocSize, totalProbes, maxProbes);

     return pLookup;
 }


 /*
  * Set up the basic raw data pointers of a DexFile: the base address, the
  * header (which sits at the very start of "data"), and one pointer per
  * section whose offset the header records.  This function isn't meant
  * for general use.
  */
 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
     DexHeader *hdr = (DexHeader*) data;

     pDexFile->baseAddr = data;
     pDexFile->pHeader = hdr;

     /* every section offset in the header is relative to "data" */
     pDexFile->pStringIds = (const DexStringId*) (data + hdr->stringIdsOff);
     pDexFile->pTypeIds   = (const DexTypeId*)   (data + hdr->typeIdsOff);
     pDexFile->pFieldIds  = (const DexFieldId*)  (data + hdr->fieldIdsOff);
     pDexFile->pMethodIds = (const DexMethodId*) (data + hdr->methodIdsOff);
     pDexFile->pProtoIds  = (const DexProtoId*)  (data + hdr->protoIdsOff);
     pDexFile->pClassDefs = (const DexClassDef*) (data + hdr->classDefsOff);
     pDexFile->pLinkData  = (const DexLink*)     (data + hdr->linkOff);
 }


 /*
  * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
  *
  * Layout read from *pData:
  *   expanding:  u4 reducedCount, then reducedCount u2 map entries
  *   reducing:   u4 fullCount, u4 reducedCount, then fullCount u2 entries
  * In the expanding case *pFullCount is set to (u4) -1 as a placeholder.
  *
  * *pMap is pointed at the u2 entries inside the input buffer (nothing is
  * copied).  *pData is advanced past the entries, rounded up to a multiple
  * of four bytes.
  *
  * Returns false, leaving *pData and *pSize untouched, if the remaining
  * size cannot hold the counts or the map.
  */
 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
     u4* pFullCount, u4* pReducedCount, const u2** pMap)
 {
     const u4* wordPtr = (const u4*) *pData;
     u4 size = *pSize;
     u4 mapCount;

     if (expanding) {
         if (size < 4)
             return false;
         mapCount = *pReducedCount = *wordPtr++;
         *pFullCount = (u4) -1;
         size -= sizeof(u4);
     } else {
         if (size < 8)
             return false;
         mapCount = *pFullCount = *wordPtr++;
         *pReducedCount = *wordPtr++;
         size -= sizeof(u4) * 2;
     }

     /*
      * The byte size of the map is kept in a u4.  Reject counts whose byte
      * size (or its four-byte rounding below) would not fit; otherwise the
      * product wraps to a small number and slips past the size check.
      */
     if (mapCount > (((u4) -1) - 3) / sizeof(u2))
         return false;

     u4 mapSize = mapCount * sizeof(u2);

     if (size < mapSize)
         return false;
     *pMap = (const u2*) wordPtr;
     size -= mapSize;

     /*
      * advance the pointer
      *
      * NOTE(review): the pointer moves by the padded size while "size" is
      * reduced by the unpadded size only -- confirm against the writer
      * that chunk sizes are meant to be accounted this way.
      */
     const u1* ptr = (const u1*) wordPtr;
     ptr += (mapSize + 3) & ~0x3;

     /* update pass-by-reference values */
     *pData = (const u1*) ptr;
     *pSize = size;

     return true;
 }

 /*
  * Set up the index map pointers in pDexFile->indexMap from the mapped
  * chunk data.  Four entries are parsed back to back, in this order:
  * class, method, field, string.
  *
  * See analysis/ReduceConstants.c for the data layout description.
  *
  * Returns false as soon as any entry fails to parse.
  */
 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
     bool expanding)
 {
     /* && short-circuits, so parsing stops at the first bad entry */
     const bool allParsed =
         parseIndexMapEntry(&data, &size, expanding,
             &pDexFile->indexMap.classFullCount,
             &pDexFile->indexMap.classReducedCount,
             &pDexFile->indexMap.classMap)
         && parseIndexMapEntry(&data, &size, expanding,
             &pDexFile->indexMap.methodFullCount,
             &pDexFile->indexMap.methodReducedCount,
             &pDexFile->indexMap.methodMap)
         && parseIndexMapEntry(&data, &size, expanding,
             &pDexFile->indexMap.fieldFullCount,
             &pDexFile->indexMap.fieldReducedCount,
             &pDexFile->indexMap.fieldMap)
         && parseIndexMapEntry(&data, &size, expanding,
             &pDexFile->indexMap.stringFullCount,
             &pDexFile->indexMap.stringReducedCount,
             &pDexFile->indexMap.stringMap);

     if (!allParsed)
         return false;

     if (expanding) {
         /*
          * An expanding map carries only the "reduced" counts, so the
          * parser left the (u4) -1 placeholder in each full count.  The
          * disabled code below would replace it with the original counts
          * from the DEX header.
          */
         assert(pDexFile->indexMap.classFullCount == (u4) -1);
         assert(pDexFile->indexMap.methodFullCount == (u4) -1);
         assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
         assert(pDexFile->indexMap.stringFullCount == (u4) -1);

 #if 0   // TODO: not available yet -- do later or just skip this
         pDexFile->indexMap.classFullCount =
             pDexFile->pHeader->typeIdsSize;
         pDexFile->indexMap.methodFullCount =
             pDexFile->pHeader->methodIdsSize;
         pDexFile->indexMap.fieldFullCount =
             pDexFile->pHeader->fieldIdsSize;
         pDexFile->indexMap.stringFullCount =
             pDexFile->pHeader->stringIdsSize;
 #endif
     }

     /*
      * Log both counts and the first map element of each table.
      * NOTE(review): element [0] is read even if the map has zero entries.
      */
     LOGI("Class : %u %u %u\n",
         pDexFile->indexMap.classFullCount,
         pDexFile->indexMap.classReducedCount,
         pDexFile->indexMap.classMap[0]);
     LOGI("Method: %u %u %u\n",
         pDexFile->indexMap.methodFullCount,
         pDexFile->indexMap.methodReducedCount,
         pDexFile->indexMap.methodMap[0]);
     LOGI("Field : %u %u %u\n",
         pDexFile->indexMap.fieldFullCount,
         pDexFile->indexMap.fieldReducedCount,
         pDexFile->indexMap.fieldMap[0]);
     LOGI("String: %u %u %u\n",
         pDexFile->indexMap.stringFullCount,
         pDexFile->indexMap.stringReducedCount,
         pDexFile->indexMap.stringMap[0]);

     return true;
 }

 /*
  * Parse the auxiliary data tables located at pOptHeader->auxOffset.
  *
  * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
  * table.  Subsequent versions switched to the "chunk" format: a sequence
  * of (u4 type, u4 size, payload) records, each padded to a 64-bit
  * boundary, ended by a kDexChunkEnd type word.
  *
  * Returns false only if an index map chunk fails to parse.
  */
 static bool parseAuxData(const u1* data, DexFile* pDexFile)
 {
     const u4* pChunk = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
     u4 indexMapType = 0;

     /* v1.0 format? */
     if (*pChunk == 0) {
         LOGV("+++ found OLD dex format\n");
         pDexFile->pClassLookup = (const DexClassLookup*) (pChunk+1);
         return true;
     }
     LOGV("+++ found NEW dex format\n");

     /* process chunks until we see the end marker */
     while (*pChunk != kDexChunkEnd) {
         const u4 chunkType = pChunk[0];
         const u4 payloadSize = pChunk[1];
         u1* payload = (u1*) (pChunk + 2);

         switch (chunkType) {
         case kDexChunkClassLookup:
             pDexFile->pClassLookup = (const DexClassLookup*) payload;
             break;
         case kDexChunkReducingIndexMap:
             LOGI("+++ found reducing index map, size=%u\n", payloadSize);
             if (!parseIndexMap(pDexFile, payload, payloadSize, false)) {
                 LOGE("Failed parsing reducing index map\n");
                 return false;
             }
             indexMapType = chunkType;
             break;
         case kDexChunkExpandingIndexMap:
             LOGI("+++ found expanding index map, size=%u\n", payloadSize);
             if (!parseIndexMap(pDexFile, payload, payloadSize, true)) {
                 LOGE("Failed parsing expanding index map\n");
                 return false;
             }
             indexMapType = chunkType;
             break;
         case kDexChunkRegisterMaps:
             LOGV("+++ found register maps, size=%u\n", payloadSize);
             pDexFile->pRegisterMapPool = payload;
             break;
         default:
             /* unrecognised chunks are logged and skipped, not rejected */
             LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
                 chunkType,
                 (char) (chunkType >> 24), (char) (chunkType >> 16),
                 (char) (chunkType >> 8),  (char)  chunkType,
                 payloadSize);
             break;
         }

         /*
          * Step to the next chunk: the payload plus the 8-byte type/size
          * header, rounded up to a 64-bit boundary, counted in u4 words.
          */
         pChunk += ((payloadSize + 8 + 7) & ~7) / sizeof(u4);
     }

 #if 0   // TODO: propagate expected map type from the VM through the API
     /*
      * If we're configured to expect an index map, and we don't find one,
      * reject this DEX so we'll regenerate it.  Also, if we found an
      * "expanding" map but we're not configured to use it, we have to fail
      * because the constants aren't usable without translation.
      */
     if (indexMapType != expectedIndexMapType) {
         LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
             indexMapType, DVM_REDUCE_CONSTANTS);
         return false;
     }
 #endif

     return true;
 }

 /*
  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
  * called after the byte-ordering and structure alignment has been fixed up.
  *
  * On success, return a newly-allocated DexFile.
  */
 DexFile* dexFileParse(const u1* data, size_t length, int flags)
 {
     DexFile* pDexFile = NULL;
     const DexHeader* pHeader;
     const u1* magic;
     int result = -1;

     if (length < sizeof(DexHeader)) {
         LOGE("too short to be a valid .dex\n");
         goto bail;      /* bad file format */
     }

     pDexFile = (DexFile*) malloc(sizeof(DexFile));
     if (pDexFile == NULL)
         goto bail;      /* alloc failure */
     memset(pDexFile, 0, sizeof(DexFile));

     /*
      * Peel off the optimized header.
      */
     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
         magic = data;
         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
                  magic[4], magic[5], magic[6], magic[7]);
             goto bail;
         }

         pDexFile->pOptHeader = (const DexOptHeader*) data;
         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);

         /* locate some auxillary data tables */
         if (!parseAuxData(data, pDexFile))
             goto bail;

         /* ignore the opt header and appended data from here on out */
         data += pDexFile->pOptHeader->dexOffset;
         length -= pDexFile->pOptHeader->dexOffset;
         if (pDexFile->pOptHeader->dexLength > length) {
             LOGE("File truncated? stored len=%d, rem len=%d\n",
                 pDexFile->pOptHeader->dexLength, (int) length);
             goto bail;
         }
         length = pDexFile->pOptHeader->dexLength;
     }

     dexFileSetupBasicPointers(pDexFile, data);
     pHeader = pDexFile->pHeader;

     magic = pHeader->magic;
     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
         /* not expected */
         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
              magic[0], magic[1], magic[2], magic[3]);
         goto bail;
     }
     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
              magic[4], magic[5], magic[6], magic[7]);
         goto bail;
     }

     /*
      * Verify the checksum(s).  This is reasonably quick, but does require
      * touching every byte in the DEX file.  The base checksum changes after
      * byte-swapping and DEX optimization.
      */
     if (flags & kDexParseVerifyChecksum) {
         u4 adler = dexComputeChecksum(pHeader);
         if (adler != pHeader->checksum) {
             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
                 adler, pHeader->checksum);
             if (!(flags & kDexParseContinueOnError))
                 goto bail;
         } else {
             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
         }

         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
         if (pOptHeader != NULL) {
             adler = dexComputeOptChecksum(pOptHeader);
             if (adler != pOptHeader->checksum) {
                 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
                     adler, pOptHeader->checksum);
                 if (!(flags & kDexParseContinueOnError))
                     goto bail;
             } else {
                 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
             }
         }
     }

     /*
      * Verify the SHA-1 digest.  (Normally we don't want to do this --
      * the digest is used to uniquely identify the original DEX file, and
      * can't be computed for verification after the DEX is byte-swapped
      * and optimized.)
      */
     if (kVerifySignature) {
         unsigned char sha1Digest[kSHA1DigestLen];
         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
                             kSHA1DigestLen;

         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
             char tmpBuf1[kSHA1DigestOutputLen];
             char tmpBuf2[kSHA1DigestOutputLen];
             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
             if (!(flags & kDexParseContinueOnError))
                 goto bail;
         } else {
             LOGV("+++ sha1 digest verified\n");
         }
     }

     if (pHeader->fileSize != length) {
         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
             (int) pHeader->fileSize, (int) length);
         if (!(flags & kDexParseContinueOnError))
             goto bail;
     }

     if (pHeader->classDefsSize == 0) {
         LOGE("ERROR: DEX file has no classes in it, failing\n");
         goto bail;
     }

     /*
      * Success!
      */
     result = 0;

 bail:
     if (result != 0 && pDexFile != NULL) {
         dexFileFree(pDexFile);
         pDexFile = NULL;
     }
     return pDexFile;
 }

 /*
  * Free up the DexFile structure itself.
  *
  * Safe to call with NULL or with a partially-initialized DexFile; only
  * the structure is released, not the data it points into.
  */
 void dexFileFree(DexFile* pDexFile)
 {
     /* free(NULL) is defined to do nothing, so no separate guard is needed */
     free(pDexFile);
 }

 /*
  * Look up a class definition entry by descriptor, using the class lookup
  * hash table attached to the DexFile.
  *
  * "descriptor" should look like "Landroid/debug/Stuff;".
  *
  * Returns a pointer to the matching class definition, or NULL if probing
  * reaches an empty slot without finding a match.
  */
 const DexClassDef* dexFindClass(const DexFile* pDexFile,
     const char* descriptor)
 {
     const DexClassLookup* pLookup = pDexFile->pClassLookup;
     const u4 hash = classDescriptorHash(descriptor);
     const int mask = pLookup->numEntries - 1;
     int slot;

     /* linear probe, starting from the slot the hash selects */
     for (slot = hash & mask; ; slot = (slot + 1) & mask) {
         const int descOffset = pLookup->table[slot].classDescriptorOffset;

         if (descOffset == 0)
             return NULL;        /* empty slot ends the search */

         /* compare the cheap stored hash before touching the string */
         if (pLookup->table[slot].classDescriptorHash != hash)
             continue;

         if (strcmp((const char*) (pDexFile->baseAddr + descOffset),
                 descriptor) == 0)
         {
             return (const DexClassDef*)
                 (pDexFile->baseAddr + pLookup->table[slot].classDefOffset);
         }
     }
 }


 /*
  * Compute the DEX file checksum for a memory-mapped DEX file: the Adler-32
  * of everything from just past the header's magic and checksum fields up
  * to pHeader->fileSize bytes from the start of the header.
  */
 u4 dexComputeChecksum(const DexHeader* pHeader)
 {
     /* bytes at the front of the file that the checksum does not cover */
     const int skipped = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
     const u1* covered = (const u1*) pHeader + skipped;
     uLong seed = adler32(0L, Z_NULL, 0);

     return (u4) adler32(seed, covered, pHeader->fileSize - skipped);
 }

 /*
  * Compute the checksum on the data appended to the DEX file by dexopt:
  * the Adler-32 of the bytes from depsOffset up to the end of the aux
  * area (auxOffset + auxLength), all relative to the opt header.
  */
 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader)
 {
     const u1* base = (const u1*) pOptHeader;
     const u1* first = base + pOptHeader->depsOffset;
     const u1* limit = base + pOptHeader->auxOffset + pOptHeader->auxLength;
     uLong seed = adler32(0L, Z_NULL, 0);

     return (u4) adler32(seed, first, limit - first);
 }


 /*
  * Compute the size, in bytes, of a DexCode.
  *
  * The catch handler data is the last piece of a DexCode and is made of a
  * variable number of variable-size entries, so its end is found by
  * stepping a catch iterator over every handler.  With no tries the end
  * offset within the handler data is taken to be 0.
  */
 size_t dexGetDexCodeSize(const DexCode* pCode)
 {
     u4 remaining = 0;       /* handlers still to step over */
     u4 endOffset = 0;       /* offset, within the handler data, of the end */

     if (pCode->triesSize != 0) {
         remaining = dexGetHandlersSize(pCode);
         endOffset = dexGetFirstHandlerOffset(pCode);
     }

     while (remaining-- > 0) {
         DexCatchIterator iterator;

         /* each pass moves endOffset to where the current handler stops */
         dexCatchIteratorInit(&iterator, pCode, endOffset);
         endOffset = dexCatchIteratorGetEndOffset(&iterator, pCode);
     }

     const u1* handlerData = dexGetCatchHandlerData(pCode);

     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
     //    pCode, handlerData, offset);

     /* return the size of the catch handler + everything before it */
     return (handlerData - (const u1*) pCode) + endOffset;
 }


 /*
  * ===========================================================================
  *      Debug info
  * ===========================================================================
  */

 /*
  * Decode the arguments in a method signature, which looks something
  * like "(ID[Ljava/lang/String;)V".
  *
  * A leading '(' is skipped.  Returns the type signature letter for the
  * next argument ('L' for any object reference, '[' for any array), ')' if
  * there are no more args, or '\0' at the end of the string.
  *
  * On return "*pSig" points to the character after the decoded type.  When
  * '\0' is returned "*pSig" is left unchanged.  If the string ends in the
  * middle of a type (an "L..." with no ';', or '[' with no element type),
  * the letter is still returned but "*pSig" stops on the terminating '\0'
  * rather than running past it, so the next call returns '\0'.
  */
 static char decodeSignature(const char** pSig)
 {
     const char* sig = *pSig;
     char kind;

     if (*sig == '(')
         sig++;

     kind = *sig;
     if (kind == '\0')
         return kind;        /* don't advance further */

     if (kind == '[') {
         /* array; skip every dimension marker to reach the element type */
         while (*++sig == '[')
             ;
     }

     if (*sig == 'L') {
         /* object ref (or array of them); scan to ';', never past the end */
         while (*sig != ';' && *sig != '\0')
             sig++;
     }

     /* step over the last character of the type, unless the string ended */
     if (*sig != '\0')
         sig++;

     *pSig = sig;
     return kind;
 }

 /*
  * Returns the length, in bytes, of the single type that starts at "type":
  * the distance decodeSignature() advances when decoding it.  Used for the
  * case where the debug info format references types that are inside a
  * method type signature.
  */
 static int typeLength (const char *type) {
     // Assumes any leading '(' has already been gobbled
     const char *cursor = type;

     (void) decodeSignature(&cursor);
     return cursor - type;
 }

 /*
  * Reads a string index as encoded for the debug info format (unsigned
  * LEB128, advancing *pStream), returning the string it names, or NULL
  * when the encoded value is 0.
  */
 static const char* readStringIdx(const DexFile* pDexFile,
         const u1** pStream) {
     // Encoded string indices have 1 added to them; 0 stands for "none".
     const u4 encoded = readUnsignedLeb128(pStream);

     return (encoded == 0) ? NULL : dexStringById(pDexFile, encoded - 1);
 }

 /*
  * Reads a type index as encoded for the debug info format (unsigned
  * LEB128, advancing *pStream), returning the descriptor string of the
  * type it names, or NULL when the encoded value is 0.
  */
 static const char* readTypeIdx(const DexFile* pDexFile,
         const u1** pStream) {
     // Encoded type indices have 1 added to them; 0 stands for "none".
     const u4 encoded = readUnsignedLeb128(pStream);

     return (encoded == 0) ? NULL : dexStringByTypeIdx(pDexFile, encoded - 1);
 }

 /* access_flag value indicating that a method is static */
 #define ACC_STATIC              0x0008

 /*
  * State tracked for the local variable held in one register while debug
  * info is decoded; emitLocalCbIfLive() reports it through the "new local"
  * callback.
  */
 typedef struct LocalInfo {
     const char *name;           /* passed to the callback as the name */
     const char *descriptor;     /* passed to the callback as the descriptor */
     const char *signature;      /* may be NULL; reported as "" in that case */
     u2 startAddress;            /* passed to the callback as the start address */
     bool live;                  /* the callback is only invoked when this is true */
 } LocalInfo;

 /*
  * Reports the local tracked for register "reg" through "localCb", giving
  * its recorded start address and the supplied "endAddress" as its range.
  * Nothing happens when "localCb" is NULL or the register holds no live
  * local. A missing signature is passed to the callback as an empty string.
  */
 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
         LocalInfo *localInReg, DexDebugNewLocalCb localCb)
 {
     if (localCb == NULL) {
         return;
     }

     const LocalInfo *slot = &localInReg[reg];
     if (!slot->live) {
         return;
     }

     /* The callback always receives a string for the signature, never NULL. */
     const char *signature = slot->signature;
     if (signature == NULL) {
         signature = "";
     }

     localCb(cnxt, reg, slot->startAddress, endAddress,
             slot->name, slot->descriptor, signature);
 }

 /*
  * Walks the debug info stream attached to "pCode" and reports its contents
  * through the supplied callbacks.
  *
  *   pDexFile        - DEX file the code belongs to; used to locate the
  *                     stream and to resolve string and type indices
  *   pCode           - code item whose debug info is decoded
  *   classDescriptor - reported as the descriptor of the implicit "this"
  *                     local of an instance method
  *   protoIdx        - prototype index, used to walk parameter descriptors
  *   accessFlags     - method access flags; only ACC_STATIC is examined
  *   posCb           - if non-NULL, called with (cnxt, address, line) for
  *                     each special opcode; a nonzero return stops decoding
  *   localCb         - if non-NULL, called whenever a live local's range ends
  *   cnxt            - handed to both callbacks unchanged
  *
  * When decoding finishes (end of sequence, early exit requested by posCb,
  * or no stream at all), every local still live is reported with
  * pCode->insnsSize as its end address. If the stream is found to be
  * invalid, an error is logged and the function returns without reporting
  * the remaining live locals.
  */
 // TODO optimize localCb == NULL case
 void dexDecodeDebugInfo(
             const DexFile* pDexFile,
             const DexCode* pCode,
             const char* classDescriptor,
             u4 protoIdx,
             u4 accessFlags,
             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
             void* cnxt)
 {
     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
     u4 line;
     u4 parametersSize;
     u4 address = 0;
     LocalInfo localInReg[pCode->registersSize];
     u4 insnsSize = pCode->insnsSize;
     DexProto proto = { pDexFile, protoIdx };

     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);

     if (stream == NULL) {
         goto end;
     }

     line = readUnsignedLeb128(&stream);
     parametersSize = readUnsignedLeb128(&stream);

     /* Arguments occupy the last insSize registers of the frame. */
     u2 argReg = pCode->registersSize - pCode->insSize;

     if ((accessFlags & ACC_STATIC) == 0) {
         /*
          * The code is an instance method, which means that there is
          * an initial this parameter. Also, the proto list should
          * contain exactly one fewer argument word than the insSize
          * indicates.
          */
         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));

         /*
          * A malformed code item (insSize of zero, or larger than
          * registersSize so that the subtraction above wrapped) leaves
          * no valid slot for "this"; refuse rather than write out of
          * bounds.
          */
         if (argReg >= pCode->registersSize) {
             goto invalid_stream;
         }

         localInReg[argReg].name = "this";
         localInReg[argReg].descriptor = classDescriptor;
         localInReg[argReg].startAddress = 0;
         localInReg[argReg].live = true;
         argReg++;
     } else {
         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
     }

     DexParameterIterator iterator;
     dexParameterIteratorInit(&iterator, &proto);

     /* One (possibly absent) name per declared parameter. */
     while (parametersSize-- != 0) {
         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
         const char *name;
         int reg;

         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
             goto invalid_stream;
         }

         name = readStringIdx(pDexFile, &stream);
         reg = argReg;

         /* Descriptors starting with 'D' or 'J' advance by two registers. */
         switch (descriptor[0]) {
             case 'D':
             case 'J':
                 argReg += 2;
                 break;
             default:
                 argReg += 1;
                 break;
         }

         if (name != NULL) {
             localInReg[reg].name = name;
             localInReg[reg].descriptor = descriptor;
             localInReg[reg].signature = NULL;
             localInReg[reg].startAddress = address;
             localInReg[reg].live = true;
         }
     }

     for (;;)  {
         u1 opcode = *stream++;
         /*
          * Kept at full width so that an oversized register number is
          * rejected by the range checks below instead of being truncated
          * into a valid-looking index.
          */
         u4 reg;

         switch (opcode) {
             case DBG_END_SEQUENCE:
                 goto end;

             case DBG_ADVANCE_PC:
                 address += readUnsignedLeb128(&stream);
                 break;

             case DBG_ADVANCE_LINE:
                 line += readSignedLeb128(&stream);
                 break;

             case DBG_START_LOCAL:
             case DBG_START_LOCAL_EXTENDED:
                 reg = readUnsignedLeb128(&stream);
                 /* valid indices are 0 .. registersSize-1 */
                 if (reg >= pCode->registersSize) goto invalid_stream;

                 // Emit what was previously there, if anything
                 emitLocalCbIfLive (cnxt, reg, address,
                     localInReg, localCb);

                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
                 if (opcode == DBG_START_LOCAL_EXTENDED) {
                     localInReg[reg].signature
                         = readStringIdx(pDexFile, &stream);
                 } else {
                     localInReg[reg].signature = NULL;
                 }
                 localInReg[reg].startAddress = address;
                 localInReg[reg].live = true;
                 break;

             case DBG_END_LOCAL:
                 reg = readUnsignedLeb128(&stream);
                 if (reg >= pCode->registersSize) goto invalid_stream;

                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
                 localInReg[reg].live = false;
                 break;

             case DBG_RESTART_LOCAL:
                 reg = readUnsignedLeb128(&stream);
                 if (reg >= pCode->registersSize) goto invalid_stream;

                 /* Restarting requires a local to have been seen before. */
                 if (localInReg[reg].name == NULL
                         || localInReg[reg].descriptor == NULL) {
                     goto invalid_stream;
                 }

                 /*
                  * If the register is live, the "restart" is superfluous,
                  * and we don't want to mess with the existing start address.
                  */
                 if (!localInReg[reg].live) {
                     localInReg[reg].startAddress = address;
                     localInReg[reg].live = true;
                 }
                 break;

             case DBG_SET_PROLOGUE_END:
             case DBG_SET_EPILOGUE_BEGIN:
                 break;

             case DBG_SET_FILE:
                 /*
                  * Per the DEX format this opcode carries a uleb128p1
                  * file-name index. The value is not used here, but it
                  * must be consumed or the following bytes would be
                  * misread as opcodes.
                  */
                 (void) readUnsignedLeb128(&stream);
                 break;

             default: {
                 /* Special opcode: advances both address and line. */
                 int adjopcode = opcode - DBG_FIRST_SPECIAL;

                 address += adjopcode / DBG_LINE_RANGE;
                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);

                 if (posCb != NULL) {
                     int done;
                     done = posCb(cnxt, address, line);

                     if (done) {
                         // early exit
                         goto end;
                     }
                 }
                 break;
             }
         }
     }

 end:
     {
         /* Close out every local that is still live at the end of the code. */
         int reg;
         for (reg = 0; reg < pCode->registersSize; reg++) {
             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
         }
     }
     return;

 invalid_stream:
     IF_LOGE() {
         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
         LOGE("Invalid debug info stream. class %s; proto %s",
                 classDescriptor, methodDescriptor);
         free(methodDescriptor);
     }
 }