blob: b139746e01c9aa80c28dda37abb4e503c002774b [file] [log] [blame]
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Access the contents of a .dex file.
*/
#include "DexFile.h"
#include "DexProto.h"
#include "DexCatch.h"
#include "Leb128.h"
#include "sha1.h"
#include "ZipArchive.h"
#include <zlib.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
// fwd
static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader);
/*
 * Verifying checksums is good, but it slows things down and causes us to
 * touch every page. In the "optimized" world, it doesn't work at all,
 * because we rewrite the contents.
 *
 * kVerifySignature gates the SHA-1 digest comparison in dexFileParse().
 *
 * NOTE(review): kVerifyChecksum is not referenced in the visible part of
 * this file; the Adler-32 verification in dexFileParse() is controlled by
 * the kDexParseVerifyChecksum flag bit instead. Confirm whether this
 * constant is still needed.
 */
static const bool kVerifyChecksum = false;
static const bool kVerifySignature = false;
/*
 * Compare two '\0'-terminated modified UTF-8 strings by the 16-bit values
 * decoded from them with dexGetUtf16FromUtf8(), so that different
 * encodings of the same value compare equal. Only a real '\0' byte ends
 * a string.
 *
 * Returns 0 if the strings are equal, a negative value if s1 sorts first
 * (or is a proper prefix of s2), and a positive value otherwise, in the
 * manner of strcmp().
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    /* Walk both strings in lock step while neither has ended. */
    while (*s1 != '\0' && *s2 != '\0') {
        int lhs = dexGetUtf16FromUtf8(&s1);
        int rhs = dexGetUtf16FromUtf8(&s2);

        if (lhs != rhs) {
            return lhs - rhs;
        }
    }

    /* At least one string is exhausted; the longer one sorts later. */
    if (*s1 != '\0') {
        return 1;
    }
    if (*s2 != '\0') {
        return -1;
    }
    return 0;
}
/*
 * For dexIsValidMemberNameUtf8(): a bit vector indicating which low-ASCII
 * characters (0x00..0x7f) are valid in a member name.
 *
 * Each 32-bit word covers 32 consecutive character codes; bit n of word w
 * stands for character code (w * 32 + n). For example, in word 1 the set
 * bits are 4 ('$' = 0x24), 13 ('-' = 0x2d) and 16..25 ('0'..'9').
 *
 * NOTE(review): the code that indexes this table is not in this file;
 * presumably it uses (c >> 5) and (c & 0x1f) -- confirm against the header.
 */
u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
0x00000000, // 00..1f low control characters; nothing valid
0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z'
};
/*
 * Helper for dexIsValidMemberNameUtf8(); do not call directly.
 *
 * Decodes one value from *pUtf8Ptr (advancing the pointer) and decides
 * whether it may appear in a member name. A leading surrogate consumes a
 * second value, which must be a trailing surrogate.
 *
 * Rejected values:
 *   - anything at or below U+00a0 (the ISO-8859-1 high space), which also
 *     covers an encoded '\0' and the high control characters;
 *   - a trailing surrogate that is not preceded by a leading one, or a
 *     leading surrogate that is not followed by a trailing one;
 *   - U+2000..U+200f, U+2028..U+202f and U+fff0..U+ffff (spaces, layout
 *     controls and specials).
 *
 * Returns true if the decoded character is acceptable.
 */
bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
    u2 unit = dexGetUtf16FromUtf8(pUtf8Ptr);
    int highByte = unit >> 8;

    if (highByte == 0x00) {
        /* Only values above the ISO-8859-1 high space are allowed. */
        return (unit > 0x00a0);
    }

    if (highByte >= 0xd8 && highByte <= 0xdb) {
        /* Leading surrogate: the next value must be a trailing surrogate. */
        u2 trailing = dexGetUtf16FromUtf8(pUtf8Ptr);
        return (trailing >= 0xdc00) && (trailing <= 0xdfff);
    }

    if (highByte >= 0xdc && highByte <= 0xdf) {
        /* A trailing surrogate is not valid at this point. */
        return false;
    }

    if (highByte == 0x20 || highByte == 0xff) {
        /* These pages hold the space/control/special ranges; test by
         * groups of eight code points. */
        int group = unit & 0xfff8;

        if (group == 0x2000 || group == 0x2008 || group == 0x2028 ||
            group == 0xfff0 || group == 0xfff8) {
            return false;
        }
    }

    return true;
}
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is rejected. A name may start with '<', in which case
 * it must end with '>' as its very last character; a '>' anywhere else,
 * or in a name that did not start with '<', is rejected. Every other
 * character must satisfy dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    bool needsClosingAngle = false;

    if (*s == '\0') {
        /* The empty string is not a valid name. */
        return false;
    }

    if (*s == '<') {
        /* '<' is only allowed as the first character. */
        needsClosingAngle = true;
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            /* Must close an opening '<' and be the final character. */
            return needsClosingAngle && s[1] == '\0';
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    /* Hit the end: fine unless we were still waiting for a '>'. */
    return !needsClosingAngle;
}
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms:
 *   - zero to 255 leading '[' characters (array dimensions), followed by
 *   - one of the primitive letters B C D F I J S Z, or
 *   - 'V' (only with no array dimensions), or
 *   - 'L' + class name + ';', where the class name is one or more
 *     non-empty components separated by single '/' characters and every
 *     component character satisfies dexIsValidMemberNameUtf8().
 * Nothing may follow the descriptor.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int dimensions = 0;

    for (; *s == '['; s++) {
        dimensions++;
    }
    if (dimensions > 255) {
        /* Arrays may have no more than 255 dimensions. */
        return false;
    }

    char kind = *s++;

    if (kind == 'B' || kind == 'C' || kind == 'D' || kind == 'F' ||
        kind == 'I' || kind == 'J' || kind == 'S' || kind == 'Z') {
        /* Single-character primitive descriptor; must end here. */
        return (*s == '\0');
    }
    if (kind == 'V') {
        /* You can't have an array of void. */
        return (dimensions == 0) && (*s == '\0');
    }
    if (kind != 'L') {
        /* Oddball descriptor character (including a premature end). */
        return false;
    }

    /*
     * We just consumed the 'L' that introduces a class name. The flag is
     * true at the very start and right after each '/', i.e. whenever the
     * current component is still empty.
     */
    bool componentEmpty = true;

    for (;;) {
        u1 c = (u1) *s;

        if (c == '\0') {
            /* Premature end. */
            return false;
        }
        if (c == ';') {
            /* Must be the last character and must not follow an empty
             * component (this also rejects the degenerate "L;"). */
            return (s[1] == '\0') && !componentEmpty;
        }
        if (c == '/') {
            if (componentEmpty) {
                /* Slash at start or two slashes in a row. */
                return false;
            }
            componentEmpty = true;
            s++;
            continue;
        }

        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
        componentEmpty = false;
    }
}
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it describes a class ('L')
 * or an array ('[') rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && ((s[0] == 'L') || (s[0] == '['));
}
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it starts with 'L' (so it is
 * neither an array nor a primitive type).
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is not "void" (does not
 * start with 'V').
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
/*
 * Return the UTF-8 encoded string with the specified string_id index.
 *
 * The string data begins with an unsigned LEB128 value, which is stored
 * into *utf16Size; the returned pointer addresses the bytes immediately
 * after that value.
 */
const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
    u4* utf16Size) {
    const u1* cursor =
        pDexFile->baseAddr + dexGetStringId(pDexFile, idx)->stringDataOff;

    /* Decoding the length prefix advances the cursor to the string. */
    *utf16Size = readUnsignedLeb128(&cursor);
    return (const char*) cursor;
}
/*
 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
 * least kSHA1DigestOutputLen bytes.
 *
 * NOTE(review): this is a declaration only; no function with this name is
 * defined in the visible part of this file. The file-local
 * dexSHA1DigestToStr() below has the same parameters and documented
 * contract -- confirm whether this prototype is still needed.
 */
const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
/*
 * Compute a SHA-1 digest over "length" bytes starting at "data" and
 * store the result in "digest".
 */
static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
    unsigned char digest[])
{
    SHA1_CTX sha;

    /* One-shot hashing: initialize, feed the whole range, finalize. */
    SHA1Init(&sha);
    SHA1Update(&sha, data, length);
    SHA1Final(digest, &sha);
}
/*
 * Format the SHA-1 digest as lowercase hexadecimal into tmpBuf, which
 * must be able to hold at least kSHA1DigestOutputLen bytes (two
 * characters per digest byte plus the terminating '\0').
 *
 * Returns tmpBuf.
 */
static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
{
    static const char kHexChars[] = "0123456789abcdef";
    char* out = tmpBuf;
    int idx = 0;

    while (idx < kSHA1DigestLen) {
        unsigned char byte = digest[idx++];

        *out++ = kHexChars[byte >> 4];      /* high nibble first */
        *out++ = kHexChars[byte & 0x0f];
    }
    *out = '\0';

    /* The terminator must land on the last byte of the buffer. */
    assert(out + 1 == tmpBuf + kSHA1DigestOutputLen);
    return tmpBuf;
}
/*
 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
 *
 * This may or may not be compatible with UTF-8 hash functions used inside
 * the Dalvik VM.
 *
 * The hash starts at 1 and applies "multiply by 31 and add" to each byte
 * up to (not including) the terminating '\0'. That approach does better
 * on class names than most other things tried (e.g. adler32).
 *
 * Each byte is read through a plain "char", so bytes >= 0x80 contribute
 * according to the platform's char signedness.
 */
static u4 classDescriptorHash(const char* str)
{
    u4 accum = 1;
    const char* cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        accum = accum * 31 + *cursor;
    }
    return accum;
}
/*
 * Add an entry to the class lookup table. We hash the descriptor string
 * and probe linearly until we find an open slot.
 *
 * "stringOff" and "classDefOff" are offsets from pDexFile->baseAddr to
 * the class descriptor string and the class definition, respectively.
 * A slot is considered empty when its classDescriptorOffset is zero.
 *
 * On return, *pNumProbes holds the number of occupied slots that were
 * skipped before the entry was stored.
 */
static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
    int stringOff, int classDefOff, int* pNumProbes)
{
    const char* classDescriptor =
        (const char*) (pDexFile->baseAddr + stringOff);
    u4 hash = classDescriptorHash(classDescriptor);
    int mask = pLookup->numEntries-1;   /* numEntries is a power of two */
    int idx = hash & mask;
    int probes = 0;

    /*
     * Find the first empty slot. We oversized the table, so this is
     * guaranteed to finish.
     */
    while (pLookup->table[idx].classDescriptorOffset != 0) {
        idx = (idx + 1) & mask;
        probes++;
    }

    pLookup->table[idx].classDescriptorHash = hash;
    pLookup->table[idx].classDescriptorOffset = stringOff;
    pLookup->table[idx].classDefOffset = classDefOff;

    *pNumProbes = probes;
}
/*
 * Round up to the next highest power of 2. A value that is already a
 * power of 2 is returned unchanged.
 *
 * Because the arithmetic is done in 32 bits, an input of 0 yields 0, and
 * so does any input above 0x80000000.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
u4 dexRoundUpPower2(u4 val)
{
    unsigned int shift;

    val -= 1;
    /* Smear the highest set bit into every lower position. */
    for (shift = 1; shift <= 16; shift <<= 1) {
        val |= val >> shift;
    }
    return val + 1;
}
/*
 * Create the class lookup hash table for all class definitions in the
 * DEX file.
 *
 * Returns newly-allocated storage (obtained from calloc(); the caller
 * owns it), or NULL if the allocation fails.
 */
DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
{
    DexClassLookup* pLookup;
    int allocSize;
    int i, numEntries;
    int numProbes, totalProbes, maxProbes;

    numProbes = totalProbes = maxProbes = 0;

    assert(pDexFile != NULL);

    /*
     * The table is sized at twice the class count, rounded up to a power
     * of two. Using a factor of 3 results in far less probing than a
     * factor of 2, but almost doubles the flash storage requirements for
     * the bootstrap DEX files. The overall impact on class loading
     * performance seems to be minor. We could probably get some
     * performance improvement by using a secondary hash.
     */
    numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    allocSize = offsetof(DexClassLookup, table)
                    + numEntries * sizeof(pLookup->table[0]);

    /* calloc() leaves every slot zeroed, which marks it as empty. */
    pLookup = (DexClassLookup*) calloc(1, allocSize);
    if (pLookup == NULL)
        return NULL;
    pLookup->size = allocSize;
    pLookup->numEntries = numEntries;

    for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
        const DexClassDef* pClassDef;
        const char* pString;

        pClassDef = dexGetClassDef(pDexFile, i);
        pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);

        /* Entries are stored as offsets from the start of the DEX data. */
        classLookupAdd(pDexFile, pLookup,
            (const u1*)pString - pDexFile->baseAddr,
            (const u1*)pClassDef - pDexFile->baseAddr, &numProbes);

        if (numProbes > maxProbes)
            maxProbes = numProbes;
        totalProbes += numProbes;
    }

    /*
     * numEntries is zero when the file has no class definitions; avoid
     * dividing by it when reporting the occupancy percentage.
     */
    LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
         " total=%d max=%d\n",
        pDexFile->pHeader->classDefsSize, numEntries,
        (numEntries != 0)
            ? (100 * pDexFile->pHeader->classDefsSize) / numEntries : 0,
        allocSize, totalProbes, maxProbes);

    return pLookup;
}
/*
 * Set up the basic raw data pointers of a DexFile. This function isn't
 * meant for general use.
 *
 * "data" must point at a DEX header. The base address, the header
 * pointer, and one pointer per ID/definition/link section are stored
 * into pDexFile; each section pointer is "data" plus the corresponding
 * offset read from the header. No validation is performed.
 */
void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    DexHeader *hdr = (DexHeader*) data;

    pDexFile->baseAddr = data;
    pDexFile->pHeader = hdr;

    /* ID sections */
    pDexFile->pStringIds = (const DexStringId*) (data + hdr->stringIdsOff);
    pDexFile->pTypeIds   = (const DexTypeId*)   (data + hdr->typeIdsOff);
    pDexFile->pFieldIds  = (const DexFieldId*)  (data + hdr->fieldIdsOff);
    pDexFile->pMethodIds = (const DexMethodId*) (data + hdr->methodIdsOff);
    pDexFile->pProtoIds  = (const DexProtoId*)  (data + hdr->protoIdsOff);

    /* class definitions and link data */
    pDexFile->pClassDefs = (const DexClassDef*) (data + hdr->classDefsOff);
    pDexFile->pLinkData  = (const DexLink*)     (data + hdr->linkOff);
}
/*
 * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
 *
 * Layout read from *pData:
 *   expanding:  u4 reducedCount, then reducedCount u2 map entries
 *   reducing:   u4 fullCount, u4 reducedCount, then fullCount u2 entries
 * In the expanding case *pFullCount is set to (u4) -1. The map is
 * followed by padding up to the next 4-byte boundary.
 *
 * On success *pMap points at the first map entry, *pData points past the
 * padded map, and *pSize has been reduced by everything consumed
 * (including padding). Returns false, leaving *pData and *pSize
 * untouched, if the remaining size cannot hold the entry.
 */
static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
    u4* pFullCount, u4* pReducedCount, const u2** pMap)
{
    const u4* wordPtr = (const u4*) *pData;
    u4 size = *pSize;
    u4 mapCount;

    if (expanding) {
        if (size < sizeof(u4))
            return false;
        mapCount = *pReducedCount = *wordPtr++;
        *pFullCount = (u4) -1;
        size -= sizeof(u4);
    } else {
        if (size < sizeof(u4) * 2)
            return false;
        mapCount = *pFullCount = *wordPtr++;
        *pReducedCount = *wordPtr++;
        size -= sizeof(u4) * 2;
    }

    /*
     * Compare entry counts rather than byte counts: "mapCount * 2" can
     * wrap around in 32 bits and slip past a byte-size comparison.
     */
    if (mapCount > size / sizeof(u2))
        return false;

    u4 mapSize = mapCount * sizeof(u2);
    *pMap = (const u2*) wordPtr;
    size -= mapSize;

    /*
     * The next entry starts on a 4-byte boundary. Charge the padding
     * against the remaining size too, so later size checks describe what
     * is really left; a missing trailing pad simply leaves nothing.
     */
    u4 padding = (4 - (mapSize & 0x3)) & 0x3;
    if (padding > size)
        size = 0;
    else
        size -= padding;

    /* update pass-by-reference values */
    *pData = (const u1*) wordPtr + mapSize + padding;
    *pSize = size;

    return true;
}
/*
* Set up some pointers into the mapped data.
*
* See analysis/ReduceConstants.c for the data layout description.
*/
static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
bool expanding)
{
if (!parseIndexMapEntry(&data, &size, expanding,
&pDexFile->indexMap.classFullCount,
&pDexFile->indexMap.classReducedCount,
&pDexFile->indexMap.classMap))
{
return false;
}
if (!parseIndexMapEntry(&data, &size, expanding,
&pDexFile->indexMap.methodFullCount,
&pDexFile->indexMap.methodReducedCount,
&pDexFile->indexMap.methodMap))
{
return false;
}
if (!parseIndexMapEntry(&data, &size, expanding,
&pDexFile->indexMap.fieldFullCount,
&pDexFile->indexMap.fieldReducedCount,
&pDexFile->indexMap.fieldMap))
{
return false;
}
if (!parseIndexMapEntry(&data, &size, expanding,
&pDexFile->indexMap.stringFullCount,
&pDexFile->indexMap.stringReducedCount,
&pDexFile->indexMap.stringMap))
{
return false;
}
if (expanding) {
/*
* The map includes the "reduced" counts; pull the original counts
* out of the DexFile so that code has a consistent source.
*/
assert(pDexFile->indexMap.classFullCount == (u4) -1);
assert(pDexFile->indexMap.methodFullCount == (u4) -1);
assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
assert(pDexFile->indexMap.stringFullCount == (u4) -1);
#if 0 // TODO: not available yet -- do later or just skip this
pDexFile->indexMap.classFullCount =
pDexFile->pHeader->typeIdsSize;
pDexFile->indexMap.methodFullCount =
pDexFile->pHeader->methodIdsSize;
pDexFile->indexMap.fieldFullCount =
pDexFile->pHeader->fieldIdsSize;
pDexFile->indexMap.stringFullCount =
pDexFile->pHeader->stringIdsSize;
#endif
}
LOGI("Class : %u %u %u\n",
pDexFile->indexMap.classFullCount,
pDexFile->indexMap.classReducedCount,
pDexFile->indexMap.classMap[0]);
LOGI("Method: %u %u %u\n",
pDexFile->indexMap.methodFullCount,
pDexFile->indexMap.methodReducedCount,
pDexFile->indexMap.methodMap[0]);
LOGI("Field : %u %u %u\n",
pDexFile->indexMap.fieldFullCount,
pDexFile->indexMap.fieldReducedCount,
pDexFile->indexMap.fieldMap[0]);
LOGI("String: %u %u %u\n",
pDexFile->indexMap.stringFullCount,
pDexFile->indexMap.stringReducedCount,
pDexFile->indexMap.stringMap[0]);
return true;
}
/*
 * Parse some auxiliary data tables.
 *
 * "data" is the start of the optimized file; the aux area begins at
 * pDexFile->pOptHeader->auxOffset from there.
 *
 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
 * table. Subsequent versions switched to the "chunk" format: a sequence
 * of (u4 type, u4 size, payload) records, each padded to a 64-bit
 * boundary, terminated by a kDexChunkEnd type word.
 *
 * Returns false if an index map chunk fails to parse or a chunk size is
 * too large to step over.
 *
 * NOTE(review): the walk is not bounded by the length of the aux area;
 * only the chunk-size wraparound is rejected here.
 */
static bool parseAuxData(const u1* data, DexFile* pDexFile)
{
    const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
    u4 indexMapType = 0;

    /* v1.0 format? */
    if (*pAux == 0) {
        LOGV("+++ found OLD dex format\n");
        pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
        return true;
    }
    LOGV("+++ found NEW dex format\n");

    /* process chunks until we see the end marker */
    while (*pAux != kDexChunkEnd) {
        u4 size = *(pAux+1);
        u1* chunkData = (u1*) (pAux + 2);

        /*
         * Rounding "size + 8" up to a multiple of 8 must not wrap around;
         * a wrapped value could advance by zero and loop forever.
         */
        if (size > (u4) 0xfffffff0) {
            LOGE("Bogus chunk size %u in aux data area\n", size);
            return false;
        }

        switch (*pAux) {
        case kDexChunkClassLookup:
            pDexFile->pClassLookup = (const DexClassLookup*) chunkData;
            break;
        case kDexChunkReducingIndexMap:
            LOGI("+++ found reducing index map, size=%u\n", size);
            if (!parseIndexMap(pDexFile, chunkData, size, false)) {
                LOGE("Failed parsing reducing index map\n");
                return false;
            }
            indexMapType = *pAux;
            break;
        case kDexChunkExpandingIndexMap:
            LOGI("+++ found expanding index map, size=%u\n", size);
            if (!parseIndexMap(pDexFile, chunkData, size, true)) {
                LOGE("Failed parsing expanding index map\n");
                return false;
            }
            indexMapType = *pAux;
            break;
        case kDexChunkRegisterMaps:
            LOGV("+++ found register maps, size=%u\n", size);
            pDexFile->pRegisterMapPool = chunkData;
            break;
        default:
            /* unrecognized chunks are reported and skipped */
            LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
                *pAux,
                (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
                (char) ((*pAux) >> 8), (char) (*pAux),
                size);
            break;
        }

        /*
         * Advance pointer, padding to 64-bit boundary. The extra "+8" is
         * for the type/size header.
         */
        size = (size + 8 + 7) & ~7;
        pAux += size / sizeof(u4);
    }

#if 0   // TODO: propagate expected map type from the VM through the API
    /*
     * If we're configured to expect an index map, and we don't find one,
     * reject this DEX so we'll regenerate it. Also, if we found an
     * "expanding" map but we're not configured to use it, we have to fail
     * because the constants aren't usable without translation.
     */
    if (indexMapType != expectedIndexMapType) {
        LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
            indexMapType, DVM_REDUCE_CONSTANTS);
        return false;
    }
#endif

    return true;
}
/*
* Parse an optimized or unoptimized .dex file sitting in memory. This is
* called after the byte-ordering and structure alignment has been fixed up.
*
* On success, return a newly-allocated DexFile.
*/
DexFile* dexFileParse(const u1* data, size_t length, int flags)
{
DexFile* pDexFile = NULL;
const DexHeader* pHeader;
const u1* magic;
int result = -1;
if (length < sizeof(DexHeader)) {
LOGE("too short to be a valid .dex\n");
goto bail; /* bad file format */
}
pDexFile = (DexFile*) malloc(sizeof(DexFile));
if (pDexFile == NULL)
goto bail; /* alloc failure */
memset(pDexFile, 0, sizeof(DexFile));
/*
* Peel off the optimized header.
*/
if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
magic = data;
if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
magic[4], magic[5], magic[6], magic[7]);
goto bail;
}
pDexFile->pOptHeader = (const DexOptHeader*) data;
LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
/* locate some auxillary data tables */
if (!parseAuxData(data, pDexFile))
goto bail;
/* ignore the opt header and appended data from here on out */
data += pDexFile->pOptHeader->dexOffset;
length -= pDexFile->pOptHeader->dexOffset;
if (pDexFile->pOptHeader->dexLength > length) {
LOGE("File truncated? stored len=%d, rem len=%d\n",
pDexFile->pOptHeader->dexLength, (int) length);
goto bail;
}
length = pDexFile->pOptHeader->dexLength;
}
dexFileSetupBasicPointers(pDexFile, data);
pHeader = pDexFile->pHeader;
magic = pHeader->magic;
if (memcmp(magic, DEX_MAGIC, 4) != 0) {
/* not expected */
LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
magic[0], magic[1], magic[2], magic[3]);
goto bail;
}
if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
magic[4], magic[5], magic[6], magic[7]);
goto bail;
}
/*
* Verify the checksum(s). This is reasonably quick, but does require
* touching every byte in the DEX file. The base checksum changes after
* byte-swapping and DEX optimization.
*/
if (flags & kDexParseVerifyChecksum) {
u4 adler = dexComputeChecksum(pHeader);
if (adler != pHeader->checksum) {
LOGE("ERROR: bad checksum (%08x vs %08x)\n",
adler, pHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
LOGV("+++ adler32 checksum (%08x) verified\n", adler);
}
const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
if (pOptHeader != NULL) {
adler = dexComputeOptChecksum(pOptHeader);
if (adler != pOptHeader->checksum) {
LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
adler, pOptHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
}
}
}
/*
* Verify the SHA-1 digest. (Normally we don't want to do this --
* the digest is used to uniquely identify the original DEX file, and
* can't be computed for verification after the DEX is byte-swapped
* and optimized.)
*/
if (kVerifySignature) {
unsigned char sha1Digest[kSHA1DigestLen];
const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
kSHA1DigestLen;
dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
char tmpBuf1[kSHA1DigestOutputLen];
char tmpBuf2[kSHA1DigestOutputLen];
LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
dexSHA1DigestToStr(sha1Digest, tmpBuf1),
dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
LOGV("+++ sha1 digest verified\n");
}
}
if (pHeader->fileSize != length) {
LOGE("ERROR: stored file size (%d) != expected (%d)\n",
(int) pHeader->fileSize, (int) length);
if (!(flags & kDexParseContinueOnError))
goto bail;
}
if (pHeader->classDefsSize == 0) {
LOGE("ERROR: DEX file has no classes in it, failing\n");
goto bail;
}
/*
* Success!
*/
result = 0;
bail:
if (result != 0 && pDexFile != NULL) {
dexFileFree(pDexFile);
pDexFile = NULL;
}
return pDexFile;
}
/*
 * Free up the DexFile and any associated data structures.
 *
 * Only the DexFile structure itself is released here. Passing NULL is
 * allowed and does nothing.
 *
 * Note we may be called with a partially-initialized DexFile.
 */
void dexFileFree(DexFile* pDexFile)
{
    /* free() already ignores NULL, so no separate guard is needed. */
    free(pDexFile);
}
/*
 * Look up a class definition entry by descriptor.
 *
 * "descriptor" should look like "Landroid/debug/Stuff;".
 *
 * The descriptor is hashed and the lookup table in
 * pDexFile->pClassLookup is probed linearly from the hashed slot.
 * Returns the matching class definition, or NULL once an empty slot
 * (descriptor offset of zero) is reached.
 */
const DexClassDef* dexFindClass(const DexFile* pDexFile,
    const char* descriptor)
{
    const DexClassLookup* pLookup = pDexFile->pClassLookup;
    const u4 hash = classDescriptorHash(descriptor);
    const int mask = pLookup->numEntries - 1;
    int slot;

    for (slot = hash & mask; ; slot = (slot + 1) & mask) {
        int descOffset = pLookup->table[slot].classDescriptorOffset;

        if (descOffset == 0)
            return NULL;        /* empty slot: not in the table */

        /* Compare the cheap hash first; only then compare strings. */
        if (pLookup->table[slot].classDescriptorHash != hash)
            continue;

        if (strcmp((const char*) (pDexFile->baseAddr + descOffset),
                descriptor) == 0)
        {
            return (const DexClassDef*)
                (pDexFile->baseAddr + pLookup->table[slot].classDefOffset);
        }
    }
}
/*
* Compute the DEX file checksum for a memory-mapped DEX file.
*/
u4 dexComputeChecksum(const DexHeader* pHeader)
{
const u1* start = (const u1*) pHeader;
uLong adler = adler32(0L, Z_NULL, 0);
const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
}
/*
* Compute the checksum on the data appended to the DEX file by dexopt.
*/
static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader)
{
const u1* start = (const u1*) pOptHeader + pOptHeader->depsOffset;
const u1* end = (const u1*) pOptHeader +
pOptHeader->auxOffset + pOptHeader->auxLength;
uLong adler = adler32(0L, Z_NULL, 0);
return (u4) adler32(adler, start, end - start);
}
/*
* Compute the size, in bytes, of a DexCode.
*/
size_t dexGetDexCodeSize(const DexCode* pCode)
{
/*
* The catch handler data is the last entry. It has a variable number
* of variable-size pieces, so we need to create an iterator.
*/
u4 handlersSize;
u4 offset;
u4 ui;
if (pCode->triesSize != 0) {
handlersSize = dexGetHandlersSize(pCode);
offset = dexGetFirstHandlerOffset(pCode);
} else {
handlersSize = 0;
offset = 0;
}
for (ui = 0; ui < handlersSize; ui++) {
DexCatchIterator iterator;
dexCatchIteratorInit(&iterator, pCode, offset);
offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
}
const u1* handlerData = dexGetCatchHandlerData(pCode);
//LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
// pCode, handlerData, offset);
/* return the size of the catch handler + everything before it */
return (handlerData - (u1*) pCode) + offset;
}
/*
* ===========================================================================
* Debug info
* ===========================================================================
*/
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * A leading '(' at *pSig is skipped. Returns the type signature letter
 * for the next argument ('L' for any object reference, '[' for any
 * array), ')' if there are no more args, or '\0' at the end of the
 * string. Advances "*pSig" to point to the character after the type
 * that was consumed.
 *
 * The scan never moves past the terminating '\0': on a truncated
 * signature (e.g. an 'L' with no ';') "*pSig" is left pointing at the
 * terminator, and when '\0' itself is returned "*pSig" is not modified.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;

    if (*sig == '(')
        sig++;

    if (*sig == 'L') {
        /* object ref; skip to the ';' but stop at end of string */
        do {
            sig++;
        } while (*sig != ';' && *sig != '\0');
        *pSig = (*sig == ';') ? sig + 1 : sig;
        return 'L';
    }

    if (*sig == '[') {
        /* array; advance past all dimensions to the element type */
        do {
            sig++;
        } while (*sig == '[');
        if (*sig == 'L') {
            do {
                sig++;
            } while (*sig != ';' && *sig != '\0');
        }
        /* step over the element type's last char, if there is one */
        *pSig = (*sig != '\0') ? sig + 1 : sig;
        return '[';
    }

    if (*sig == '\0')
        return *sig;        /* don't advance further */

    *pSig = sig+1;
    return *sig;
}
/*
 * Returns the length, in bytes, of the type string that starts at
 * "type", as delimited by decodeSignature(). Used for the case where the
 * debug info format references types that are inside a method type
 * signature.
 *
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* Only the advanced position matters; the type letter is ignored. */
    (void) decodeSignature(&cursor);
    return cursor - type;
}
/*
 * Reads a string index as encoded for the debug info format (unsigned
 * LEB128, advancing *pStream), returning a string pointer or NULL as
 * appropriate.
 *
 * Encoded string indices have 1 added to them, so an encoded value of 0
 * means "no string" and yields NULL.
 */
static const char* readStringIdx(const DexFile* pDexFile,
    const u1** pStream) {
    u4 encoded = readUnsignedLeb128(pStream);

    return (encoded != 0) ? dexStringById(pDexFile, encoded - 1) : NULL;
}
/*
 * Reads a type index as encoded for the debug info format (unsigned
 * LEB128, advancing *pStream), returning a string pointer for its
 * descriptor or NULL as appropriate.
 *
 * Encoded type indices have 1 added to them, so an encoded value of 0
 * means "no type" and yields NULL.
 */
static const char* readTypeIdx(const DexFile* pDexFile,
    const u1** pStream) {
    u4 encoded = readUnsignedLeb128(pStream);

    return (encoded != 0) ? dexStringByTypeIdx(pDexFile, encoded - 1) : NULL;
}
/* access_flag value indicating that a method is static */
#define ACC_STATIC 0x0008
typedef struct LocalInfo {
const char *name;
const char *descriptor;
const char *signature;
u2 startAddress;
bool live;
} LocalInfo;
/*
 * Report the local variable held in register "reg" through "localCb",
 * covering the range from its recorded start address to "endAddress".
 *
 * Nothing happens if there is no callback or the register does not
 * currently hold a live local. A missing signature is reported as the
 * empty string.
 */
static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
    LocalInfo *localInReg, DexDebugNewLocalCb localCb)
{
    const LocalInfo *slot;
    const char *signature;

    if (localCb == NULL)
        return;

    slot = &localInReg[reg];
    if (!slot->live)
        return;

    signature = (slot->signature == NULL) ? "" : slot->signature;
    localCb(cnxt, reg, slot->startAddress, endAddress,
        slot->name, slot->descriptor, signature);
}
/*
 * Decode the debug info stream of a method and report its contents
 * through callbacks.
 *
 * pDexFile/pCode identify the method's code; classDescriptor, protoIdx
 * and accessFlags describe the method itself (needed to set up the
 * implicit "this" and the parameter registers).
 *
 * posCb, if not NULL, is called with (cnxt, address, line) for every
 * position entry; a nonzero return value stops decoding early. localCb,
 * if not NULL, is called once for each local variable range, when the
 * range ends or when decoding finishes with the local still live.
 *
 * If the stream is malformed, an error is logged and decoding stops
 * without reporting the locals that were still live.
 */
// TODO optimize localCb == NULL case
void dexDecodeDebugInfo(
            const DexFile* pDexFile,
            const DexCode* pCode,
            const char* classDescriptor,
            u4 protoIdx,
            u4 accessFlags,
            DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
            void* cnxt)
{
    const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
    u4 line;
    u4 parametersSize;
    u4 address = 0;
    LocalInfo localInReg[pCode->registersSize];
    u4 insnsSize = pCode->insnsSize;
    DexProto proto = { pDexFile, protoIdx };
    DexParameterIterator iterator;
    u2 argReg;

    memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);

    if (stream == NULL) {
        goto end;
    }

    line = readUnsignedLeb128(&stream);
    parametersSize = readUnsignedLeb128(&stream);

    /* The incoming arguments occupy the last insSize registers. */
    if (pCode->insSize > pCode->registersSize) {
        goto invalid_stream;
    }
    argReg = pCode->registersSize - pCode->insSize;

    if ((accessFlags & ACC_STATIC) == 0) {
        /*
         * The code is an instance method, which means that there is
         * an initial this parameter. Also, the proto list should
         * contain exactly one fewer argument word than the insSize
         * indicates.
         */
        assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));

        /* Don't rely on the assert: "this" needs a register of its own. */
        if (argReg >= pCode->registersSize) {
            goto invalid_stream;
        }
        localInReg[argReg].name = "this";
        localInReg[argReg].descriptor = classDescriptor;
        localInReg[argReg].startAddress = 0;
        localInReg[argReg].live = true;
        argReg++;
    } else {
        assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
    }

    dexParameterIteratorInit(&iterator, &proto);

    while (parametersSize-- != 0) {
        const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
        const char *name;
        int reg;

        if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
            goto invalid_stream;
        }

        name = readStringIdx(pDexFile, &stream);
        reg = argReg;

        /* doubles and longs take two registers */
        switch (descriptor[0]) {
            case 'D':
            case 'J':
                argReg += 2;
                break;
            default:
                argReg += 1;
                break;
        }

        if (name != NULL) {
            localInReg[reg].name = name;
            localInReg[reg].descriptor = descriptor;
            localInReg[reg].signature = NULL;
            localInReg[reg].startAddress = address;
            localInReg[reg].live = true;
        }
    }

    for (;;) {
        u1 opcode = *stream++;
        /* full width, so an oversized value can't alias a valid register */
        u4 reg;

        switch (opcode) {
            case DBG_END_SEQUENCE:
                goto end;

            case DBG_ADVANCE_PC:
                address += readUnsignedLeb128(&stream);
                break;

            case DBG_ADVANCE_LINE:
                line += readSignedLeb128(&stream);
                break;

            case DBG_START_LOCAL:
            case DBG_START_LOCAL_EXTENDED:
                reg = readUnsignedLeb128(&stream);
                /* valid registers are 0 .. registersSize-1 */
                if (reg >= pCode->registersSize) goto invalid_stream;

                // Emit what was previously there, if anything
                emitLocalCbIfLive (cnxt, reg, address,
                    localInReg, localCb);

                localInReg[reg].name = readStringIdx(pDexFile, &stream);
                localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
                if (opcode == DBG_START_LOCAL_EXTENDED) {
                    localInReg[reg].signature
                        = readStringIdx(pDexFile, &stream);
                } else {
                    localInReg[reg].signature = NULL;
                }
                localInReg[reg].startAddress = address;
                localInReg[reg].live = true;
                break;

            case DBG_END_LOCAL:
                reg = readUnsignedLeb128(&stream);
                if (reg >= pCode->registersSize) goto invalid_stream;

                emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
                localInReg[reg].live = false;
                break;

            case DBG_RESTART_LOCAL:
                reg = readUnsignedLeb128(&stream);
                if (reg >= pCode->registersSize) goto invalid_stream;

                if (localInReg[reg].name == NULL
                        || localInReg[reg].descriptor == NULL) {
                    goto invalid_stream;
                }

                /*
                 * If the register is live, the "restart" is superfluous,
                 * and we don't want to mess with the existing start address.
                 */
                if (!localInReg[reg].live) {
                    localInReg[reg].startAddress = address;
                    localInReg[reg].live = true;
                }
                break;

            case DBG_SET_PROLOGUE_END:
            case DBG_SET_EPILOGUE_BEGIN:
                break;

            case DBG_SET_FILE:
                /*
                 * This opcode carries a file name index operand. We don't
                 * use it, but it must be consumed so that the following
                 * bytes are not misread as opcodes.
                 */
                (void) readUnsignedLeb128(&stream);
                break;

            default: {
                /* special opcode: advances both address and line */
                int adjopcode = opcode - DBG_FIRST_SPECIAL;

                address += adjopcode / DBG_LINE_RANGE;
                line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);

                if (posCb != NULL) {
                    int done;
                    done = posCb(cnxt, address, line);

                    if (done) {
                        // early exit
                        goto end;
                    }
                }
                break;
            }
        }
    }

end:
    {
        /* report every local still live at the end of the method */
        int reg;
        for (reg = 0; reg < pCode->registersSize; reg++) {
            emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
        }
    }
    return;

invalid_stream:
    IF_LOGE() {
        char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
        LOGE("Invalid debug info stream. class %s; proto %s",
                classDescriptor, methodDescriptor);
        free(methodDescriptor);
    }
}