blob: c490691dbfe4b65780e53f35cde5d79320f16183 [file] [log] [blame]
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Dalvik classfile verification. This file contains the verifier entry
* points and the static constraint checks.
*/
#include "Dalvik.h"
#include "analysis/CodeVerify.h"
#include <stdint.h>
/*
 * Forward declarations for file-local functions that are called (from
 * dvmVerifyClass and verifyMethod) before their definitions appear.
 */
static bool verifyMethod(Method* meth, int verifyFlags);
static bool verifyInstructions(const Method* meth, InsnFlags* insnFlags,
int verifyFlags);
/*
 * Build the per-opcode lookup tables (instruction width, format and flags)
 * that the verifier consults, and store them in gDvm.
 *
 * Returns "false", after logging an error, if any of the three tables
 * could not be created.  Tables that were created are left in gDvm; they
 * are released by dvmVerificationShutdown().
 */
bool dvmVerificationStartup(void)
{
    bool haveAllTables;

    gDvm.instrWidth = dexCreateInstrWidthTable();
    gDvm.instrFormat = dexCreateInstrFormatTable();
    gDvm.instrFlags = dexCreateInstrFlagsTable();

    haveAllTables = gDvm.instrWidth != NULL &&
                    gDvm.instrFormat != NULL &&
                    gDvm.instrFlags != NULL;
    if (!haveAllTables)
        LOGE("Unable to create instruction tables\n");

    return haveAllTables;
}
/*
 * Release the instruction tables created by dvmVerificationStartup().
 *
 * Each pointer is reset to NULL after it is freed so that gDvm never holds
 * a dangling pointer: calling this function a second time, or calling it
 * after a failed startup, is then harmless because free(NULL) is a no-op.
 */
void dvmVerificationShutdown(void)
{
    free(gDvm.instrWidth);
    gDvm.instrWidth = NULL;

    free(gDvm.instrFormat);
    gDvm.instrFormat = NULL;

    free(gDvm.instrFlags);
    gDvm.instrFlags = NULL;
}
/*
 * Run the verifier over every class defined in "pDexFile" as part of
 * pre-verification and optimization.  This is never called from a
 * normally running VM (gDvm.optimizing is asserted).
 *
 * Nothing is verified when verification is disabled outright, or when the
 * mode is VERIFY_MODE_REMOTE and a bootstrap class is being optimized.
 *
 * A class that verifies successfully gets CLASS_ISPREVERIFIED set in the
 * access flags of its class definition inside the DEX file.  Classes that
 * cannot be looked up, that resolve to a definition in a different DEX
 * file, or that fail verification are skipped.
 *
 * Returns "true" when all classes have been processed; a failure of an
 * individual class does not change the result.
 */
bool dvmVerifyAllClasses(DexFile* pDexFile)
{
    const u4 classDefCount = pDexFile->pHeader->classDefsSize;
    u4 defIdx;

    assert(gDvm.optimizing);

    if (gDvm.classVerifyMode == VERIFY_MODE_NONE) {
        LOGV("+++ verification is disabled, skipping all classes\n");
        return true;
    }
    if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE &&
        gDvm.optimizingBootstrapClass)
    {
        LOGV("+++ verification disabled for bootstrap classes\n");
        return true;
    }

    for (defIdx = 0; defIdx < classDefCount; defIdx++) {
        const DexClassDef* pClassDef = dexGetClassDef(pDexFile, defIdx);
        const char* classDescriptor =
            dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
        /* all classes are loaded into the bootstrap class loader */
        ClassObject* clazz = dvmLookupClass(classDescriptor, NULL, false);

        if (clazz == NULL) {
            LOGV("DexOpt: +++ not verifying '%s'\n", classDescriptor);
            continue;
        }

        /* the class we found came from some other DEX file */
        if (clazz->pDvmDex->pDexFile != pDexFile) {
            LOGD("DexOpt: not verifying '%s': multiple definitions\n",
                classDescriptor);
            continue;
        }

        /* keep going even if one fails */
        if (!dvmVerifyClass(clazz, VERIFY_DEFAULT))
            continue;

        assert((clazz->accessFlags & JAVA_FLAGS_MASK) ==
            pClassDef->accessFlags);

        /* the class def is declared const; the flag is written in place */
        ((DexClassDef*)pClassDef)->accessFlags |= CLASS_ISPREVERIFIED;
    }

    return true;
}
/*
 * Verify a class by verifying each of its direct methods and then each of
 * its virtual methods.  Verification stops at the first method that is
 * rejected.
 *
 * By the time we get here, the value of gDvm.classVerifyMode should already
 * have been factored in.  If you want to call into the verifier even
 * though verification is disabled, that's your business.
 *
 * A class that is already marked verified is not processed again.
 *
 * Returns "true" on success.
 */
bool dvmVerifyClass(ClassObject* clazz, int verifyFlags)
{
    int methIdx;

    if (dvmIsClassVerified(clazz)) {
        LOGD("Ignoring duplicate verify attempt on %s\n", clazz->descriptor);
        return true;
    }

    // TODO - verify class structure in DEX?

    for (methIdx = 0; methIdx < clazz->directMethodCount; methIdx++) {
        if (!verifyMethod(&clazz->directMethods[methIdx], verifyFlags))
            goto rejected;
    }
    for (methIdx = 0; methIdx < clazz->virtualMethodCount; methIdx++) {
        if (!verifyMethod(&clazz->virtualMethods[methIdx], verifyFlags))
            goto rejected;
    }

    return true;

rejected:
    LOG_VFY("Verifier rejected class %s\n", clazz->descriptor);
    return false;
}
/*
 * Temporarily "undo" any breakpoints found in this method.  There is no risk
 * of confusing the interpreter, because unverified code cannot be executed.
 *
 * Breakpoints can be set after a class is loaded but before it has been
 * verified.
 *
 * The "breakpoint" opcode can replace any other opcode, leaving no
 * indication of the original instruction's width or purpose in the
 * instruction stream.  We either have to quietly undo the breakpoints
 * before verification, or look up the original opcode whenever we need it.
 * The latter is more efficient since we only slow down on code that
 * actually has breakpoints, but it requires explicit handling in every
 * function that examines the instruction stream.
 *
 * We need to ensure that the debugger doesn't insert any additional
 * breakpoints while we work.  This either requires holding a lock on the
 * breakpoint set throughout the verification of this method, or adding a
 * "do not touch anything on these pages" list to the set.  Either way,
 * the caller of this method must ensure that it calls "redo" to release
 * state.
 *
 * A debugger could connect while we work, so "no debugger is connected
 * right now" is not a sufficient reason to skip the work.  We only skip it
 * when the debugger thread cannot be running at all: while optimizing
 * (dexopt), in the zygote, or when JDWP is not configured.  Builds without
 * WITH_DEBUGGER never do anything here.
 *
 * Returns "false" if we did nothing, "true" if we did stuff (and, hence,
 * need to call "redo" at some point).
 */
static bool undoBreakpoints(Method* meth)
{
    bool didUndo = false;

#ifdef WITH_DEBUGGER
    if (!gDvm.optimizing && !gDvm.zygote && gDvm.jdwpConfigured) {
        dvmUndoBreakpoints(meth);
        didUndo = true;
    }
#endif

    return didUndo;
}
/*
 * Restore any breakpoints we undid previously.  Also has to update the
 * stored "original opcode" value for any instruction that we replaced
 * with a throw-verification-error op.
 *
 * This must only be called when undoBreakpoints() returned "true".  Being
 * called in any other situation (optimizing, zygote, JDWP not configured,
 * or a build without WITH_DEBUGGER) trips an assertion and otherwise does
 * nothing.
 */
static void redoBreakpoints(Method* meth)
{
#ifdef WITH_DEBUGGER
    if (!gDvm.optimizing && !gDvm.zygote && gDvm.jdwpConfigured) {
        dvmRedoBreakpoints(meth);
    } else {
        /* should not be here */
        assert(false);
    }
#else
    assert(false);
#endif
}
/*
 * Perform verification on a single method.
 *
 * We do this in three passes:
 *  (1) Walk through all code units, determining instruction lengths.
 *  (2) Do static checks, including branch target and operand validation.
 *  (3) Do structural checks, including data-flow analysis.
 *
 * Some checks may be bypassed depending on the verification mode.  We can't
 * turn this stuff off completely if we want to do "exact" GC.
 *
 * Constraints that matter here:
 *  - operands of getfield, putfield, getstatic, putstatic must be valid
 *  - operands of method invocation instructions must be valid
 *
 *  - code array must not be empty
 *  - (N/A) code_length must be less than 65536
 *  - opcode of first instruction begins at index 0
 *  - only documented instructions may appear
 *  - each instruction follows the last
 *  - (below) last byte of last instruction is at (code_length-1)
 *
 * Any breakpoints in the method are undone for the duration of the call
 * and restored before returning.  The scratch instruction-flag array and
 * the uninitialized-instance map are released on every exit path.
 *
 * Returns "true" if the method passed verification (a native or abstract
 * method with no code passes trivially), "false" if it was rejected or a
 * needed allocation failed.
 */
static bool verifyMethod(Method* meth, int verifyFlags)
{
    bool result = false;
    UninitInstanceMap* uninitMap = NULL;
    InsnFlags* insnFlags = NULL;
    int newInstanceCount = 0;
    bool undidBreakpoints;

    undidBreakpoints = undoBreakpoints(meth);

    /*
     * If there aren't any instructions, make sure that's expected, then
     * exit successfully.  Note: meth->insns gets set to a native function
     * pointer on first call.
     */
    if (dvmGetMethodInsnsSize(meth) == 0) {
        if (!dvmIsNativeMethod(meth) && !dvmIsAbstractMethod(meth)) {
            LOG_VFY_METH(meth,
                "VFY: zero-length code in concrete non-native method\n");
            goto bail;
        }

        goto success;
    }

    /*
     * Sanity-check the register counts.  ins + locals = registers, so make
     * sure that ins <= registers.
     */
    if (meth->insSize > meth->registersSize) {
        LOG_VFY_METH(meth, "VFY: bad register counts (ins=%d regs=%d)\n",
            meth->insSize, meth->registersSize);
        goto bail;
    }

    /*
     * Allocate and populate an array to hold instruction data, one entry
     * per code unit.
     *
     * TODO: Consider keeping a reusable pre-allocated array sitting
     * around for smaller methods.
     */
    insnFlags = (InsnFlags*)
        calloc(dvmGetMethodInsnsSize(meth), sizeof(InsnFlags));
    if (insnFlags == NULL)
        goto bail;

    /*
     * Compute the width of each instruction and store the result in insnFlags.
     * Count up the #of occurrences of new-instance instructions while we're
     * at it.
     */
    if (!dvmComputeCodeWidths(meth, insnFlags, &newInstanceCount))
        goto bail;

    /*
     * Allocate a map to hold the classes of uninitialized instances.
     */
    uninitMap = dvmCreateUninitInstanceMap(meth, insnFlags, newInstanceCount);
    if (uninitMap == NULL)
        goto bail;

    /*
     * Set the "in try" flags for all instructions guarded by a "try" block.
     */
    if (!dvmSetTryFlags(meth, insnFlags))
        goto bail;

    /*
     * Perform static instruction verification.
     */
    if (!verifyInstructions(meth, insnFlags, verifyFlags))
        goto bail;

    /*
     * Do code-flow analysis.  Do this after verifying the branch targets
     * so we don't need to worry about it here.
     *
     * If there are no registers, we don't need to do much in the way of
     * analysis, but we still need to verify that nothing actually tries
     * to use a register.
     */
    if (!dvmVerifyCodeFlow(meth, insnFlags, uninitMap)) {
        //LOGD("+++ %s failed code flow\n", meth->name);
        goto bail;
    }

success:
    result = true;

bail:
    /* common exit: put breakpoints back and release scratch storage */
    if (undidBreakpoints)
        redoBreakpoints(meth);
    dvmFreeUninitInstanceMap(uninitMap);
    free(insnFlags);
    return result;
}
/*
* Verify an array data table. "curOffset" is the offset of the fill-array-data
* instruction.
*/
static bool checkArrayData(const Method* meth, int curOffset)
{
const int insnCount = dvmGetMethodInsnsSize(meth);
const u2* insns = meth->insns + curOffset;
const u2* arrayData;
int valueCount, valueWidth, tableSize;
int offsetToArrayData;
assert(curOffset >= 0 && curOffset < insnCount);
/* make sure the start of the array data table is in range */
offsetToArrayData = insns[1] | (((s4)insns[2]) << 16);
if (curOffset + offsetToArrayData < 0 ||
curOffset + offsetToArrayData + 2 >= insnCount)
{
LOG_VFY_METH(meth,
"VFY: invalid array data start: at %d, data offset %d, count %d\n",
curOffset, offsetToArrayData, insnCount);
return false;
}
/* offset to array data table is a relative branch-style offset */
arrayData = insns + offsetToArrayData;
/* make sure the table is 32-bit aligned */
if ((((u4) arrayData) & 0x03) != 0) {
LOG_VFY_METH(meth,
"VFY: unaligned array data table: at %d, data offset %d\n",
curOffset, offsetToArrayData);
return false;
}
valueWidth = arrayData[1];
valueCount = *(u4*)(&arrayData[2]);
tableSize = 4 + (valueWidth * valueCount + 1) / 2;
/* make sure the end of the switch is in range */
if (curOffset + offsetToArrayData + tableSize > insnCount) {
LOG_VFY_METH(meth,
"VFY: invalid array data end: at %d, data offset %d, end %d, "
"count %d\n",
curOffset, offsetToArrayData,
curOffset + offsetToArrayData + tableSize,
insnCount);
return false;
}
return true;
}
/*
 * Decode the instruction that starts at code-unit index "insnIdx" of
 * "meth" into "*pDecInsn", using the VM-wide instruction format table.
 */
static void decodeInstruction(const Method* meth, int insnIdx,
    DecodedInstruction* pDecInsn)
{
    const u2* insnPtr = &meth->insns[insnIdx];

    dexDecodeInstruction(gDvm.instrFormat, insnPtr, pDecInsn);
}
/*
 * Perform static checks on a "new-instance" instruction.  The type index
 * (the instruction's "B" operand) must be within the DEX file's type ID
 * table, and the type it names must have a descriptor that starts with
 * 'L', which rules out array classes.
 *
 * We don't need the actual class, just a pointer to the class name.
 *
 * Returns "true" if the instruction is acceptable.
 */
static bool checkNewInstance(const Method* meth, int insnIdx)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;
    const char* descriptor;
    u4 typeIdx;

    decodeInstruction(meth, insnIdx, &decInsn);
    typeIdx = decInsn.vB;       // 2nd item

    if (typeIdx >= pDvmDex->pHeader->typeIdsSize) {
        LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
            typeIdx, pDvmDex->pHeader->typeIdsSize);
        return false;
    }

    descriptor = dexStringByTypeIdx(pDvmDex->pDexFile, typeIdx);
    if (descriptor[0] == 'L')
        return true;

    LOG_VFY_METH(meth, "VFY: can't call new-instance on type '%s'\n",
        descriptor);
    return false;
}
/*
 * Perform static checks on a "new-array" instruction.  The type index
 * (the instruction's "C" operand) must be within the DEX file's type ID
 * table, the named type must be an array type (its descriptor starts with
 * at least one '['), and the number of dimensions must not exceed 255.
 *
 * Returns "true" if the instruction is acceptable.
 */
static bool checkNewArray(const Method* meth, int insnIdx)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;
    const char* descriptor;
    const char* scan;
    int dimensions = 0;
    u4 typeIdx;

    decodeInstruction(meth, insnIdx, &decInsn);
    typeIdx = decInsn.vC;       // 3rd item

    if (typeIdx >= pDvmDex->pHeader->typeIdsSize) {
        LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
            typeIdx, pDvmDex->pHeader->typeIdsSize);
        return false;
    }

    descriptor = dexStringByTypeIdx(pDvmDex->pDexFile, typeIdx);

    /* one leading '[' per array dimension */
    for (scan = descriptor; *scan == '['; scan++)
        dimensions++;

    if (dimensions == 0) {
        /* The given class must be an array type. */
        LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (not an array)\n",
            descriptor);
        return false;
    }
    if (dimensions > 255) {
        /* It is illegal to create an array of more than 255 dimensions. */
        LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (exceeds limit)\n",
            descriptor);
        return false;
    }

    return true;
}
/*
 * Perform static checks on an instruction that takes a class constant.
 * Ensure that the class (type) index is within the DEX file's type ID
 * table.
 *
 * "useB" selects which decoded operand holds the index: the "B" operand
 * when true, the "C" operand when false.
 *
 * Returns "true" if the index is in range.
 */
static bool checkTypeIndex(const Method* meth, int insnIdx, bool useB)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;
    u4 typeIdx;

    decodeInstruction(meth, insnIdx, &decInsn);
    typeIdx = useB ? decInsn.vB : decInsn.vC;

    if (typeIdx < pDvmDex->pHeader->typeIdsSize)
        return true;

    LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
        typeIdx, pDvmDex->pHeader->typeIdsSize);
    return false;
}
/*
 * Perform static checks on a field get or set instruction.  All we do
 * here is ensure that the field index is within the DEX file's field ID
 * table.
 *
 * "useB" selects which decoded operand holds the index: the "B" operand
 * when true, the "C" operand when false.
 *
 * Returns "true" if the index is in range.
 */
static bool checkFieldIndex(const Method* meth, int insnIdx, bool useB)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;
    u4 fieldIdx;

    decodeInstruction(meth, insnIdx, &decInsn);
    fieldIdx = useB ? decInsn.vB : decInsn.vC;

    if (fieldIdx < pDvmDex->pHeader->fieldIdsSize)
        return true;

    LOG_VFY_METH(meth,
        "VFY: bad field index %d (max %d) at offset 0x%04x\n",
        fieldIdx, pDvmDex->pHeader->fieldIdsSize, insnIdx);
    return false;
}
/*
 * Perform static checks on a method invocation instruction.  All we do
 * here is ensure that the method index (the instruction's "B" operand) is
 * within the DEX file's method ID table.
 *
 * Returns "true" if the index is in range.
 */
static bool checkMethodIndex(const Method* meth, int insnIdx)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;

    decodeInstruction(meth, insnIdx, &decInsn);

    if (decInsn.vB < pDvmDex->pHeader->methodIdsSize)
        return true;

    LOG_VFY_METH(meth, "VFY: bad method index %d (max %d)\n",
        decInsn.vB, pDvmDex->pHeader->methodIdsSize);
    return false;
}
/*
 * Perform static checks on a string constant instruction.  All we do
 * here is ensure that the string index (the instruction's "B" operand) is
 * within the DEX file's string ID table.
 *
 * Returns "true" if the index is in range.
 */
static bool checkStringIndex(const Method* meth, int insnIdx)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;

    decodeInstruction(meth, insnIdx, &decInsn);

    if (decInsn.vB < pDvmDex->pHeader->stringIdsSize)
        return true;

    LOG_VFY_METH(meth, "VFY: bad string index %d (max %d)\n",
        decInsn.vB, pDvmDex->pHeader->stringIdsSize);
    return false;
}
/*
 * Perform static verification on instructions.
 *
 * Walks the method one instruction at a time, using the widths already
 * recorded in "insnFlags", and dispatches on the opcode to the matching
 * static check.
 *
 * As a side effect, this sets the "branch target" flag for offset 0 and
 * the "GC point" flag for instructions that can branch backward, switch,
 * throw or return.  (The branch and switch target checks called from here
 * are expected to flag their targets as well.)
 *
 * "verifyFlags" must include VERIFY_ALLOW_OPT_INSTRS for optimized
 * ("quick", execute-inline, invoke-direct-empty) instructions to be
 * accepted.
 *
 * "(CF)" items are handled during code-flow analysis.
 *
 * v3 4.10.1
 * - target of each jump and branch instruction must be valid
 * - targets of switch statements must be valid
 * - (CF) operands referencing constant pool entries must be valid
 * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
 * - (new) verify operands of "quick" field ops
 * - (CF) operands of method invocation instructions must be valid
 * - (new) verify operands of "quick" method invoke ops
 * - (CF) only invoke-direct can call a method starting with '<'
 * - (CF) <clinit> must never be called explicitly
 * - (CF) operands of instanceof, checkcast, new (and variants) must be valid
 * - new-array[-type] limited to 255 dimensions
 * - can't use "new" on an array class
 * - (?) limit dimensions in multi-array creation
 * - (CF) local variable load/store register values must be in valid range
 *
 * v3 4.11.1.2
 * - branches must be within the bounds of the code array
 * - targets of all control-flow instructions are the start of an instruction
 * - (CF) register accesses fall within range of allocated registers
 * - (N/A) access to constant pool must be of appropriate type
 * - (CF) code does not end in the middle of an instruction
 * - (CF) execution cannot fall off the end of the code
 * - (earlier) for each exception handler, the "try" area must begin and
 *   end at the start of an instruction (end can be at the end of the code)
 * - (earlier) for each exception handler, the handler must start at a valid
 *   instruction
 *
 * TODO: move some of the "CF" items in here for better performance (the
 * code-flow analysis sometimes has to process the same instruction several
 * times).
 *
 * Returns "true" if every instruction passed its static checks and the
 * last instruction ends exactly at the end of the code area.
 */
static bool verifyInstructions(const Method* meth, InsnFlags* insnFlags,
    int verifyFlags)
{
    const int insnCount = dvmGetMethodInsnsSize(meth);
    const u2* insns = meth->insns;
    int i;

    /* the start of the method is a "branch target" */
    dvmInsnSetBranchTarget(insnFlags, 0, true);

    for (i = 0; i < insnCount; /**/) {
        /*
         * These types of instructions can be GC points.  To support precise
         * GC, all such instructions must export the PC in the interpreter,
         * or the GC won't be able to identify the current PC for the thread.
         */
        static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
            kInstrCanThrow | kInstrCanReturn;

        int width = dvmInsnGetWidth(insnFlags, i);
        OpCode opcode = *insns & 0xff;
        InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode);

        if ((opFlags & gcMask) != 0) {
            /*
             * This instruction is probably a GC point.  Branch instructions
             * only qualify if they go backward, so we need to check the
             * offset.
             */
            int offset = -1;
            bool unused;

            if (dvmGetBranchTarget(meth, insnFlags, i, &offset, &unused)) {
                if (offset < 0) {
                    dvmInsnSetGcPoint(insnFlags, i, true);
                }
            } else {
                /* not a branch instruction */
                dvmInsnSetGcPoint(insnFlags, i, true);
            }
        }

        switch (opcode) {
        case OP_NOP:
            /* plain no-op or switch table data; nothing to do here */
            break;

        case OP_CONST_STRING:
        case OP_CONST_STRING_JUMBO:
            if (!checkStringIndex(meth, i))
                return false;
            break;

        case OP_CONST_CLASS:
        case OP_CHECK_CAST:
            /* type index is in the "B" operand */
            if (!checkTypeIndex(meth, i, true))
                return false;
            break;
        case OP_INSTANCE_OF:
            /* type index is in the "C" operand */
            if (!checkTypeIndex(meth, i, false))
                return false;
            break;

        case OP_PACKED_SWITCH:
        case OP_SPARSE_SWITCH:
            /* verify the associated table */
            if (!dvmCheckSwitchTargets(meth, insnFlags, i))
                return false;
            break;

        case OP_FILL_ARRAY_DATA:
            /* verify the associated table */
            if (!checkArrayData(meth, i))
                return false;
            break;

        case OP_GOTO:
        case OP_GOTO_16:
        case OP_IF_EQ:
        case OP_IF_NE:
        case OP_IF_LT:
        case OP_IF_GE:
        case OP_IF_GT:
        case OP_IF_LE:
        case OP_IF_EQZ:
        case OP_IF_NEZ:
        case OP_IF_LTZ:
        case OP_IF_GEZ:
        case OP_IF_GTZ:
        case OP_IF_LEZ:
            /* check the destination */
            if (!dvmCheckBranchTarget(meth, insnFlags, i, false))
                return false;
            break;
        case OP_GOTO_32:
            /* check the destination; self-branch is okay */
            if (!dvmCheckBranchTarget(meth, insnFlags, i, true))
                return false;
            break;

        case OP_NEW_INSTANCE:
            if (!checkNewInstance(meth, i))
                return false;
            break;

        case OP_NEW_ARRAY:
            if (!checkNewArray(meth, i))
                return false;
            break;

        case OP_FILLED_NEW_ARRAY:
        case OP_FILLED_NEW_ARRAY_RANGE:
            /* type index is in the "B" operand */
            if (!checkTypeIndex(meth, i, true))
                return false;
            break;

        case OP_IGET:
        case OP_IGET_WIDE:
        case OP_IGET_OBJECT:
        case OP_IGET_BOOLEAN:
        case OP_IGET_BYTE:
        case OP_IGET_CHAR:
        case OP_IGET_SHORT:
        case OP_IPUT:
        case OP_IPUT_WIDE:
        case OP_IPUT_OBJECT:
        case OP_IPUT_BOOLEAN:
        case OP_IPUT_BYTE:
        case OP_IPUT_CHAR:
        case OP_IPUT_SHORT:
            /* check the field index (in the "C" operand) */
            if (!checkFieldIndex(meth, i, false))
                return false;
            break;
        case OP_SGET:
        case OP_SGET_WIDE:
        case OP_SGET_OBJECT:
        case OP_SGET_BOOLEAN:
        case OP_SGET_BYTE:
        case OP_SGET_CHAR:
        case OP_SGET_SHORT:
        case OP_SPUT:
        case OP_SPUT_WIDE:
        case OP_SPUT_OBJECT:
        case OP_SPUT_BOOLEAN:
        case OP_SPUT_BYTE:
        case OP_SPUT_CHAR:
        case OP_SPUT_SHORT:
            /* check the field index (in the "B" operand) */
            if (!checkFieldIndex(meth, i, true))
                return false;
            break;

        case OP_INVOKE_VIRTUAL:
        case OP_INVOKE_SUPER:
        case OP_INVOKE_DIRECT:
        case OP_INVOKE_STATIC:
        case OP_INVOKE_INTERFACE:
        case OP_INVOKE_VIRTUAL_RANGE:
        case OP_INVOKE_SUPER_RANGE:
        case OP_INVOKE_DIRECT_RANGE:
        case OP_INVOKE_STATIC_RANGE:
        case OP_INVOKE_INTERFACE_RANGE:
            /* check the method index */
            if (!checkMethodIndex(meth, i))
                return false;
            break;

        case OP_EXECUTE_INLINE:
        case OP_INVOKE_DIRECT_EMPTY:
        case OP_IGET_QUICK:
        case OP_IGET_WIDE_QUICK:
        case OP_IGET_OBJECT_QUICK:
        case OP_IPUT_QUICK:
        case OP_IPUT_WIDE_QUICK:
        case OP_IPUT_OBJECT_QUICK:
        case OP_INVOKE_VIRTUAL_QUICK:
        case OP_INVOKE_VIRTUAL_QUICK_RANGE:
        case OP_INVOKE_SUPER_QUICK:
        case OP_INVOKE_SUPER_QUICK_RANGE:
            /* optimized instructions are only legal when explicitly allowed */
            if ((verifyFlags & VERIFY_ALLOW_OPT_INSTRS) == 0) {
                LOG_VFY("VFY: not expecting optimized instructions\n");
                return false;
            }
            break;

        default:
            /* nothing to do */
            break;
        }

        /*
         * A non-positive width would keep "i" from advancing.  The assert
         * catches it in debug builds; the explicit test keeps builds with
         * assertions compiled out from spinning forever.
         */
        assert(width > 0);
        if (width <= 0) {
            LOG_VFY_METH(meth,
                "VFY: invalid instruction width %d at offset 0x%04x\n",
                width, i);
            return false;
        }

        i += width;
        insns += width;
    }

    /* make sure the last instruction ends at the end of the insn area */
    if (i != insnCount) {
        LOG_VFY_METH(meth,
            "VFY: code did not end when expected (end at %d, count %d)\n",
            i, insnCount);
        return false;
    }

    return true;
}