blob: 0d86eb5dfcf8ea8497bae5c537b8df83491ca0af [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
#ifndef ART_SRC_DEX_VERIFY_H_
#define ART_SRC_DEX_VERIFY_H_
#include "casts.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "macros.h"
#include "object.h"
#include "stl_util.h"
#include "UniquePtr.h"
#include <map>
#include <stack>
#include <vector>
namespace art {
namespace verifier {
class DexVerifier;
class PcToReferenceMap;
class RegTypeCache;
/*
* Set this to enable dead code scanning. This is not required, but it's very useful when testing
* changes to the verifier (to make sure we're not skipping over stuff). The only reason not to do
* it is that it slightly increases the time required to perform verification.
*/
// DEAD_CODE_SCAN is true in debug builds and false when NDEBUG is defined (release builds).
#ifdef NDEBUG
# define DEAD_CODE_SCAN false
#else
# define DEAD_CODE_SCAN true
#endif
/*
* RegType holds information about the type of data held in a register. For most types it's a simple
* enum. For reference types it holds a pointer to the Class*, and for uninitialized references
* it holds an index into the UninitInstanceMap.
*/
// Immutable description of what one dex register holds: a Type tag, an optional Class*, the
// allocation pc used to track uninitialized references, and the id of this entry in the
// RegTypeCache. Instances can only be constructed by RegTypeCache (private constructor + friend).
class RegType {
 public:
  /*
   * Enumeration for register type values. The "hi" piece of a 64-bit value MUST immediately follow
   * the "lo" piece in the enumeration, so we can check that hi==lo+1.
   *
   * Assignment of constants:
   *   [-MAXINT,-32768)  : integer
   *   [-32768,-128)     : short
   *   [-128,0)          : byte
   *   0                 : zero
   *   1                 : one
   *   [2,128)           : posbyte
   *   [128,32768)       : posshort
   *   [32768,65536)     : char
   *   [65536,MAXINT]    : integer
   *
   * Allowed "implicit" widening conversions:
   *   zero    -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
   *   one     -> boolean, posbyte, byte, posshort, short, char, integer
   *   boolean -> posbyte, byte, posshort, short, char, integer
   *   posbyte -> posshort, short, integer, char
   *   byte    -> short, integer
   *   posshort -> integer, char
   *   short   -> integer
   *   char    -> integer
   *
   * In addition, all of the above can convert to "float".
   *
   * We're more careful with integer values than the spec requires. The motivation is to restrict
   * byte/char/short to the correct range of values. For example, if a method takes a byte
   * argument, we don't want to allow the code to load the constant "1024" and pass it in.
   */
  enum Type {
    kRegTypeUnknown = 0,    /* initial state */
    kRegTypeConflict,       /* merge clash makes this reg's type unknowable */

    /*
     * Category-1nr types. The order of these is chiseled into a couple of tables, so don't add,
     * remove, or reorder if you can avoid it.
     */
    kRegTypeZero,           /* 0 - 32-bit 0, could be Boolean, Int, Float, or Ref */
    kRegType1nrSTART = kRegTypeZero,
    kRegTypeIntegralSTART = kRegTypeZero,
    kRegTypeOne,            /* 1 - 32-bit 1, could be Boolean, Int, Float */
    kRegTypeBoolean,        /* Z - must be 0 or 1 */
    kRegTypeConstPosByte,   /* y - const derived byte, known positive */
    kRegTypeConstByte,      /* Y - const derived byte */
    kRegTypeConstPosShort,  /* h - const derived short, known positive */
    kRegTypeConstShort,     /* H - const derived short */
    kRegTypeConstChar,      /* c - const derived char */
    kRegTypeConstInteger,   /* i - const derived integer */
    kRegTypePosByte,        /* b - byte, known positive (can become char) */
    kRegTypeByte,           /* B */
    kRegTypePosShort,       /* s - short, known positive (can become char) */
    kRegTypeShort,          /* S */
    kRegTypeChar,           /* C */
    kRegTypeInteger,        /* I */
    kRegTypeIntegralEND = kRegTypeInteger,
    kRegTypeFloat,          /* F */
    kRegType1nrEND = kRegTypeFloat,

    /* Category-2 types; each "Hi" enumerator directly follows its "Lo" partner. */
    kRegTypeConstLo,        /* const derived wide, lower half - could be long or double */
    kRegTypeConstHi,        /* const derived wide, upper half - could be long or double */
    kRegTypeLongLo,         /* lower-numbered register; endian-independent */
    kRegTypeLongHi,
    kRegTypeDoubleLo,
    kRegTypeDoubleHi,

    kRegTypeReference,      // Reference type
    kRegTypeMAX = kRegTypeReference + 1,
  };

  // ---- Plain accessors ----

  Type GetType() const {
    return type_;
  }

  // Id of this entry inside the owning RegTypeCache.
  uint16_t GetId() const {
    return cache_id_;
  }

  // Returns the associated class; debug builds assert that one is present.
  Class* GetClass() const {
    DCHECK(klass_ != NULL);
    return klass_;
  }

  std::string Dump() const;

  // ---- Initialization state (encoded in allocation_pc_) ----

  bool IsInitialized() const {
    return allocation_pc_ == kInitArgAddr;
  }

  // True for anything whose allocation pc is not the "initialized" sentinel; this includes the
  // uninitialized "this" argument.
  bool IsUninitializedReference() const {
    return !(allocation_pc_ == kInitArgAddr);
  }

  bool IsUninitializedThisReference() const {
    return allocation_pc_ == kUninitThisArgAddr;
  }

  // ---- Exact type tests ----

  bool IsUnknown() const   { return kRegTypeUnknown == type_; }
  bool IsConflict() const  { return kRegTypeConflict == type_; }
  bool IsZero() const      { return kRegTypeZero == type_; }
  bool IsOne() const       { return kRegTypeOne == type_; }
  bool IsConstLo() const   { return kRegTypeConstLo == type_; }
  bool IsBoolean() const   { return kRegTypeBoolean == type_; }
  bool IsByte() const      { return kRegTypeByte == type_; }
  bool IsChar() const      { return kRegTypeChar == type_; }
  bool IsShort() const     { return kRegTypeShort == type_; }
  bool IsInteger() const   { return kRegTypeInteger == type_; }
  bool IsLong() const      { return kRegTypeLongLo == type_; }
  bool IsFloat() const     { return kRegTypeFloat == type_; }
  bool IsDouble() const    { return kRegTypeDoubleLo == type_; }
  bool IsReference() const { return kRegTypeReference == type_; }

  // ---- Wide (64-bit) register halves ----

  bool IsLowHalf() const {
    switch (type_) {
      case kRegTypeLongLo:
      case kRegTypeDoubleLo:
      case kRegTypeConstLo:
        return true;
      default:
        return false;
    }
  }

  bool IsHighHalf() const {
    switch (type_) {
      case kRegTypeLongHi:
      case kRegTypeDoubleHi:
      case kRegTypeConstHi:
        return true;
      default:
        return false;
    }
  }

  const RegType& HighHalf(RegTypeCache* cache) const;

  // True when this is a low half and type_h is the enumerator immediately after it, i.e. the
  // matching high half (relies on the hi==lo+1 layout of Type).
  bool CheckWidePair(const RegType& type_h) const {
    if (!IsLowHalf()) {
      return false;
    }
    return type_h.type_ == type_ + 1;
  }

  // ---- "Could be used as X" tests (groups of types) ----

  bool IsLongOrDoubleTypes() const {
    return IsLowHalf();
  }

  // Zero counts as a reference type because it can stand for null.
  bool IsReferenceTypes() const {
    return IsReference() || IsZero();
  }

  bool IsCategory1Types() const {
    return kRegType1nrSTART <= type_ && type_ <= kRegType1nrEND;
  }

  bool IsCategory2Types() const {
    return IsLowHalf();  // Don't expect explicit testing of high halves
  }

  bool IsBooleanTypes() const {
    switch (type_) {
      case kRegTypeBoolean:
      case kRegTypeZero:
      case kRegTypeOne:
        return true;
      default:
        return false;
    }
  }

  bool IsByteTypes() const {
    switch (type_) {
      case kRegTypeByte:
      case kRegTypeBoolean:
      case kRegTypeZero:
      case kRegTypeOne:
      case kRegTypeConstPosByte:
      case kRegTypeConstByte:
      case kRegTypePosByte:
        return true;
      default:
        return false;
    }
  }

  bool IsShortTypes() const {
    if (IsByteTypes()) {
      return true;
    }
    switch (type_) {
      case kRegTypeShort:
      case kRegTypeConstPosShort:
      case kRegTypeConstShort:
      case kRegTypePosShort:
        return true;
      default:
        return false;
    }
  }

  // Only the types known to be non-negative can be used as char.
  bool IsCharTypes() const {
    switch (type_) {
      case kRegTypeChar:
      case kRegTypeBoolean:
      case kRegTypeZero:
      case kRegTypeOne:
      case kRegTypeConstPosByte:
      case kRegTypePosByte:
      case kRegTypeConstPosShort:
      case kRegTypePosShort:
      case kRegTypeConstChar:
        return true;
      default:
        return false;
    }
  }

  bool IsIntegralTypes() const {
    return kRegTypeIntegralSTART <= type_ && type_ <= kRegTypeIntegralEND;
  }

  bool IsArrayIndexTypes() const {
    return IsIntegralTypes();
  }

  // Float type may be derived from any constant type
  bool IsFloatTypes() const {
    switch (type_) {
      case kRegTypeFloat:
      case kRegTypeZero:
      case kRegTypeOne:
      case kRegTypeConstPosByte:
      case kRegTypeConstByte:
      case kRegTypeConstPosShort:
      case kRegTypeConstShort:
      case kRegTypeConstChar:
      case kRegTypeConstInteger:
        return true;
      default:
        return false;
    }
  }

  // A wide constant's low half may turn out to be either a long or a double.
  bool IsLongTypes() const {
    return IsLong() || IsConstLo();
  }

  bool IsDoubleTypes() const {
    return IsDouble() || IsConstLo();
  }

  // ---- Operations implemented out of line ----

  const RegType& VerifyAgainst(const RegType& check_type, RegTypeCache* reg_types) const;

  const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const;

  // Structural equality: same type tag, same class pointer and same allocation pc. The cache id
  // is not part of the comparison.
  bool Equals(const RegType& other) const {
    if (type_ != other.type_) {
      return false;
    }
    if (klass_ != other.klass_) {
      return false;
    }
    return allocation_pc_ == other.allocation_pc_;
  }

  /*
   * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
   * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
   * S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
   * is the deepest (lowest upper bound) parent of S and T).
   *
   * This operation applies for regular classes and arrays, however, for interface types there
   * needn't be a partial ordering on the types. We could solve the problem of a lack of a partial
   * order by introducing sets of types, however, the only operation permissible on an interface is
   * invoke-interface. In the tradition of Java verifiers [1] we defer the verification of
   * interface types until an invoke-interface call on the interface typed reference at runtime and
   * allow the perversion of Object being assignable to an interface type (note, however, that we
   * don't allow assignment of Object or Interface to any concrete class and are therefore type
   * safe).
   *
   * [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
   */
  static Class* ClassJoin(Class* s, Class* t);

 private:
  friend class RegTypeCache;

  // Address given to an allocation_pc for an initialized object.
  static const uint32_t kInitArgAddr = -2;

  // Address given to an uninitialized allocation_pc if an object is uninitialized through being
  // a constructor.
  static const uint32_t kUninitThisArgAddr = -1;

  // Debug builds check that only reference-or-higher types carry a non-"initialized" allocation
  // pc, and that such types always come with a class.
  RegType(Type type, Class* klass, uint32_t allocation_pc, uint16_t cache_id)
      : type_(type),
        klass_(klass),
        allocation_pc_(allocation_pc),
        cache_id_(cache_id) {
    DCHECK(type >= kRegTypeReference || allocation_pc_ == kInitArgAddr);
    if (type >= kRegTypeReference) {
      DCHECK(klass != NULL);
    }
  }

  // The current type of the register
  const Type type_;

  // If known the type of the register
  Class* klass_;

  // Address an uninitialized reference was created
  const uint32_t allocation_pc_;

  // A RegType cache densely encodes types, this is the location in the cache for this type
  const uint16_t cache_id_;

  /*
   * Merge result table for primitive values. The table is symmetric along the diagonal.
   *
   * Note that 32-bit int/float do not merge into 64-bit long/double. This is a register merge, not
   * a widening conversion. Only the "implicit" widening within a category, e.g. byte to short, is
   * allowed.
   *
   * Dalvik does not draw a distinction between int and float, but we enforce that once a value is
   * used as int, it can't be used as float, and vice-versa. We do not allow free exchange between
   * 32-bit int/float and 64-bit long/double.
   *
   * Note that Uninit + Uninit = Uninit. This holds true because we only use this when the RegType
   * value is exactly equal to kRegTypeUninit, which can only happen for the zeroth entry in the
   * table. (NOTE(review): there is no kRegTypeUninit enumerator in Type above; this sentence
   * looks inherited from an earlier design -- confirm before relying on it.)
   *
   * "Unknown" never merges with anything known. The only time a register transitions from
   * "unknown" to "known" is when we're executing code for the first time, and we handle that with
   * a simple copy.
   */
  static const RegType::Type merge_table_[kRegTypeReference][kRegTypeReference];

  DISALLOW_COPY_AND_ASSIGN(RegType);
};

std::ostream& operator<<(std::ostream& os, const RegType& rhs);
// Owns every RegType instance and hands them out by reference. Each entry is addressed by a
// 16-bit id (see GetFromId). The cache is created with RegType::kRegTypeReference empty slots
// and deletes all of its entries on destruction.
class RegTypeCache {
 public:
  explicit RegTypeCache() : entries_(RegType::kRegTypeReference) {
    Unknown();  // ensure Unknown is initialized
  }

  ~RegTypeCache() {
    STLDeleteElements(&entries_);
  }

  // Looks up an existing entry by id. Debug builds assert that the id is in range and that the
  // slot has been populated.
  const RegType& GetFromId(uint16_t id) {
    DCHECK_LT(id, entries_.size());
    RegType* result = entries_[id];
    DCHECK(result != NULL);
    return *result;
  }

  // General factories, implemented out of line.
  const RegType& From(RegType::Type type, const ClassLoader* loader,
                      const std::string& descriptor);
  const RegType& FromClass(Class* klass);
  const RegType& FromCat1Const(int32_t value);
  const RegType& FromDescriptor(const ClassLoader* loader, const std::string& descriptor);
  const RegType& FromType(RegType::Type);

  // Factories for uninitialized references, implemented out of line.
  const RegType& Uninitialized(Class* klass, uint32_t allocation_pc);
  const RegType& UninitializedThisArgument(Class* klass);

  // Shorthands for the special (non-value) types.
  const RegType& Unknown() {
    return FromType(RegType::kRegTypeUnknown);
  }
  const RegType& Conflict() {
    return FromType(RegType::kRegTypeConflict);
  }

  // Shorthands for constants.
  const RegType& Zero() {
    return FromType(RegType::kRegTypeZero);
  }
  const RegType& ConstLo() {
    return FromType(RegType::kRegTypeConstLo);
  }

  // Shorthands for primitive types. Long() and Double() yield the low half of the pair.
  const RegType& Boolean() {
    return FromType(RegType::kRegTypeBoolean);
  }
  const RegType& Byte() {
    return FromType(RegType::kRegTypeByte);
  }
  const RegType& Char() {
    return FromType(RegType::kRegTypeChar);
  }
  const RegType& Short() {
    return FromType(RegType::kRegTypeShort);
  }
  const RegType& Integer() {
    return FromType(RegType::kRegTypeInteger);
  }
  const RegType& Float() {
    return FromType(RegType::kRegTypeFloat);
  }
  const RegType& Long() {
    return FromType(RegType::kRegTypeLongLo);
  }
  const RegType& Double() {
    return FromType(RegType::kRegTypeDoubleLo);
  }

  // Shorthands for well-known reference types, requested with a NULL class loader.
  const RegType& JavaLangClass() {
    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;");
  }
  const RegType& JavaLangObject() {
    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;");
  }
  const RegType& JavaLangString() {
    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;");
  }

 private:
  // The allocated entries
  std::vector<RegType*> entries_;

  DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
};
// Per-code-unit bookkeeping used by the verifier: the length of the instruction that starts at
// this code unit (0 when no instruction starts here) plus a small set of boolean flags.
class InsnFlags {
 public:
  InsnFlags() : length_(0), flags_(0) {}

  // Records the instruction length in code units. The value must fit in 16 bits; this is
  // enforced with CHECK_LT.
  void SetLengthInCodeUnits(size_t length) {
    CHECK_LT(length, 65536u);
    length_ = static_cast<uint16_t>(length);
  }
  size_t GetLengthInCodeUnits() const {
    return length_;
  }

  // An entry marks the start of an instruction iff a non-zero length has been recorded.
  bool IsOpcode() const {
    return length_ != 0;
  }

  void SetInTry() { Set(kInsnFlagInTry); }
  void ClearInTry() { Clear(kInsnFlagInTry); }
  bool IsInTry() const { return IsSet(kInsnFlagInTry); }

  void SetBranchTarget() { Set(kInsnFlagBranchTarget); }
  void ClearBranchTarget() { Clear(kInsnFlagBranchTarget); }
  bool IsBranchTarget() const { return IsSet(kInsnFlagBranchTarget); }

  void SetGcPoint() { Set(kInsnFlagGcPoint); }
  void ClearGcPoint() { Clear(kInsnFlagGcPoint); }
  bool IsGcPoint() const { return IsSet(kInsnFlagGcPoint); }

  void SetVisited() { Set(kInsnFlagVisited); }
  void ClearVisited() { Clear(kInsnFlagVisited); }
  bool IsVisited() const { return IsSet(kInsnFlagVisited); }

  void SetChanged() { Set(kInsnFlagChanged); }
  void ClearChanged() { Clear(kInsnFlagChanged); }
  bool IsChanged() const { return IsSet(kInsnFlagChanged); }

  bool IsVisitedOrChanged() const {
    return IsVisited() || IsChanged();
  }

  // Returns a five character summary. "XXXXX" means no instruction starts here; otherwise each
  // position is '-' or the letter of the flag stored at that bit index, in the order
  // T(ry), B(ranch target), G(c point), V(isited), C(hanged).
  std::string Dump() const {
    if (!IsOpcode()) {
      return std::string("XXXXX");
    }
    std::string encoding("-----");
    if (IsInTry()) encoding[kInsnFlagInTry] = 'T';
    if (IsBranchTarget()) encoding[kInsnFlagBranchTarget] = 'B';
    if (IsGcPoint()) encoding[kInsnFlagGcPoint] = 'G';
    if (IsVisited()) encoding[kInsnFlagVisited] = 'V';
    if (IsChanged()) encoding[kInsnFlagChanged] = 'C';
    return encoding;
  }

 private:
  // Bit positions inside flags_; also the character positions used by Dump().
  enum InsnFlag {
    kInsnFlagInTry,
    kInsnFlagBranchTarget,
    kInsnFlagGcPoint,
    kInsnFlagVisited,
    kInsnFlagChanged,
  };

  // Single-bit mask for a flag, computed on an unsigned operand.
  static uint8_t Mask(InsnFlag flag) {
    return static_cast<uint8_t>(1u << flag);
  }
  void Set(InsnFlag flag) {
    flags_ = static_cast<uint8_t>(flags_ | Mask(flag));
  }
  void Clear(InsnFlag flag) {
    flags_ = static_cast<uint8_t>(flags_ & ~Mask(flag));
  }
  bool IsSet(InsnFlag flag) const {
    return (flags_ & Mask(flag)) != 0;
  }

  // Size of instruction in code units
  uint16_t length_;
  // Bit set indexed by InsnFlag
  uint8_t flags_;
};
/*
* "Direct" and "virtual" methods are stored independently. The type of call used to invoke the
* method determines which list we search, and whether we travel up into superclasses.
*
* (<clinit>, <init>, and methods declared "private" or "static" are stored in the "direct" list.
* All others are stored in the "virtual" list.)
*/
// Kind of call used to reach a method.
enum MethodType {
  METHOD_UNKNOWN   = 0,
  METHOD_DIRECT    = 1,  // <init>, private
  METHOD_STATIC    = 2,  // static
  METHOD_VIRTUAL   = 3,  // virtual, super
  METHOD_INTERFACE = 4   // interface
};
// Mask/shift pair for an 8-bit field.
// NOTE(review): presumably used to pack "uninitialized" information next to a register type;
// neither constant is referenced in this header -- confirm against the users before changing.
static const int kRegTypeUninitMask = 0xff;
static const int kRegTypeUninitShift = 8;
/*
* Register type categories, for type checking.
*
* The spec says category 1 includes boolean, byte, char, short, int, float, reference, and
* returnAddress. Category 2 includes long and double.
*
* We treat object references separately, so we have "category1nr". We don't support jsr/ret, so
* there is no "returnAddress" type.
*/
// Register type categories; the numeric values are 0 through 3 in declaration order.
enum TypeCategory {
  kTypeCategoryUnknown = 0,
  kTypeCategory1nr,  // 1: boolean, byte, char, short, int, float
  kTypeCategory2,    // 2: long, double
  kTypeCategoryRef,  // 3: object reference
};
/*
* An enumeration of problems that can turn up during verification.
* VERIFY_ERROR_GENERIC denotes a failure that causes the entire class to be rejected. Other errors
* denote verification errors that cause bytecode to be rewritten to fail at runtime.
*/
// Verification problems; the trailing comment on each entry names the matching Java error.
enum VerifyError {
  VERIFY_ERROR_NONE          = 0,  /* no error; must be zero */
  VERIFY_ERROR_GENERIC       = 1,  /* VerifyError */
  VERIFY_ERROR_NO_CLASS      = 2,  /* NoClassDefFoundError */
  VERIFY_ERROR_NO_FIELD      = 3,  /* NoSuchFieldError */
  VERIFY_ERROR_NO_METHOD     = 4,  /* NoSuchMethodError */
  VERIFY_ERROR_ACCESS_CLASS  = 5,  /* IllegalAccessError */
  VERIFY_ERROR_ACCESS_FIELD  = 6,  /* IllegalAccessError */
  VERIFY_ERROR_ACCESS_METHOD = 7,  /* IllegalAccessError */
  VERIFY_ERROR_CLASS_CHANGE  = 8,  /* IncompatibleClassChangeError */
  VERIFY_ERROR_INSTANTIATION = 9,  /* InstantiationError */
};

// Stream insertion for VerifyError; defined out of line.
std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
/*
* Identifies the type of reference in the instruction that generated the verify error
* (e.g. VERIFY_ERROR_ACCESS_CLASS could come from a method, field, or class reference).
*
* This must fit in two bits.
*/
// Kind of reference that triggered a verify error. All values are below 4, so they fit in two
// bits.
enum VerifyErrorRefType {
  VERIFY_ERROR_REF_CLASS = 0,
  VERIFY_ERROR_REF_FIELD,   // 1
  VERIFY_ERROR_REF_METHOD,  // 2
};

// NOTE(review): presumably the bit offset at which a VerifyErrorRefType is packed above a
// VerifyError in one byte -- not used in this header, confirm with the users.
static const int kVerifyErrorRefTypeShift = 6;
/*
* Format enumeration for RegisterMap data area.
*/
// Layout tag for the RegisterMap data area.
enum RegisterMapFormat {
  kRegMapFormatUnknown   = 0,
  kRegMapFormatNone      = 1,  /* indicates no map data follows */
  kRegMapFormatCompact8  = 2,  /* compact layout, 8-bit addresses */
  kRegMapFormatCompact16 = 3,  /* compact layout, 16-bit addresses */
};
// During verification, we associate one of these with every "interesting" instruction. We track
// the status of all registers, and (if the method has any monitor-enter instructions) maintain a
// stack of entered monitors (identified by code unit offset).
// If live-precise register maps are enabled, the "liveRegs" vector will be populated. Unlike the
// other lists of registers here, we do not track the liveness of the method result register
// (which is not visible to the GC).
// The verifier's view of all registers at one program point: one RegType cache id per dex
// register, the type of the invisible result register, the stack of entered monitors, and a
// per-register record of which monitor-stack depths that register locked.
class RegisterLine {
 public:
  // Allocates storage for num_regs registers, zero-fills every id and marks both halves of the
  // result register as unknown.
  RegisterLine(size_t num_regs, DexVerifier* verifier)
      : line_(new uint16_t[num_regs]), verifier_(verifier), num_regs_(num_regs) {
    memset(line_.get(), 0, num_regs_ * sizeof(uint16_t));
    result_[0] = RegType::kRegTypeUnknown;
    result_[1] = RegType::kRegTypeUnknown;
  }

  // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
  void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat);

  // Implement category-2 "move" instructions. Copy a 64-bit value from "vsrc" to "vdst". This
  // copies both halves of the register.
  void CopyRegister2(uint32_t vdst, uint32_t vsrc);

  // Implement "move-result". Copy the category-1 value from the result register to another
  // register, and reset the result register.
  void CopyResultRegister1(uint32_t vdst, bool is_reference);

  // Implement "move-result-wide". Copy the category-2 value from the result register to another
  // register, and reset the result register.
  void CopyResultRegister2(uint32_t vdst);

  // Set the invisible result register to unknown
  void SetResultTypeToUnknown();

  // Set the type of register N, verifying that the register is valid. If "newType" is the "Lo"
  // part of a 64-bit value, register N+1 will be set to "newType+1".
  // The register index was validated during the static pass, so we don't need to check it here.
  void SetRegisterType(uint32_t vdst, const RegType& new_type);

  /* Set the type of the "result" register. */
  void SetResultRegisterType(const RegType& new_type);

  // Get the type of register vsrc.
  const RegType& GetRegisterType(uint32_t vsrc) const;

  bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type);

  // Overwrites this line's register ids, monitor stack and lock-depth map with those of src.
  // Both lines must have the same number of registers. The result register is not copied.
  void CopyFromLine(const RegisterLine* src) {
    DCHECK_EQ(num_regs_, src->num_regs_);
    memcpy(line_.get(), src->line_.get(), num_regs_ * sizeof(uint16_t));
    monitors_ = src->monitors_;
    reg_to_lock_depths_ = src->reg_to_lock_depths_;
  }

  // Concatenates the Dump() of every register's type, in register order.
  std::string Dump() const {
    std::string result;
    for (size_t i = 0; i < num_regs_; i++) {
      result += GetRegisterType(i).Dump();
    }
    return result;
  }

  // Fills every register id byte with 0xf1 and discards all monitor state.
  void FillWithGarbage() {
    memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
    while (!monitors_.empty()) {
      monitors_.pop();
    }
    reg_to_lock_depths_.clear();
  }

  /*
   * We're creating a new instance of class C at address A. Any registers holding instances
   * previously created at address A must be initialized by now. If not, we mark them as "conflict"
   * to prevent them from being used (otherwise, MarkRefsAsInitialized would mark the old ones and
   * the new ones at the same time).
   */
  void MarkUninitRefsAsInvalid(const RegType& uninit_type);

  /*
   * Update all registers holding "uninit_type" to instead hold the corresponding initialized
   * reference type. This is called when an appropriate constructor is invoked -- all copies of
   * the reference must be marked as initialized.
   */
  void MarkRefsAsInitialized(const RegType& uninit_type);

  /*
   * Check constraints on constructor return. Specifically, make sure that the "this" argument got
   * initialized.
   * The "this" argument to <init> uses code offset kUninitThisArgAddr, which puts it at the start
   * of the list in slot 0. If we see a register with an uninitialized slot 0 reference, we know it
   * somehow didn't get initialized.
   */
  bool CheckConstructorReturn() const;

  // Compare two register lines. Returns 0 if they match.
  // Using this for a sort is unwise, since the value can change based on machine endianness.
  int CompareLine(const RegisterLine* line2) const {
    DCHECK(monitors_ == line2->monitors_);
    // TODO: DCHECK(reg_to_lock_depths_ == line2->reg_to_lock_depths_);
    return memcmp(line_.get(), line2->line_.get(), num_regs_ * sizeof(uint16_t));
  }

  size_t NumRegs() const {
    return num_regs_;
  }

  /*
   * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
   * caller can decide whether it needs the reference to be initialized or not. (Can also return
   * kRegTypeZero if the reference can only be zero at this point.)
   *
   * The argument count is in vA, and the first argument is in vC, for both "simple" and "range"
   * versions. We just need to make sure vA is >= 1 and then return vC.
   */
  const RegType& GetInvocationThis(const Instruction::DecodedInstruction& dec_insn);

  /*
   * Get the value from a register, and cast it to a Class.
   * This fails if the register holds an uninitialized class.
   * If the register holds kRegTypeZero, this returns a NULL pointer.
   * NOTE(review): this comment used to say 'Sets "*failure" if something fails', but the
   * signature has no such parameter -- presumably failures are reported through the verifier;
   * confirm in the implementation.
   */
  Class* GetClassFromRegister(uint32_t vsrc) const;

  /*
   * Verify types for a simple two-register instruction (e.g. "neg-int").
   * "dst_type" is stored into vA, and "src_type" is verified against vB.
   */
  void CheckUnaryOp(const Instruction::DecodedInstruction& dec_insn,
                    const RegType& dst_type, const RegType& src_type);

  /*
   * Verify types for a simple three-register instruction (e.g. "add-int").
   * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
   * against vB/vC.
   */
  void CheckBinaryOp(const Instruction::DecodedInstruction& dec_insn,
                     const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
                     bool check_boolean_op);

  /*
   * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
   * are verified against vA/vB, then "dst_type" is stored into vA.
   */
  void CheckBinaryOp2addr(const Instruction::DecodedInstruction& dec_insn,
                          const RegType& dst_type,
                          const RegType& src_type1, const RegType& src_type2,
                          bool check_boolean_op);

  /*
   * Verify types for a two-register instruction with a literal constant (e.g. "add-int/lit8").
   * "dst_type" is stored into vA, and "src_type" is verified against vB.
   *
   * If "check_boolean_op" is set, we use the constant value in vC.
   */
  void CheckLiteralOp(const Instruction::DecodedInstruction& dec_insn,
                      const RegType& dst_type, const RegType& src_type, bool check_boolean_op);

  // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
  void PushMonitor(uint32_t reg_idx, int32_t insn_idx);

  // Verify/pop monitor from monitor stack ensuring that we believe the monitor is locked
  void PopMonitor(uint32_t reg_idx);

  // Number of monitors currently on the monitor stack.
  size_t MonitorStackDepth() const {
    return monitors_.size();
  }

  // We expect no monitors to be held at certain points, such as method returns. Verify the stack
  // is empty, failing and returning false if not.
  bool VerifyMonitorStackEmpty();

  bool MergeRegisters(const RegisterLine* incoming_line);

  // Returns the highest register index, scanning upward from max_ref_reg, whose id is at or above
  // RegType::kRegTypeReference; returns max_ref_reg unchanged when none is found. A max_ref_reg
  // that is negative when viewed as an int (e.g. a -1 sentinel) starts the scan at register 0.
  size_t GetMaxReferenceReg(size_t max_ref_reg) {
    size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
    for (; i < num_regs_; i++) {
      if (line_[i] >= RegType::kRegTypeReference) {
        max_ref_reg = i;
      }
    }
    return max_ref_reg;
  }

  // Write a bit at each register location that holds a reference
  void WriteReferenceBitMap(int8_t* data, size_t max_bytes);

 private:
  // If src has recorded lock depths, makes dst carry the same set. When src has none, dst is
  // left untouched.
  void CopyRegToLockDepth(size_t dst, size_t src) {
    std::map<uint32_t, uint32_t>::const_iterator it = reg_to_lock_depths_.find(src);
    if (it != reg_to_lock_depths_.end()) {
      const uint32_t depths = it->second;  // copy first: operator[] below may insert
      reg_to_lock_depths_[dst] = depths;
    }
  }

  // True if reg is recorded as holding the lock at monitor-stack index depth. Depths that do not
  // fit in the 32-bit vector can never have been recorded, so they report false instead of
  // performing an out-of-range shift.
  bool IsSetLockDepth(size_t reg, size_t depth) const {
    if (depth >= 32u) {
      return false;
    }
    std::map<uint32_t, uint32_t>::const_iterator it = reg_to_lock_depths_.find(reg);
    if (it == reg_to_lock_depths_.end()) {
      return false;
    }
    return (it->second & (1u << depth)) != 0;
  }

  // Records that reg holds the lock at monitor-stack index depth (must be < 32 and not yet set).
  void SetRegToLockDepth(size_t reg, size_t depth) {
    CHECK_LT(depth, 32u);
    DCHECK(!IsSetLockDepth(reg, depth));
    // operator[] value-initializes a missing entry to 0. The unsigned literal keeps depth 31 from
    // overflowing a signed int.
    reg_to_lock_depths_[reg] |= 1u << depth;
  }

  // Forgets that reg holds the lock at monitor-stack index depth, dropping the map entry once no
  // depth remains. Clearing (rather than toggling) the bit means a violated precondition in a
  // release build cannot accidentally set it.
  void ClearRegToLockDepth(size_t reg, size_t depth) {
    CHECK_LT(depth, 32u);
    DCHECK(IsSetLockDepth(reg, depth));
    std::map<uint32_t, uint32_t>::iterator it = reg_to_lock_depths_.find(reg);
    if (it == reg_to_lock_depths_.end()) {
      return;
    }
    it->second &= ~(1u << depth);
    if (it->second == 0) {
      reg_to_lock_depths_.erase(it);
    }
  }

  // Drops every recorded lock depth for reg.
  void ClearAllRegToLockDepths(size_t reg) {
    reg_to_lock_depths_.erase(reg);
  }

  // Storage for the result register's type, valid after an invocation
  uint16_t result_[2];

  // An array of RegType Ids associated with each dex register
  UniquePtr<uint16_t[]> line_;

  // Back link to the verifier
  DexVerifier* verifier_;

  // Number of entries in line_
  const size_t num_regs_;

  // A stack of monitor enter locations
  std::stack<uint32_t> monitors_;

  // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
  // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
  // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
  std::map<uint32_t, uint32_t> reg_to_lock_depths_;
};

std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs);
class PcToRegisterLineTable {
public:
// We don't need to store the register data for many instructions, because we either only need
// it at branch points (for verification) or GC points and branches (for verification +
// type-precise register analysis).
enum RegisterTrackingMode {
kTrackRegsBranches,
kTrackRegsGcPoints,
kTrackRegsAll,
};
PcToRegisterLineTable() {}
~PcToRegisterLineTable() {
STLDeleteValues(&pc_to_register_line_);
}
// Initialize the RegisterTable. Every instruction address can have a different set of information
// about what's in which register, but for verification purposes we only need to store it at
// branch target addresses (because we merge into that).
void Init(RegisterTrackingMode mode, InsnFlags* flags, uint32_t insns_size,
uint16_t registers_size, DexVerifier* verifier);
RegisterLine* GetLine(size_t idx) {
return pc_to_register_line_[idx];
}
private:
// Map from a dex pc to the register status associated with it
std::map<int32_t, RegisterLine*> pc_to_register_line_;
// Number of registers we track for each instruction. This is equal to the method's declared
// "registersSize" plus kExtraRegs (2).
size_t insn_reg_count_plus_;
};
// The verifier
class DexVerifier {
public:
/* Verify a class. Returns "true" on success. */
static bool VerifyClass(const Class* klass);
/*
* Perform verification on a single method.
*
* We do this in three passes:
* (1) Walk through all code units, determining instruction locations,
* widths, and other characteristics.
* (2) Walk through all code units, performing static checks on
* operands.
* (3) Iterate through the method, checking type safety and looking
* for code flow problems.
*
* Some checks may be bypassed depending on the verification mode. We can't
* turn this stuff off completely if we want to do "exact" GC.
*
* Confirmed here:
* - code array must not be empty
* Confirmed by ComputeWidthsAndCountOps():
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*/
static bool VerifyMethod(Method* method);
uint8_t EncodePcToReferenceMapData() const;
uint32_t DexFileVersion() const {
return dex_file_->GetVersion();
}
RegTypeCache* GetRegTypeCache() {
return &reg_types_;
}
// Verification failed
std::ostream& Fail(VerifyError error) {
CHECK_EQ(failure_, VERIFY_ERROR_NONE);
failure_ = error;
return fail_messages_ << "VFY: " << PrettyMethod(method_)
<< '[' << (void*)work_insn_idx_ << "] : ";
}
// Log for verification information
std::ostream& LogVerifyInfo() {
return info_messages_ << "VFY: " << PrettyMethod(method_)
<< '[' << (void*)work_insn_idx_ << "] : ";
}
// Dump the state of the verifier, namely each instruction, what flags are set on it, register
// information
void Dump(std::ostream& os);
private:
explicit DexVerifier(Method* method);
bool Verify();
/*
* Compute the width of the instruction at each address in the instruction stream, and store it in
* insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
* table data, are not touched (so the caller should probably initialize "insn_flags" to zero).
*
* The "new_instance_count_" and "monitor_enter_count_" fields in vdata are also set.
*
* Performs some static checks, notably:
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*
* Logs an error and returns "false" on failure.
*/
bool ComputeWidthsAndCountOps();
/*
* Set the "in try" flags for all instructions protected by "try" statements. Also sets the
* "branch target" flags for exception handlers.
*
* Call this after widths have been set in "insn_flags".
*
* Returns "false" if something in the exception table looks fishy, but we're expecting the
* exception table to be somewhat sane.
*/
bool ScanTryCatchBlocks();
/*
* Perform static verification on all instructions in a method.
*
* Walks through instructions in a method calling VerifyInstruction on each.
*/
bool VerifyInstructions();
/*
* Perform static verification on an instruction.
*
* As a side effect, this sets the "branch target" flags in InsnFlags.
*
* "(CF)" items are handled during code-flow analysis.
*
* v3 4.10.1
* - target of each jump and branch instruction must be valid
* - targets of switch statements must be valid
* - operands referencing constant pool entries must be valid
* - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
* - (CF) operands of method invocation instructions must be valid
* - (CF) only invoke-direct can call a method starting with '<'
* - (CF) <clinit> must never be called explicitly
* - operands of instanceof, checkcast, new (and variants) must be valid
* - new-array[-type] limited to 255 dimensions
* - can't use "new" on an array class
* - (?) limit dimensions in multi-array creation
* - local variable load/store register values must be in valid range
*
* v3 4.11.1.2
* - branches must be within the bounds of the code array
* - targets of all control-flow instructions are the start of an instruction
* - register accesses fall within range of allocated registers
* - (N/A) access to constant pool must be of appropriate type
* - code does not end in the middle of an instruction
* - execution cannot fall off the end of the code
* - (earlier) for each exception handler, the "try" area must begin and
* end at the start of an instruction (end can be at the end of the code)
* - (earlier) for each exception handler, the handler must start at a valid
* instruction
*/
bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
/* Ensure that the register index is valid for this code item. */
bool CheckRegisterIndex(uint32_t idx);
/* Ensure that the wide register index is valid for this code item. */
bool CheckWideRegisterIndex(uint32_t idx);
// Perform static checks on a field get or set instruction. All we do here is ensure that the
// field index is in the valid range.
bool CheckFieldIndex(uint32_t idx);
// Perform static checks on a method invocation instruction. All we do here is ensure that the
// method index is in the valid range.
bool CheckMethodIndex(uint32_t idx);
// Perform static checks on a "new-instance" instruction. Specifically, make sure the class
// reference isn't for an array class.
bool CheckNewInstance(uint32_t idx);
/* Ensure that the string index is in the valid range. */
bool CheckStringIndex(uint32_t idx);
// Perform static checks on an instruction that takes a class constant. Ensure that the class
// index is in the valid range.
bool CheckTypeIndex(uint32_t idx);
// Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
// creating an array of arrays that causes the number of dimensions to exceed 255.
bool CheckNewArray(uint32_t idx);
// Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
bool CheckArrayData(uint32_t cur_offset);
// Verify that the target of a branch instruction is valid. We don't expect code to jump directly
// into an exception handler, but it's valid to do so as long as the target isn't a
// "move-exception" instruction. We verify that in a later stage.
// The dex format forbids certain instructions from branching to themselves.
// Updates "insn_flags_", setting the "branch target" flag.
bool CheckBranchTarget(uint32_t cur_offset);
// Verify a switch table. "cur_offset" is the offset of the switch instruction.
// Updates "insn_flags_", setting the "branch target" flag.
bool CheckSwitchTargets(uint32_t cur_offset);
// Check the register indices used in a "vararg" instruction, such as invoke-virtual or
// filled-new-array.
// - vA holds word count (0-5), arg[] holds the values.
// There are some tests we don't do here, e.g. we don't try to verify that invoking a method that
// takes a double is done with consecutive registers. This requires parsing the target method
// signature, which we will be doing later on during the code flow analysis.
bool CheckVarArgRegs(uint32_t vA, uint32_t arg[]);
// Check the register indices used in a "vararg/range" instruction, such as invoke-virtual/range
// or filled-new-array/range.
// - vA holds word count, vC holds index of first reg.
bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
// Extract the relative offset from a branch instruction.
// Returns "false" on failure (e.g. this isn't a branch instruction).
bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
bool* selfOkay);
/* Perform detailed code-flow analysis on a single method. */
bool VerifyCodeFlow();
// Set the register types for the first instruction in the method based on the method signature.
// This has the side-effect of validating the signature.
bool SetTypesFromSignature();
/*
* Perform code flow on a method.
*
* The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit on the first
* instruction, process it (setting additional "changed" bits), and repeat until there are no
* more.
*
* v3 4.11.1.1
* - (N/A) operand stack is always the same size
* - operand stack [registers] contain the correct types of values
* - local variables [registers] contain the correct types of values
* - methods are invoked with the appropriate arguments
* - fields are assigned using values of appropriate types
* - opcodes have the correct type values in operand registers
* - there is never an uninitialized class instance in a local variable in code protected by an
* exception handler (operand stack is okay, because the operand stack is discarded when an
* exception is thrown) [can't know what's a local var w/o the debug info -- should fall out of
* register typing]
*
* v3 4.11.1.2
* - execution cannot fall off the end of the code
*
* (We also do many of the items described in the "static checks" sections, because it's easier to
* do them here.)
*
* We need an array of RegType values, one per register, for every instruction. If the method uses
* monitor-enter, we need extra data for every register, and a stack for every "interesting"
* instruction. In theory this could become quite large -- up to several megabytes for a monster
* function.
*
* NOTE:
* The spec forbids backward branches when there's an uninitialized reference in a register. The
* idea is to prevent something like this:
* loop:
* move r1, r0
* new-instance r0, MyClass
* ...
* if-eq rN, loop // once
* initialize r0
*
* This leaves us with two different instances, both allocated by the same instruction, but only
* one is initialized. The scheme outlined in v3 4.11.1.4 wouldn't catch this, so they work around
* it by preventing backward branches. We achieve identical results without restricting code
* reordering by specifying that you can't execute the new-instance instruction if a register
* contains an uninitialized instance created by that same instruction.
*/
bool CodeFlowVerifyMethod();
/*
* Perform verification for a single instruction.
*
* This requires fully decoding the instruction to determine the effect it has on registers.
*
* Finds zero or more following instructions and sets the "changed" flag if execution at that
* point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
* addresses. Does not set or clear any other flags in "insn_flags_".
*/
bool CodeFlowVerifyInstruction(uint32_t* start_guess);
// Perform verification of an aget instruction. The destination register's type will be set to
// be that of component type of the array unless the array type is unknown, in which case a
// bottom type inferred from the type of instruction is used. is_primitive is false for an
// aget-object.
void VerifyAGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Perform verification of an aput instruction.
void VerifyAPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Lookup instance field and fail for resolution violations
Field* GetInstanceField(const RegType& obj_type, int field_idx);
// Perform verification of an iget instruction.
void VerifyIGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Perform verification of an iput instruction.
void VerifyIPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Lookup static field and fail for resolution violations
Field* GetStaticField(int field_idx);
// Perform verification of an sget instruction.
void VerifySGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Perform verification of an sput instruction.
void VerifySPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Verify that the arguments in a filled-new-array instruction are valid.
// "res_class" is the class referred to by dec_insn.vB_.
void VerifyFilledNewArrayRegs(const Instruction::DecodedInstruction& dec_insn, Class* res_class,
bool is_range);
/*
* Resolves a class based on an index and performs access checks to ensure the referrer can
* access the resolved class.
* Exceptions caused by failures are cleared before returning.
* Sets "*failure" on failure.
*/
Class* ResolveClassAndCheckAccess(uint32_t class_idx);
/*
* For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
* address, determine the first common superclass of all exceptions that can land here.
* Returns NULL if no matching exception handler can be found, or if the exception is not a
* subclass of Throwable.
*/
Class* GetCaughtExceptionType();
/*
* Resolves a method based on an index and performs access checks to ensure
* the referrer can access the resolved method.
* Does not throw exceptions.
*/
Method* ResolveMethodAndCheckAccess(uint32_t method_idx, bool is_direct);
/*
* Verify the arguments to a method. We're executing in "method", making
* a call to the method reference in vB.
*
* If this is a "direct" invoke, we allow calls to <init>. For calls to
* <init>, the first argument may be an uninitialized reference. Otherwise,
* calls to anything starting with '<' will be rejected, as will any
* uninitialized reference arguments.
*
* For non-static method calls, this will verify that the method call is
* appropriate for the "this" argument.
*
* The method reference is in vBBBB. The "is_range" parameter determines
* whether we use 0-4 "args" values or a range of registers defined by
* vAA and vCCCC.
*
* Widening conversions on integers and references are allowed, but
* narrowing conversions are not.
*
* Returns the resolved method on success, NULL on failure (with *failure
* set appropriately).
*/
Method* VerifyInvocationArgs(const Instruction::DecodedInstruction& dec_insn,
MethodType method_type, bool is_range, bool is_super);
/*
* Return the register type for the method. We can't just use the already-computed
* DalvikJniReturnType, because if it's a reference type we need to do the class lookup.
* Returned references are assumed to be initialized. Returns kRegTypeUnknown for "void".
*/
const RegType& GetMethodReturnType() {
  // Ask the register type cache for the type matching the return class of the method being
  // verified, and hand that same cached reference back to the caller.
  const RegType& return_type = reg_types_.FromClass(method_->GetReturnType());
  return return_type;
}
/*
* Verify that the target instruction is not "move-exception". It's important that the only way
* to execute a move-exception is as the first instruction of an exception handler.
* Returns "true" if all is well, "false" if the target instruction is move-exception.
*/
bool CheckMoveException(const uint16_t* insns, int insn_idx);
/*
* Replace an instruction with "throw-verification-error". This allows us to
* defer error reporting until the code path is first used.
*/
void ReplaceFailingInstruction();
/*
* Control can transfer to "next_insn". Merge the registers from merge_line into the table at
* next_insn, and set the changed flag on the target address if any of the registers were changed.
* Returns "false" if an error is encountered.
*/
bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
/*
* Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
* verification). For type-precise determination we have all the data we need, so we just need to
* encode it in some clever fashion.
* Returns a pointer to a newly-allocated ByteArray holding the register map, or NULL on failure.
*/
ByteArray* GenerateGcMap();
// Verify that the GC map associated with method_ is well formed
void VerifyGcMap();
// Compute sizes for GC map data
void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
Class* JavaLangThrowable();
InsnFlags CurrentInsnFlags() {
  // Copy out the flags entry for the instruction at work_insn_idx_; the caller gets its own copy,
  // not a reference into insn_flags_.
  InsnFlags current_flags = insn_flags_[work_insn_idx_];
  return current_flags;
}
RegTypeCache reg_types_;
PcToRegisterLineTable reg_table_;
// Storage for the register status we're currently working on.
UniquePtr<RegisterLine> work_line_;
// Lazily initialized reference to java.lang.Class<java.lang.Throwable>
Class* java_lang_throwable_;
// The address of the instruction we're currently working on. Note that this is in 2 byte
// quantities.
uint32_t work_insn_idx_;
// Storage for the register status we're saving for later.
UniquePtr<RegisterLine> saved_line_;
Method* method_; // The method we're working on.
const DexFile* dex_file_; // The dex file containing the method.
const DexFile::CodeItem* code_item_; // The code item containing the code for the method.
UniquePtr<InsnFlags[]> insn_flags_; // Instruction widths and flags, one entry per code unit.
// The type of any error that occurs
VerifyError failure_;
// Failure message log
std::ostringstream fail_messages_;
// Info message log
std::ostringstream info_messages_;
// The number of occurrences of specific opcodes.
size_t new_instance_count_;
size_t monitor_enter_count_;
};
// Lightweight, read-only wrapper for PC to reference bit maps.
//
// Layout of the wrapped byte array, as decoded by the accessors below:
//   byte 0     : RegisterMapFormat, selecting how many bytes encode each dex pc (1 or 2)
//   byte 1     : number of bytes in each reference bitmap
//   bytes 2-3  : number of entries in the table, low byte first
//   bytes 4... : the table itself, num_entries * (dex pc, bitmap)
class PcToReferenceMap {
 public:
  // Wraps the GC map of "m". Fails a CHECK if the method has no GC map or if the map is too short
  // to contain the 4 byte header. Explicit, so that a Method* is never silently converted into a
  // map.
  explicit PcToReferenceMap(Method* m) : data_(down_cast<ByteArray*>(m->GetGcMap())) {
    CHECK(data_ != NULL);
    // Every accessor below reads the header, so make sure it is present before touching it. This
    // also keeps the subtraction below from wrapping around on a truncated map.
    CHECK(data_->GetLength() >= 4);
    // Check the size of the table agrees with the number of entries
    size_t data_size = data_->GetLength() - 4;
    DCHECK_EQ(EntryWidth() * NumEntries(), data_size);
  }

  // The number of entries in the table (16 bits, stored low byte first in header bytes 2 and 3)
  size_t NumEntries() const {
    return GetData()[2] | (GetData()[3] << 8);
  }

  // Get the PC at the given index. "index" is not range checked here; callers are expected to
  // pass a value below NumEntries().
  uint16_t GetPC(size_t index) const {
    size_t entry_offset = index * EntryWidth();
    if (PcWidth() == 1) {
      return Table()[entry_offset];
    } else {
      // Two byte pcs are stored low byte first.
      return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
    }
  }

  // Return the address of the bitmap stored in the entry at the given index; the bitmap follows
  // the entry's dex pc and is RegWidth() bytes long. "index" is not range checked here.
  const uint8_t* GetBitMap(size_t index) const {
    size_t entry_offset = index * EntryWidth();
    return &Table()[entry_offset + PcWidth()];
  }

  // Find the bitmap associated with the given dex pc
  const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;

  // The number of bytes used to encode registers
  size_t RegWidth() const {
    return GetData()[1];
  }

 private:
  // Table of num_entries * (dex pc, bitmap), which starts right after the 4 byte header
  const uint8_t* Table() const {
    return GetData() + 4;
  }

  // The format used to encode the dex pcs in the table (header byte 0)
  RegisterMapFormat Format() const {
    return static_cast<RegisterMapFormat>(GetData()[0]);
  }

  // Number of bytes used to encode a dex pc
  size_t PcWidth() const {
    RegisterMapFormat format = Format();
    switch (format) {
      case kRegMapFormatCompact8:
        return 1;
      case kRegMapFormatCompact16:
        return 2;
      default:
        LOG(FATAL) << "Invalid format " << static_cast<int>(format);
        // Presumably never reached after LOG(FATAL); kept so every path returns a value.
        return -1;
    }
  }

  // The width of an entry in the table: the dex pc followed by its bitmap
  size_t EntryWidth() const {
    return PcWidth() + RegWidth();
  }

  // Raw bytes of the wrapped array (header followed by table), viewed as unsigned
  const uint8_t* GetData() const {
    return reinterpret_cast<uint8_t*>(data_->GetData());
  }

  ByteArray* data_;  // The header and table data
};
} // namespace verifier
} // namespace art
#endif // ART_SRC_DEX_VERIFY_H_