Merge "[view-compiler] Support method declaration and invocation in Dex builder"

This commit is contained in:
Treehugger Robot
2018-11-07 20:13:57 +00:00
committed by Gerrit Code Review
5 changed files with 390 additions and 51 deletions

View File

@@ -16,10 +16,15 @@
cc_defaults {
name: "viewcompiler_defaults",
header_libs: [
"libbase_headers",
],
shared_libs: [
"libbase",
"libdexfile",
"slicer",
],
cppflags: ["-std=c++17"],
}
cc_library_host_static {
@@ -30,9 +35,6 @@ cc_library_host_static {
"java_lang_builder.cc",
"util.cc",
],
static_libs: [
"libbase",
],
}
cc_binary_host {
@@ -42,7 +44,6 @@ cc_binary_host {
"main.cc",
],
static_libs: [
"libbase",
"libtinyxml2",
"libgflags",
"libviewcompiler",
@@ -59,4 +60,5 @@ cc_test_host {
static_libs: [
"libviewcompiler",
],
test_suites: ["general-tests"],
}

View File

@@ -1,7 +0,0 @@
{
"presubmit": [
{
"name": "view-compiler-tests"
}
]
}

View File

@@ -22,14 +22,16 @@
#include <fstream>
#include <memory>
#define DCHECK_NOT_NULL(p) DCHECK((p) != nullptr)
namespace startop {
namespace dex {
using std::shared_ptr;
using std::string;
using art::Instruction;
using ::dex::kAccPublic;
using Op = Instruction::Op;
// Descriptor for the primitive int type ("I").
const TypeDescriptor TypeDescriptor::Int() { return TypeDescriptor{"I"}; }
// Descriptor for void ("V").
const TypeDescriptor TypeDescriptor::Void() { return TypeDescriptor{"V"}; }
@@ -43,6 +45,20 @@ constexpr size_t kMaxEncodedStringLength{5};
} // namespace
// Streams the symbolic name of a virtual opcode. Needed so that CHECK_EQ, DCHECK_EQ, etc. can be
// used with Instruction::Op operands.
std::ostream& operator<<(std::ostream& out, const Instruction::Op& opcode) {
  // Deliberately no `default:` label, so the compiler can still warn when a new enumerator is added
  // without a matching case.
  switch (opcode) {
    case Instruction::Op::kReturn:
      return out << "kReturn";
    case Instruction::Op::kMove:
      return out << "kMove";
    case Instruction::Op::kInvokeVirtual:
      return out << "kInvokeVirtual";
  }
  // Only reached when `opcode` holds a value that is not one of the enumerators above. Without this
  // return, control would flow off the end of a non-void function, which is undefined behaviour.
  return out << "Op(" << static_cast<int>(opcode) << ")";
}
void* TrackingAllocator::Allocate(size_t size) {
std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(size);
void* raw_buffer = buffer.get();
@@ -56,7 +72,7 @@ void TrackingAllocator::Free(void* ptr) { allocations_.erase(allocations_.find(p
//
// package dextest;
// public class DexTest {
// public static int foo() { return 5; }
// public static int foo(String s) { return s.length(); }
// }
void WriteTestDexFile(const string& filename) {
DexBuilder dex_file;
@@ -64,11 +80,17 @@ void WriteTestDexFile(const string& filename) {
ClassBuilder cbuilder{dex_file.MakeClass("dextest.DexTest")};
cbuilder.set_source_file("dextest.java");
MethodBuilder method{cbuilder.CreateMethod("foo", Prototype{TypeDescriptor::Int()})};
TypeDescriptor string_type = TypeDescriptor::FromClassname("java.lang.String");
MethodBuilder::Register r = method.MakeRegister();
method.BuildConst4(r, 5);
method.BuildReturn(r);
MethodBuilder method{cbuilder.CreateMethod("foo", Prototype{TypeDescriptor::Int(), string_type})};
Value result = method.MakeRegister();
MethodDeclData string_length =
dex_file.GetOrDeclareMethod(string_type, "length", Prototype{TypeDescriptor::Int()});
method.AddInstruction(Instruction::InvokeVirtual(string_length.id, result, Value::Parameter(0)));
method.BuildReturn(result);
method.Encode();
@@ -78,6 +100,10 @@ void WriteTestDexFile(const string& filename) {
out_file.write(image.ptr<const char>(), image.size());
}
// Builds a TypeDescriptor from a dotted class name by running the name through
// art::DotToDescriptor.
TypeDescriptor TypeDescriptor::FromClassname(const std::string& name) {
  const char* const dotted_name = name.c_str();
  return TypeDescriptor{art::DotToDescriptor(dotted_name)};
}
// Creates a builder around a freshly allocated, empty ir::DexFile.
DexBuilder::DexBuilder() : dex_file_{std::make_shared<ir::DexFile>()} {
// Set the file's magic to a view over the kDexFileMagic constant.
dex_file_->magic = slicer::MemView{kDexFileMagic, sizeof(kDexFileMagic)};
}
@@ -119,10 +145,9 @@ ClassBuilder DexBuilder::MakeClass(const std::string& name) {
class_def->type = type_def;
class_def->super_class = GetOrAddType(art::DotToDescriptor("java.lang.Object"));
class_def->access_flags = kAccPublic;
return ClassBuilder{this, class_def};
return ClassBuilder{this, name, class_def};
}
// TODO(eholk): we probably want GetOrAddString() also
ir::Type* DexBuilder::GetOrAddType(const std::string& descriptor) {
if (types_by_descriptor_.find(descriptor) != types_by_descriptor_.end()) {
return types_by_descriptor_[descriptor];
@@ -158,16 +183,11 @@ std::string Prototype::Shorty() const {
return shorty;
}
ClassBuilder::ClassBuilder(DexBuilder* parent, ir::Class* class_def)
: parent_(parent), class_(class_def) {}
// Stores the parent DexBuilder and the class definition being built. `name` is the dotted class
// name; it is converted with TypeDescriptor::FromClassname and kept as this class's type descriptor.
ClassBuilder::ClassBuilder(DexBuilder* parent, const std::string& name, ir::Class* class_def)
: parent_(parent), type_descriptor_{TypeDescriptor::FromClassname(name)}, class_(class_def) {}
MethodBuilder ClassBuilder::CreateMethod(const std::string& name, Prototype prototype) {
ir::String* dex_name{parent_->GetOrAddString(name)};
auto* decl = parent_->Alloc<ir::MethodDecl>();
decl->name = dex_name;
decl->parent = class_->type;
decl->prototype = prototype.Encode(parent_);
ir::MethodDecl* decl = parent_->GetOrDeclareMethod(type_descriptor_, name, prototype).decl;
return MethodBuilder{parent_, class_, decl};
}
@@ -187,8 +207,13 @@ ir::EncodedMethod* MethodBuilder::Encode() {
method->access_flags = kAccPublic | ::dex::kAccStatic;
auto* code = dex_->Alloc<ir::Code>();
code->registers = num_registers_;
// TODO: support ins and outs
DCHECK_NOT_NULL(decl_->prototype);
size_t const num_args =
decl_->prototype->param_types != nullptr ? decl_->prototype->param_types->types.size() : 0;
code->registers = num_registers_ + num_args;
code->ins_count = num_args;
code->outs_count = decl_->prototype->return_type == dex_->GetOrAddType("V") ? 0 : 1;
EncodeInstructions();
code->instructions = slicer::ArrayView<const ::dex::u2>(buffer_.data(), buffer_.size());
method->code = code;
@@ -197,17 +222,135 @@ ir::EncodedMethod* MethodBuilder::Encode() {
return method;
}
MethodBuilder::Register MethodBuilder::MakeRegister() { return num_registers_++; }
// Hands out the next unused local register number, wrapped as a local-register Value, and bumps
// the count of allocated registers.
Value MethodBuilder::MakeRegister() {
  const size_t next_id = num_registers_++;
  return Value::Local(next_id);
}
void MethodBuilder::BuildReturn() { buffer_.push_back(Instruction::RETURN_VOID); }
// Appends a virtual instruction to the list that EncodeInstructions() later turns into DEX code.
void MethodBuilder::AddInstruction(Instruction instruction) {
  instructions_.emplace_back(instruction);
}
void MethodBuilder::BuildReturn(Register src) { buffer_.push_back(Instruction::RETURN | src << 8); }
// return-void: queues a kReturn virtual instruction with no destination and no arguments.
void MethodBuilder::BuildReturn() { AddInstruction(Instruction::OpNoArgs(Op::kReturn)); }
void MethodBuilder::BuildConst4(Register target, int value) {
// Queues a kReturn virtual instruction that returns the value held in `src`. A return has no
// destination, so an empty optional is passed for it.
void MethodBuilder::BuildReturn(Value src) {
  const std::optional<const Value> no_destination{};
  AddInstruction(Instruction::OpWithArgs(Op::kReturn, no_destination, src));
}
void MethodBuilder::BuildConst4(Value target, int value) {
DCHECK_LT(value, 16);
// TODO: support more registers
DCHECK_LT(target, 16);
buffer_.push_back(Instruction::CONST_4 | (value << 12) | (target << 8));
AddInstruction(Instruction::OpWithArgs(Op::kMove, target, Value::Immediate(value)));
}
void MethodBuilder::EncodeInstructions() {
buffer_.clear();
for (const auto& instruction : instructions_) {
EncodeInstruction(instruction);
}
}
// Dispatches one virtual instruction to the encoder that matches its opcode.
void MethodBuilder::EncodeInstruction(const Instruction& instruction) {
  switch (instruction.opcode()) {
    case Instruction::Op::kReturn:
      EncodeReturn(instruction);
      break;
    case Instruction::Op::kMove:
      EncodeMove(instruction);
      break;
    case Instruction::Op::kInvokeVirtual:
      EncodeInvokeVirtual(instruction);
      break;
  }
}
void MethodBuilder::EncodeReturn(const Instruction& instruction) {
DCHECK_EQ(Instruction::Op::kReturn, instruction.opcode());
DCHECK(!instruction.dest().has_value());
if (instruction.args().size() == 0) {
buffer_.push_back(art::Instruction::RETURN_VOID);
} else {
DCHECK(instruction.args().size() == 1);
size_t source = RegisterValue(instruction.args()[0]);
buffer_.push_back(art::Instruction::RETURN | source << 8);
}
}
// Encodes a kMove virtual instruction. Only moving an immediate into a register is implemented,
// and it is emitted as const/4.
void MethodBuilder::EncodeMove(const Instruction& instruction) {
  DCHECK_EQ(Instruction::Op::kMove, instruction.opcode());
  DCHECK(instruction.dest().has_value());
  DCHECK(instruction.dest()->is_register() || instruction.dest()->is_parameter());
  DCHECK_EQ(1, instruction.args().size());

  const Value& source = instruction.args()[0];
  if (source.is_immediate()) {
    // TODO: support more registers
    DCHECK_LT(RegisterValue(*instruction.dest()), 16);
    // const/4 carries a *signed* 4-bit literal. Values 8..15 would set the sign bit and be read
    // back as -8..-1, so only 0..7 can be encoded faithfully from an unsigned immediate.
    DCHECK_LT(source.value(), 8u);
    // Layout: literal in bits 12-15, destination register in bits 8-11, opcode in the low byte.
    buffer_.push_back(art::Instruction::CONST_4 | (source.value() << 12) |
                      (RegisterValue(*instruction.dest()) << 8));
  } else {
    // Register-to-register moves are not implemented yet.
    UNIMPLEMENTED(FATAL);
  }
}
void MethodBuilder::EncodeInvokeVirtual(const Instruction& instruction) {
DCHECK_EQ(Instruction::Op::kInvokeVirtual, instruction.opcode());
// TODO: support more than one argument (i.e. the this argument) and change this to DCHECK_GE
DCHECK_EQ(1, instruction.args().size());
const Value& this_arg = instruction.args()[0];
size_t real_reg = RegisterValue(this_arg) & 0xf;
buffer_.push_back(1 << 12 | art::Instruction::INVOKE_VIRTUAL);
buffer_.push_back(instruction.method_id());
buffer_.push_back(real_reg);
if (instruction.dest().has_value()) {
real_reg = RegisterValue(*instruction.dest());
buffer_.push_back(real_reg << 8 | art::Instruction::MOVE_RESULT);
}
}
// Maps a Value to a concrete register number: local registers keep their own number, parameters
// are numbered after all locals (offset by num_registers_). Any other kind is a programming error
// and yields 0 after a failed DCHECK.
size_t MethodBuilder::RegisterValue(Value value) const {
  if (value.is_parameter()) {
    return num_registers_ + value.value();
  }
  if (value.is_register()) {
    return value.value();
  }
  DCHECK(false && "Must be either a parameter or a register");
  return 0;
}
// Returns the cached declaration data for the method identified by (type, name, prototype). The
// first request for a given triple allocates a new ir::MethodDecl, fills it in, registers it in the
// DEX file's method index map and caches the result.
const MethodDeclData& DexBuilder::GetOrDeclareMethod(TypeDescriptor type, const std::string& name,
                                                     Prototype prototype) {
  // operator[] inserts a value-initialized entry on first lookup; a null decl therefore means the
  // method has not been declared yet.
  MethodDeclData& cached = method_id_map_[{type, name, prototype}];
  if (cached.decl != nullptr) {
    return cached;
  }

  ir::MethodDecl* const new_decl = dex_file_->Alloc<ir::MethodDecl>();
  // The method id is the last added method (read right after the allocation above).
  const size_t method_index = dex_file_->methods.size() - 1;

  new_decl->name = GetOrAddString(name);
  new_decl->parent = GetOrAddType(type.descriptor());
  new_decl->prototype = GetOrEncodeProto(prototype);

  // update the index -> ir node map (see tools/dexter/slicer/dex_ir_builder.cc)
  const auto allocated_index = dex_file_->methods_indexes.AllocateIndex();
  auto& ir_node = dex_file_->methods_map[allocated_index];
  SLICER_CHECK(ir_node == nullptr);
  ir_node = new_decl;
  new_decl->orig_index = allocated_index;

  cached.id = method_index;
  cached.decl = new_decl;
  return cached;
}
// Returns the ir::Proto for `prototype`, encoding it on first use and caching the result in
// proto_map_ so later requests get the same node.
ir::Proto* DexBuilder::GetOrEncodeProto(Prototype prototype) {
  // operator[] creates a null slot for a prototype that has not been seen before.
  ir::Proto*& cached_proto = proto_map_[prototype];
  if (cached_proto != nullptr) {
    return cached_proto;
  }
  cached_proto = prototype.Encode(this);
  return cached_proto;
}
} // namespace dex

View File

@@ -17,7 +17,9 @@
#define DEX_BUILDER_H_
#include <map>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "slicer/dex_ir.h"
@@ -45,7 +47,7 @@ class TrackingAllocator : public ::dex::Writer::Allocator {
virtual void Free(void* ptr);
private:
std::map<void*, std::unique_ptr<uint8_t[]>> allocations_;
std::unordered_map<void*, std::unique_ptr<uint8_t[]>> allocations_;
};
// Represents a DEX type descriptor.
@@ -57,11 +59,17 @@ class TypeDescriptor {
static const TypeDescriptor Int();
static const TypeDescriptor Void();
// Creates a type descriptor from a fully-qualified class name. For example, it turns the class
// name java.lang.Object into the descriptor Ljava/lang/Object;.
static TypeDescriptor FromClassname(const std::string& name);
// Return the full descriptor, such as I or Ljava/lang/Object;
const std::string& descriptor() const { return descriptor_; }
// Return the shorty descriptor, such as I or L
std::string short_descriptor() const { return descriptor().substr(0, 1); }
bool operator<(const TypeDescriptor& rhs) const { return descriptor_ < rhs.descriptor_; }
private:
TypeDescriptor(std::string descriptor) : descriptor_{descriptor} {}
@@ -82,11 +90,98 @@ class Prototype {
// Get the shorty descriptor, such as VII for (Int, Int) -> Void
std::string Shorty() const;
// Orders prototypes lexicographically by return type, then by parameter types, so that Prototype
// can be used as a std::map key. std::tie compares through references; building the tuples with
// std::make_tuple would copy the return type and the whole parameter vector on every comparison.
bool operator<(const Prototype& rhs) const {
  return std::tie(return_type_, param_types_) < std::tie(rhs.return_type_, rhs.param_types_);
}
private:
const TypeDescriptor return_type_;
const std::vector<TypeDescriptor> param_types_;
};
// Represents a DEX register or constant. We separate regular registers and parameters
// because we will not know the real parameter id until after all instructions
// have been generated.
class Value {
 private:
  // What the stored number means: a local register id, a parameter index, or a literal value.
  enum class Kind { kLocalRegister, kParameter, kImmediate };

 public:
  // Named constructors, one per kind of value.
  static constexpr Value Local(size_t id) { return Value{id, Kind::kLocalRegister}; }
  static constexpr Value Parameter(size_t id) { return Value{id, Kind::kParameter}; }
  static constexpr Value Immediate(size_t value) { return Value{value, Kind::kImmediate}; }

  // Kind queries; exactly one of these is true for any Value.
  bool is_register() const { return has_kind(Kind::kLocalRegister); }
  bool is_parameter() const { return has_kind(Kind::kParameter); }
  bool is_immediate() const { return has_kind(Kind::kImmediate); }

  // The raw register id, parameter index or immediate, depending on the kind.
  size_t value() const { return value_; }

 private:
  constexpr Value(size_t value, Kind kind) : value_{value}, kind_{kind} {}

  bool has_kind(Kind expected) const { return kind_ == expected; }

  const size_t value_;
  const Kind kind_;
};
// A virtual instruction. We convert these to real instructions in MethodBuilder::Encode.
// Virtual instructions are needed to keep track of information that is not known until all of the
// code is generated. This information includes things like how many local registers are created and
// branch target locations.
class Instruction {
public:
// The operation performed by this instruction. These are virtual instructions that do not
// correspond exactly to DEX instructions.
enum class Op { kReturn, kMove, kInvokeVirtual };
////////////////////////
// Named Constructors //
////////////////////////
// For instructions with no return value and no arguments.
static inline Instruction OpNoArgs(Op opcode) {
return Instruction{opcode, /*method_id*/ 0, /*dest*/ {}};
}
// For most instructions, which take some number of arguments and have an optional return value.
template <typename... T>
static inline Instruction OpWithArgs(Op opcode, std::optional<const Value> dest, T... args) {
return Instruction{opcode, /*method_id*/ 0, dest, args...};
}
// For method calls.
template <typename... T>
static inline Instruction InvokeVirtual(size_t method_id, std::optional<const Value> dest,
Value this_arg, T... args) {
return Instruction{Op::kInvokeVirtual, method_id, dest, this_arg, args...};
}
///////////////
// Accessors //
///////////////
Op opcode() const { return opcode_; }
size_t method_id() const { return method_id_; }
const std::optional<const Value>& dest() const { return dest_; }
const std::vector<const Value>& args() const { return args_; }
private:
inline Instruction(Op opcode, size_t method_id, std::optional<const Value> dest)
: opcode_{opcode}, method_id_{method_id}, dest_{dest}, args_{} {}
template <typename... T>
inline constexpr Instruction(Op opcode, size_t method_id, std::optional<const Value> dest,
T... args)
: opcode_{opcode}, method_id_{method_id}, dest_{dest}, args_{args...} {}
const Op opcode_;
// The index of the method to invoke, for kInvokeVirtual and similar opcodes.
const size_t method_id_{0};
const std::optional<const Value> dest_;
const std::vector<const Value> args_;
};
// Needed for CHECK_EQ, DCHECK_EQ, etc.
std::ostream& operator<<(std::ostream& out, const Instruction::Op& opcode);
// Tools to help build methods and their bodies.
class MethodBuilder {
public:
@@ -95,42 +190,53 @@ class MethodBuilder {
// Encode the method into DEX format.
ir::EncodedMethod* Encode();
// Registers are just represented by their number.
using Register = size_t;
// Create a new register to be used for storing values. Note that these are not SSA registers, as
// might be expected in similar code generators. This does no liveness tracking or anything, so
// it's up to the caller to reuse registers as appropriate.
Register MakeRegister();
Value MakeRegister();
/////////////////////////////////
// Instruction builder methods //
/////////////////////////////////
void AddInstruction(Instruction instruction);
// return-void
void BuildReturn();
void BuildReturn(Register src);
void BuildReturn(Value src);
// const/4
void BuildConst4(Register target, int value);
void BuildConst4(Value target, int value);
// TODO: add builders for more instructions
private:
void EncodeInstructions();
void EncodeInstruction(const Instruction& instruction);
void EncodeReturn(const Instruction& instruction);
void EncodeMove(const Instruction& instruction);
void EncodeInvokeVirtual(const Instruction& instruction);
// Converts a register or parameter to its DEX register number.
size_t RegisterValue(Value value) const;
DexBuilder* dex_;
ir::Class* class_;
ir::MethodDecl* decl_;
// A buffer to hold instructions we are generating.
// A list of the instructions we will eventually encode.
std::vector<Instruction> instructions_;
// A buffer to hold instructions that have been encoded.
std::vector<::dex::u2> buffer_;
// How many registers we've allocated
size_t num_registers_;
size_t num_registers_{0};
};
// A helper to build class definitions.
class ClassBuilder {
public:
ClassBuilder(DexBuilder* parent, ir::Class* class_def);
ClassBuilder(DexBuilder* parent, const std::string& name, ir::Class* class_def);
void set_source_file(const std::string& source);
@@ -139,8 +245,15 @@ class ClassBuilder {
MethodBuilder CreateMethod(const std::string& name, Prototype prototype);
private:
DexBuilder* parent_;
ir::Class* class_;
DexBuilder* const parent_;
const TypeDescriptor type_descriptor_;
ir::Class* const class_;
};
// Keeps track of information needed to manipulate or call a method.
struct MethodDeclData {
// Index of the method at the time it was declared; this is the value passed as the method id when
// building invoke instructions.
size_t id;
// The IR declaration node for the method. Null in a default-constructed entry, which is how
// a not-yet-declared method is recognized.
ir::MethodDecl* decl;
};
// Builds Dex files from scratch.
@@ -163,10 +276,19 @@ class DexBuilder {
ClassBuilder MakeClass(const std::string& name);
// Add a type for the given descriptor, or return the existing one if it already exists.
// See the TypeDescriptor class for help generating these.
// See the TypeDescriptor class for help generating these. GetOrAddType can be used to declare
// imported classes.
ir::Type* GetOrAddType(const std::string& descriptor);
// Returns the method id and declaration for the method, declaring it if it has not been declared yet.
const MethodDeclData& GetOrDeclareMethod(TypeDescriptor type, const std::string& name,
Prototype prototype);
private:
// Looks up the ir::Proto* corresponding to this given prototype, or creates one if it does not
// exist.
ir::Proto* GetOrEncodeProto(Prototype prototype);
std::shared_ptr<ir::DexFile> dex_file_;
// allocator_ is needed to be able to encode the image.
@@ -177,10 +299,29 @@ class DexBuilder {
std::vector<std::unique_ptr<uint8_t[]>> string_data_;
// Keep track of what types we've defined so we can look them up later.
std::map<std::string, ir::Type*> types_by_descriptor_;
std::unordered_map<std::string, ir::Type*> types_by_descriptor_;
// Key identifying a method declaration: the declaring type, the method name and its prototype.
struct MethodDescriptor {
  TypeDescriptor type;
  std::string name;
  Prototype prototype;

  // Lexicographic ordering over (type, name, prototype) so this struct can be a std::map key.
  // std::tie compares through references; std::make_tuple would copy the descriptor, the name and
  // the prototype for both operands on every comparison performed during a map lookup.
  inline bool operator<(const MethodDescriptor& rhs) const {
    return std::tie(type, name, prototype) < std::tie(rhs.type, rhs.name, rhs.prototype);
  }
};
// Maps method declarations to their method index. This is needed to encode references to them.
// When we go to actually write the DEX file, slicer will re-assign these after correctly sorting
// the methods list.
std::map<MethodDescriptor, MethodDeclData> method_id_map_;
// Keep track of what strings we've defined so we can look them up later.
std::map<std::string, ir::String*> strings_;
std::unordered_map<std::string, ir::String*> strings_;
// Keep track of already-encoded protos.
std::map<Prototype, ir::Proto*> proto_map_;
};
} // namespace dex

View File

@@ -40,6 +40,12 @@ bool EncodeAndVerify(DexBuilder* dex_file) {
return loaded_dex_file != nullptr;
}
// Write out and verify a DEX file that corresponds to:
//
// package dextest;
// public class DexTest {
// public static void foo() {}
// }
TEST(DexBuilderTest, VerifyDexWithClassMethod) {
DexBuilder dex_file;
@@ -67,6 +73,12 @@ TEST(DexBuilderTest, VerifyBadDexWithClassMethod) {
EXPECT_FALSE(EncodeAndVerify(&dex_file));
}
// Write out and verify a DEX file that corresponds to:
//
// package dextest;
// public class DexTest {
// public static int foo() { return 5; }
// }
TEST(DexBuilderTest, VerifyDexReturn5) {
DexBuilder dex_file;
@@ -80,3 +92,51 @@ TEST(DexBuilderTest, VerifyDexReturn5) {
EXPECT_TRUE(EncodeAndVerify(&dex_file));
}
// Write out and verify a DEX file that corresponds to:
//
// package dextest;
// public class DexTest {
// public static int foo(int x) { return x; }
// }
TEST(DexBuilderTest, VerifyDexReturnIntParam) {
DexBuilder dex_file;
auto cbuilder{dex_file.MakeClass("dextest.DexTest")};
// Declare foo with an int return type and a single int parameter.
auto method{
cbuilder.CreateMethod("foo", Prototype{TypeDescriptor::Int(), TypeDescriptor::Int()})};
// Return the first parameter directly; no local register is allocated in this method.
method.BuildReturn(Value::Parameter(0));
method.Encode();
// EncodeAndVerify reports whether the encoded file could be loaded back.
EXPECT_TRUE(EncodeAndVerify(&dex_file));
}
// Write out and verify a DEX file that corresponds to:
//
// package dextest;
// public class DexTest {
// public static int foo(String s) { return s.length(); }
// }
TEST(DexBuilderTest, VerifyDexCallStringLength) {
  DexBuilder dex_file;
  auto cbuilder{dex_file.MakeClass("dextest.DexTest")};

  // java.lang.String is both foo's parameter type and the receiver type of length().
  TypeDescriptor string_type = TypeDescriptor::FromClassname("java.lang.String");

  // int foo(String s)
  MethodBuilder method{cbuilder.CreateMethod("foo", Prototype{TypeDescriptor::Int(), string_type})};
  Value result = method.MakeRegister();

  // Declare the external method String.length() so that it can be invoked.
  MethodDeclData string_length =
      dex_file.GetOrDeclareMethod(string_type, "length", Prototype{TypeDescriptor::Int()});

  // result = s.length(); return result;
  method.AddInstruction(Instruction::InvokeVirtual(string_length.id, result, Value::Parameter(0)));
  method.BuildReturn(result);
  method.Encode();

  EXPECT_TRUE(EncodeAndVerify(&dex_file));
}