/*============================================================================= Copyright (c) 2001-2011 Joel de Guzman Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ #include "config.hpp" #include "compiler.hpp" #include "annotation.hpp" #include "vm.hpp" #include #include #include #include #include namespace client { namespace code_gen { value::value() : v(0), is_lvalue_(false), builder(0) {} value::value( llvm::Value* v, bool is_lvalue_, llvm::IRBuilder<>* builder) : v(v), is_lvalue_(is_lvalue_), builder(builder) {} value::value(value const& rhs) : v(rhs.v), is_lvalue_(rhs.is_lvalue_), builder(rhs.builder) {} bool value::is_lvalue() const { return is_lvalue_; } bool value::is_valid() const { return v != 0; } value::operator bool() const { return v != 0; } void value::name(char const* id) { v->setName(id); } void value::name(std::string const& id) { v->setName(id); } value::operator llvm::Value*() const { if (is_lvalue_) { BOOST_ASSERT(builder != 0); return builder->CreateLoad(v, v->getName()); } return v; } value& value::operator=(value const& rhs) { v = rhs.v; is_lvalue_ = rhs.is_lvalue_; builder = rhs.builder; return *this; } value& value::assign(value const& rhs) { BOOST_ASSERT(is_lvalue()); BOOST_ASSERT(builder != 0); builder->CreateStore(rhs, v); return *this; } value operator-(value a) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateNeg(a, "neg_tmp"), false, a.builder ); } value operator!(value a) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateNot(a, "not_tmp"), false, a.builder ); } value operator+(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateAdd(a, b, "add_tmp"), false, a.builder ); } value operator-(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateSub(a, b, "sub_tmp"), false, a.builder ); } value operator*(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateMul(a, b, "mul_tmp"), false, a.builder ); } value operator/(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateSDiv(a, b, "div_tmp"), false, a.builder ); } value operator%(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateSRem(a, b, "mod_tmp"), false, a.builder ); } value operator&(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateAnd(a, b, "and_tmp"), false, a.builder ); } value operator|(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateOr(a, b, "or_tmp"), false, a.builder ); } value operator^(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateXor(a, b, "xor_tmp"), false, a.builder ); } value operator<<(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateShl(a, b, "shl_tmp"), false, a.builder ); } value operator>>(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateLShr(a, b, "shr_tmp"), false, a.builder ); } value operator==(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpEQ(a, b, "eq_tmp"), false, a.builder ); } value operator!=(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpNE(a, b, "ne_tmp"), false, a.builder ); } value operator<(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpSLT(a, b, "slt_tmp"), false, a.builder ); } value operator<=(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpSLE(a, b, "sle_tmp"), false, a.builder ); } value operator>(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpSGT(a, b, "sgt_tmp"), false, a.builder ); } value operator>=(value a, value b) { BOOST_ASSERT(a.builder != 0); return value( a.builder->CreateICmpSGE(a, b, "sge_tmp"), false, a.builder ); } struct function::to_value { typedef value result_type; llvm_compiler* c; to_value(llvm_compiler* c = 0) : c(c) {} value operator()(llvm::Value& v) const { return c->val(&v); } }; bool basic_block::has_terminator() const { return b->getTerminator() != 0; } bool basic_block::is_valid() const { return b != 0; } function::operator llvm::Function*() const { return f; } std::size_t function::arg_size() const { return f->arg_size(); } void function::add(basic_block const& b) { f->getBasicBlockList().push_back(b); } void function::erase_from_parent() { f->eraseFromParent(); } basic_block function::last_block() { return &f->getBasicBlockList().back(); } bool function::empty() const { return f->empty(); } std::string function::name() const { return f->getName(); } function::arg_range function::args() const { BOOST_ASSERT(c != 0); arg_val_iterator first(f->arg_begin(), to_value()); arg_val_iterator last(f->arg_end(), to_value()); return arg_range(first, last); } bool function::is_valid() const { return f != 0; } void function::verify() const { llvm::verifyFunction(*f); } value llvm_compiler::val(unsigned int x) { return value( llvm::ConstantInt::get(context(), llvm::APInt(int_size, x)), false, &llvm_builder); } value llvm_compiler::val(int x) { return value( llvm::ConstantInt::get(context(), llvm::APInt(int_size, x)), false, &llvm_builder); } value llvm_compiler::val(bool x) { return value( llvm::ConstantInt::get(context(), llvm::APInt(1, x)), false, &llvm_builder); } value llvm_compiler::val(llvm::Value* v) { return value(v, false, &llvm_builder); } namespace { // Create an alloca instruction in the entry block of // the function. This is used for mutable variables etc. llvm::AllocaInst* make_entry_block_alloca( llvm::Function* f, char const* name, llvm::LLVMContext& context) { llvm::IRBuilder<> builder( &f->getEntryBlock(), f->getEntryBlock().begin()); return builder.CreateAlloca( llvm::Type::getIntNTy(context, int_size), 0, name); } } value llvm_compiler::var(char const* name) { llvm::Function* f = llvm_builder.GetInsertBlock()->getParent(); llvm::IRBuilder<> builder( &f->getEntryBlock(), f->getEntryBlock().begin()); llvm::AllocaInst* alloca = builder.CreateAlloca( llvm::Type::getIntNTy(context(), int_size), 0, name); return value(alloca, true, &llvm_builder); } struct value::to_llvm_value { typedef llvm::Value* result_type; llvm::Value* operator()(value const& x) const { return x; } }; template llvm::Value* llvm_compiler::call_impl( function callee, C const& args_) { // Sigh. LLVM requires CreateCall arguments to be random access. // It would have been better if it can accept forward iterators. // I guess it needs the arguments to be in contiguous memory. // So, we have to put the args into a temporary std::vector. std::vector args( args_.begin(), args_.end()); // Check the args for null values. We can't have null values. // Return 0 if we find one to flag error. BOOST_FOREACH(llvm::Value* arg, args) { if (arg == 0) return 0; } return llvm_builder.CreateCall( callee, args.begin(), args.end(), "call_tmp"); } template value llvm_compiler::call( function callee, Container const& args) { llvm::Value* call = call_impl( callee, args | boost::adaptors::transformed(value::to_llvm_value())); if (call == 0) return val(); return value(call, false, &llvm_builder); } function llvm_compiler::get_function(char const* name) { return function(vm.module()->getFunction(name), this); } function llvm_compiler::get_current_function() { // get the current function return function(llvm_builder.GetInsertBlock()->getParent(), this); } function llvm_compiler::declare_function( bool void_return , std::string const& name , std::size_t nargs) { llvm::Type const* int_type = llvm::Type::getIntNTy(context(), int_size); llvm::Type const* void_type = llvm::Type::getVoidTy(context()); std::vector ints(nargs, int_type); llvm::Type const* return_type = void_return ? void_type : int_type; llvm::FunctionType* function_type = llvm::FunctionType::get(void_return ? void_type : int_type, ints, false); return function(llvm::Function::Create( function_type, llvm::Function::ExternalLinkage, name, vm.module()), this); } basic_block llvm_compiler::make_basic_block( char const* name , function parent , basic_block before) { return llvm::BasicBlock::Create(context(), name, parent, before); } value llvm_compiler::var(std::string const& name) { return var(name.c_str()); } function llvm_compiler::get_function(std::string const& name) { return get_function(name.c_str()); } basic_block llvm_compiler::get_insert_block() { return llvm_builder.GetInsertBlock(); } void llvm_compiler::set_insert_point(basic_block b) { llvm_builder.SetInsertPoint(b); } void llvm_compiler::conditional_branch( value cond, basic_block true_br, basic_block false_br) { llvm_builder.CreateCondBr(cond, true_br, false_br); } void llvm_compiler::branch(basic_block b) { llvm_builder.CreateBr(b); } void llvm_compiler::return_() { llvm_builder.CreateRetVoid(); } void llvm_compiler::return_(value v) { llvm_builder.CreateRet(v); } void llvm_compiler::optimize_function(function f) { // Optimize the function. fpm.run(*f); } void llvm_compiler::init_fpm() { // Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. fpm.add(new llvm::TargetData(*vm.execution_engine()->getTargetData())); // Provide basic AliasAnalysis support for GVN. fpm.add(llvm::createBasicAliasAnalysisPass()); // Promote allocas to registers. fpm.add(llvm::createPromoteMemoryToRegisterPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. fpm.add(llvm::createInstructionCombiningPass()); // Reassociate expressions. fpm.add(llvm::createReassociatePass()); // Eliminate Common SubExpressions. fpm.add(llvm::createGVNPass()); // Simplify the control flow graph (deleting unreachable blocks, etc). fpm.add(llvm::createCFGSimplificationPass()); fpm.doInitialization(); } value compiler::operator()(unsigned int x) { return val(x); } value compiler::operator()(bool x) { return val(x); } value compiler::operator()(ast::primary_expr const& x) { return boost::apply_visitor(*this, x.get()); } value compiler::operator()(ast::identifier const& x) { // Look this variable up in the function. if (locals.find(x.name) == locals.end()) { error_handler(x.id, "Undeclared variable: " + x.name); return val(); } return locals[x.name]; } value compiler::operator()(ast::unary_expr const& x) { value operand = boost::apply_visitor(*this, x.operand_); if (!operand.is_valid()) return val(); switch (x.operator_) { case token_ids::compl_: return operand ^ val(-1); case token_ids::minus: return -operand; case token_ids::not_: return !operand; case token_ids::plus: return operand; case token_ids::plus_plus: { if (!operand.is_lvalue()) { error_handler(x.id, "++ needs an var"); return val(); } value r = operand + val(1); operand.assign(r); return operand; } case token_ids::minus_minus: { if (!operand.is_lvalue()) { error_handler(x.id, "-- needs an var"); return val(); } value r = operand - val(1); operand.assign(r); return operand; } default: BOOST_ASSERT(0); return val(); } } namespace { struct compile_args { compiler& c; compile_args(compiler& c) : c(c) {} typedef value result_type; value operator()(ast::expression const& expr) const { return c(expr); } }; } value compiler::operator()(ast::function_call const& x) { function callee = get_function(x.function_name.name); if (!callee.is_valid()) { error_handler(x.function_name.id, "Function not found: " + x.function_name.name); return val(); } if (callee.arg_size() != x.args.size()) { error_handler(x.function_name.id, "Wrong number of arguments: " + x.function_name.name); return val(); } return call(callee, x.args | boost::adaptors::transformed(compile_args(*this))); } namespace { int precedence[] = { // precedence 1 1, // op_comma // precedence 2 2, // op_assign 2, // op_plus_assign 2, // op_minus_assign 2, // op_times_assign 2, // op_divide_assign 2, // op_mod_assign 2, // op_bit_and_assign 2, // op_bit_xor_assign 2, // op_bitor_assign 2, // op_shift_left_assign 2, // op_shift_right_assign // precedence 3 3, // op_logical_or // precedence 4 4, // op_logical_and // precedence 5 5, // op_bit_or // precedence 6 6, // op_bit_xor // precedence 7 7, // op_bit_and // precedence 8 8, // op_equal 8, // op_not_equal // precedence 9 9, // op_less 9, // op_less_equal 9, // op_greater 9, // op_greater_equal // precedence 10 10, // op_shift_left 10, // op_shift_right // precedence 11 11, // op_plus 11, // op_minus // precedence 12 12, // op_times 12, // op_divide 12 // op_mod }; } inline int precedence_of(token_ids::type op) { return precedence[op & 0xFF]; } value compiler::compile_binary_expression( value lhs, value rhs, token_ids::type op) { switch (op) { case token_ids::plus: return lhs + rhs; case token_ids::minus: return lhs - rhs; case token_ids::times: return lhs * rhs; case token_ids::divide: return lhs / rhs; case token_ids::mod: return lhs % rhs; case token_ids::logical_or: case token_ids::bit_or: return lhs | rhs; case token_ids::logical_and: case token_ids::bit_and: return lhs & rhs; case token_ids::bit_xor: return lhs ^ rhs; case token_ids::shift_left: return lhs << rhs; case token_ids::shift_right: return lhs >> rhs; case token_ids::equal: return lhs == rhs; case token_ids::not_equal: return lhs != rhs; case token_ids::less: return lhs < rhs; case token_ids::less_equal: return lhs <= rhs; case token_ids::greater: return lhs > rhs; case token_ids::greater_equal: return lhs >= rhs; default: BOOST_ASSERT(0); return val(); } } // The Shunting-yard algorithm value compiler::compile_expression( int min_precedence, value lhs, std::list::const_iterator& rest_begin, std::list::const_iterator rest_end) { while ((rest_begin != rest_end) && (precedence_of(rest_begin->operator_) >= min_precedence)) { token_ids::type op = rest_begin->operator_; value rhs = boost::apply_visitor(*this, rest_begin->operand_); if (!rhs.is_valid()) return val(); ++rest_begin; while ((rest_begin != rest_end) && (precedence_of(rest_begin->operator_) > precedence_of(op))) { token_ids::type next_op = rest_begin->operator_; rhs = compile_expression( precedence_of(next_op), rhs, rest_begin, rest_end); } lhs = compile_binary_expression(lhs, rhs, op); } return lhs; } value compiler::operator()(ast::expression const& x) { value lhs = boost::apply_visitor(*this, x.first); if (!lhs.is_valid()) return val(); std::list::const_iterator rest_begin = x.rest.begin(); return compile_expression(0, lhs, rest_begin, x.rest.end()); } value compiler::operator()(ast::assignment const& x) { if (locals.find(x.lhs.name) == locals.end()) { error_handler(x.lhs.id, "Undeclared variable: " + x.lhs.name); return val(); } value lhs = locals[x.lhs.name]; value rhs = (*this)(x.rhs); if (!rhs.is_valid()) return val(); if (x.operator_ == token_ids::assign) { lhs.assign(rhs); return lhs; } value result; switch (x.operator_) { case token_ids::plus_assign: result = lhs + rhs; break; case token_ids::minus_assign: result = lhs - rhs; break; case token_ids::times_assign: result = lhs * rhs; break; case token_ids::divide_assign: result = lhs / rhs; break; case token_ids::mod_assign: result = lhs % rhs; break; case token_ids::bit_and_assign: result = lhs & rhs; break; case token_ids::bit_xor_assign: result = lhs ^ rhs; break; case token_ids::bit_or_assign: result = lhs | rhs; break; case token_ids::shift_left_assign: result = lhs << rhs; break; case token_ids::shift_right_assign: result = lhs >> rhs; break; default: BOOST_ASSERT(0); return val(); } lhs.assign(result); return lhs; } bool compiler::operator()(ast::variable_declaration const& x) { if (locals.find(x.lhs.name) != locals.end()) { error_handler(x.lhs.id, "Duplicate variable: " + x.lhs.name); return false; } value init; std::string const& name = x.lhs.name; if (x.rhs) // if there's an RHS initializer { init = (*this)(*x.rhs); if (!init.is_valid()) // don't add the variable if the RHS fails return false; } value var_ = var(name.c_str()); if (init.is_valid()) var_.assign(init); // Remember this binding. locals[name] = var_; return true; } struct compiler::statement_compiler : compiler { typedef bool result_type; }; compiler::statement_compiler& compiler::as_statement() { return *static_cast(this); } bool compiler::operator()(ast::statement const& x) { if (boost::get(&x) != 0) // empty statement return true; return boost::apply_visitor(as_statement(), x); } bool compiler::operator()(ast::statement_list const& x) { BOOST_FOREACH(ast::statement const& s, x) { if (!(*this)(s)) return false; } return true; } bool compiler::operator()(ast::if_statement const& x) { value condition = (*this)(x.condition); if (!condition.is_valid()) return false; function f = get_current_function(); // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. basic_block then_block = make_basic_block("if.then", f); basic_block else_block; basic_block exit_block; if (x.else_) { else_block = make_basic_block("if.else"); conditional_branch(condition, then_block, else_block); } else { exit_block = make_basic_block("if.end"); conditional_branch(condition, then_block, exit_block); } // Emit then value. set_insert_point(then_block); if (!(*this)(x.then)) return false; if (!then_block.has_terminator()) { if (!exit_block.is_valid()) exit_block = make_basic_block("if.end"); branch(exit_block); } // Codegen of 'then' can change the current block, update then_block then_block = get_insert_block(); if (x.else_) { // Emit else block. f.add(else_block); set_insert_point(else_block); if (!(*this)(*x.else_)) return false; if (!else_block.has_terminator()) { if (!exit_block.is_valid()) exit_block = make_basic_block("if.end"); branch(exit_block); } // Codegen of 'else' can change the current block, update else_block else_block = get_insert_block(); } if (exit_block.is_valid()) { // Emit exit block f.add(exit_block); set_insert_point(exit_block); } return true; } bool compiler::operator()(ast::while_statement const& x) { function f = get_current_function(); basic_block cond_block = make_basic_block("while.cond", f); basic_block body_block = make_basic_block("while.body"); basic_block exit_block = make_basic_block("while.end"); branch(cond_block); set_insert_point(cond_block); value condition = (*this)(x.condition); if (!condition.is_valid()) return false; conditional_branch(condition, body_block, exit_block); f.add(body_block); set_insert_point(body_block); if (!(*this)(x.body)) return false; if (!body_block.has_terminator()) branch(cond_block); // loop back // Emit exit block f.add(exit_block); set_insert_point(exit_block); return true; } bool compiler::operator()(ast::return_statement const& x) { if (void_return) { if (x.expr) { error_handler( x.id, "'void' function returning a value: "); return false; } } else { if (!x.expr) { error_handler( x.id, current_function_name + " function must return a value: "); return false; } } if (x.expr) { value return_val = (*this)(*x.expr); if (!return_val.is_valid()) return false; return_var.assign(return_val); } branch(return_block); return true; } function compiler::function_decl(ast::function const& x) { void_return = x.return_type == "void"; current_function_name = x.function_name.name; function f = declare_function( void_return , current_function_name , x.args.size()); // If function conflicted, the function already exixts. If it has a // body, don't allow redefinition or reextern. if (f.name() != current_function_name) { // Delete the one we just made and get the existing one. f.erase_from_parent(); f = get_function(current_function_name); // If function already has a body, reject this. if (!f.empty()) { error_handler( x.function_name.id, "Duplicate function: " + x.function_name.name); return function(); } // If function took a different number of args, reject. if (f.arg_size() != x.args.size()) { error_handler( x.function_name.id, "Redefinition of function with different # args: " + x.function_name.name); return function(); } // Set names for all arguments. function::arg_range rng = f.args(); function::arg_range::const_iterator iter = rng.begin(); BOOST_FOREACH(ast::identifier const& arg, x.args) { iter->name(arg.name); ++iter; } } return f; } void compiler::function_allocas(ast::function const& x, function f) { // Create variables for each argument and register the // argument in the symbol table so that references to it will succeed. function::arg_range rng = f.args(); function::arg_range::const_iterator iter = rng.begin(); BOOST_FOREACH(ast::identifier const& arg, x.args) { // Create an arg_ for this variable. value arg_ = var(arg.name); // Store the initial value into the arg_. arg_.assign(*iter); // Add arguments to variable symbol table. locals[arg.name] = arg_; ++iter; } if (!void_return) { // Create an alloca for the return value return_var = var("return.val"); } } bool compiler::operator()(ast::function const& x) { /////////////////////////////////////////////////////////////////////// // the signature: function f = function_decl(x); if (!f.is_valid()) return false; /////////////////////////////////////////////////////////////////////// // the body: if (x.body) // compile the body if this is not a prototype { // Create a new basic block to start insertion into. basic_block block = make_basic_block("entry", f); set_insert_point(block); function_allocas(x, f); return_block = make_basic_block("return"); if (!(*this)(*x.body)) { // Error reading body, remove function. f.erase_from_parent(); return false; } basic_block last_block = f.last_block(); // If the last block is unterminated, connect it to return_block if (!last_block.has_terminator()) { set_insert_point(last_block); branch(return_block); } f.add(return_block); set_insert_point(return_block); if (void_return) return_(); else return_(return_var); // Validate the generated code, checking for consistency. f.verify(); // Optimize the function. optimize_function(f); } return true; } bool compiler::operator()(ast::function_list const& x) { BOOST_FOREACH(ast::function const& f, x) { locals.clear(); // clear the variables if (!(*this)(f)) return false; } return true; } }}