diff --git a/tc/core/cuda/cuda_rtc.cc b/tc/core/cuda/cuda_rtc.cc index 9121704da..0a6eaf0cb 100644 --- a/tc/core/cuda/cuda_rtc.cc +++ b/tc/core/cuda/cuda_rtc.cc @@ -29,6 +29,7 @@ #include "tc/core/cuda/cuda_rtc.h" #include "tc/core/flags.h" #include "tc/core/scope_guard.h" +#include "tc/core/utils/system.h" namespace tc { std::mutex nvrtc_mutex; @@ -65,17 +66,6 @@ void checkOrCreateContext() { } namespace { -static void checkedSystemCall( - const std::string& cmd, - const std::vector& args) { - std::stringstream command; - command << cmd << " "; - for (const auto& s : args) { - command << s << " "; - } - TC_CHECK_EQ(std::system(command.str().c_str()), 0) << command.str(); -} - static std::tuple getCudaArchitecture() { int device, major, minor; CUdevice deviceHandle; @@ -119,7 +109,7 @@ static std::string llvmCompile( }); // Compile - checkedSystemCall( + utils::checkedSystemCall( std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/clang++", {"-x cuda " + inputFileName, "--cuda-device-only", @@ -134,7 +124,7 @@ static std::string llvmCompile( "-o " + outputClangFile}); // Link libdevice before opt - checkedSystemCall( + utils::checkedSystemCall( std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/llvm-link ", {outputClangFile, std::string(TC_STRINGIFY(TC_CUDA_TOOLKIT_ROOT_DIR)) + @@ -143,7 +133,7 @@ static std::string llvmCompile( "-o " + outputLinkFile}); // Opt - checkedSystemCall( + utils::checkedSystemCall( std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/opt", {"-internalize", std::string("-internalize-public-api-list=") + name, @@ -154,7 +144,7 @@ static std::string llvmCompile( std::string("-o ") + outputOptFile}); // Ptx - checkedSystemCall( + utils::checkedSystemCall( std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/llc", {std::string("-mcpu=") + arch, outputOptFile, @@ -188,7 +178,7 @@ static std::string nvccCompile( // cstdio's std::remove to delete files tc::ScopeGuard sgo([&]() { std::remove(outputPtxFile.c_str()); }); - checkedSystemCall( + utils::checkedSystemCall( 
std::string(TC_STRINGIFY(TC_CUDA_TOOLKIT_ROOT_DIR)) + "/bin/nvcc", {"-x cu", inputFileName, diff --git a/tc/core/flags.cc b/tc/core/flags.cc index a5dc9e438..c1645ec30 100644 --- a/tc/core/flags.cc +++ b/tc/core/flags.cc @@ -53,8 +53,14 @@ DEFINE_string( "compiler flags to set when nvcc is used"); // CPU codegen options +DEFINE_string(mcpu, "", "see llvm's --mcpu"); DEFINE_bool(llvm_dump_before_opt, false, "Print IR before optimization"); DEFINE_bool(llvm_dump_after_opt, false, "Print IR after optimization"); +DEFINE_bool(llvm_dump_asm, false, "Print asm"); +DEFINE_string( + llvm_dump_asm_options, + "-filetype=asm", + "Options used when dumping asm"); DEFINE_uint32( benchmark_warmup, diff --git a/tc/core/flags.h b/tc/core/flags.h index 38739a2da..0646a14e3 100644 --- a/tc/core/flags.h +++ b/tc/core/flags.h @@ -36,9 +36,12 @@ DECLARE_string(cuda_compiler); DECLARE_string(llvm_flags); DECLARE_string(nvcc_flags); -// llvm codegen +// CPU codegen options +DECLARE_string(mcpu); DECLARE_bool(llvm_dump_before_opt); DECLARE_bool(llvm_dump_after_opt); +DECLARE_bool(llvm_dump_asm); +DECLARE_string(llvm_dump_asm_options); // Used in benchmarking and autotuning DECLARE_uint32(benchmark_warmup); diff --git a/tc/core/polyhedral/codegen_llvm.cc b/tc/core/polyhedral/codegen_llvm.cc index 4e9e264e1..2389658dc 100644 --- a/tc/core/polyhedral/codegen_llvm.cc +++ b/tc/core/polyhedral/codegen_llvm.cc @@ -15,6 +15,7 @@ */ #include "tc/core/polyhedral/codegen_llvm.h" +#include #include #include @@ -26,6 +27,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" @@ -41,7 +43,10 @@ #include "tc/core/polyhedral/schedule_isl_conversion.h" #include "tc/core/polyhedral/scop.h" #include "tc/core/scope_guard.h" +#include "tc/core/utils/cpu.h" +#include "tc/core/utils/system.h" #include 
"tc/external/isl.h" +#include "tc/tc_config.h" #ifndef LLVM_VERSION_MAJOR #error LLVM_VERSION_MAJOR not set @@ -50,9 +55,8 @@ using namespace Halide; namespace tc { - namespace polyhedral { - +namespace { using IteratorMapType = std::unordered_map; using IteratorMapsType = std::unordered_map; @@ -60,7 +64,64 @@ using IteratorMapsType = using StmtSubscriptExprMapType = std::unordered_map, isl::IslIdIslHash>; -namespace { +struct IslCodegenRes { + IteratorMapsType iteratorMaps; + StmtSubscriptExprMapType stmtSubscripts; + isl::ast_node astNode; +}; + +isl::ast_node collectIteratorMaps( + isl::ast_node node, + isl::ast_build build, + IteratorMapsType& iteratorMaps, + const Scop& scop, + StmtSubscriptExprMapType& stmtSubscripts) { + auto user = node.as(); + TC_CHECK(user); + auto expr = user.get_expr().as(); + auto schedule = build.get_schedule(); + auto scheduleMap = isl::map::from_union_map(schedule); + + auto stmtId = expr.get_arg(0).as().get_id(); + TC_CHECK_EQ(0u, iteratorMaps.count(stmtId)) << "entry exists: " << stmtId; + auto iteratorMap = isl::pw_multi_aff(scheduleMap.reverse()); + auto tuple = scop.halide.domains.at(stmtId).tuple; + auto& stmtIteratorMap = iteratorMaps[stmtId]; + for (int i = 0; i < tuple.size(); ++i) { + auto expr = build.expr_from(iteratorMap.get_pw_aff(i)); + stmtIteratorMap.emplace(tuple.get_id(i).get_name(), expr); + } + auto& subscripts = stmtSubscripts[stmtId]; + auto provide = + scop.halide.statements.at(stmtId).as(); + for (auto e : provide->args) { + const auto& map = iteratorMap; + auto aff = scop.makeIslAffFromStmtExpr(stmtId, e); + auto pulled = isl::pw_aff(aff).pullback(map); + TC_CHECK_EQ(pulled.n_piece(), 1); + subscripts.push_back(build.expr_from(pulled)); + } + return node.set_annotation(stmtId); +} + +static IslCodegenRes codegenISL(const Scop& scop) { + IteratorMapsType iteratorMaps; + StmtSubscriptExprMapType stmtSubscripts; + auto collect = [&iteratorMaps, &scop, &stmtSubscripts]( + isl::ast_node n, isl::ast_build b) -> 
isl::ast_node { + auto& uv = iteratorMaps; + return collectIteratorMaps(n, b, uv, scop, stmtSubscripts); + }; + + auto schedule = detail::toIslSchedule(scop.scheduleRoot()); + auto astBuild = isl::ast_build(schedule.get_ctx()); + astBuild = astBuild.set_at_each_domain(collect); + auto root = scop.scheduleRoot(); + astBuild = astBuild.set_iterators(Codegen::makeLoopIterators(root)); + auto astNode = astBuild.node_from(schedule); + return { + std::move(iteratorMaps), std::move(stmtSubscripts), std::move(astNode)}; +} thread_local llvm::LLVMContext llvmCtx; @@ -95,6 +156,32 @@ std::vector getTensorSizesWithoutLeadingDim( return sizes; } +// Set some options, grabbed from Halide + we force fast math atm +static llvm::TargetOptions makeTargetOptions() { + bool use_soft_float_abi = false; + bool per_instruction_fast_math_flags = true; + + llvm::TargetOptions options; + options.AllowFPOpFusion = per_instruction_fast_math_flags + ? llvm::FPOpFusion::Strict + : llvm::FPOpFusion::Fast; + options.UnsafeFPMath = !per_instruction_fast_math_flags; + options.NoInfsFPMath = !per_instruction_fast_math_flags; + options.NoNaNsFPMath = !per_instruction_fast_math_flags; + options.HonorSignDependentRoundingFPMathOption = + !per_instruction_fast_math_flags; + options.NoZerosInBSS = false; + options.GuaranteedTailCallOpt = false; + options.StackAlignmentOverride = 0; + options.FunctionSections = true; + options.UseInitArray = false; + options.FloatABIType = + use_soft_float_abi ? 
llvm::FloatABI::Soft : llvm::FloatABI::Hard; + options.RelaxELFRelocations = false; + + return options; +} + static constexpr int kOptLevel = 3; class CodeGen_TC : public Halide::Internal::CodeGen_X86 { @@ -112,6 +199,7 @@ class CodeGen_TC : public Halide::Internal::CodeGen_X86 { const char* llvm_args[] = {"tc (LLVM argument parsing)", nullptr}; llvm::cl::ParseCommandLineOptions( sizeof(llvm_args) / sizeof(*llvm_args) - 1, llvm_args); + init_context(); module = llvm::make_unique("TensorComprehensionsModule", *context); @@ -194,33 +282,35 @@ class CodeGen_TC : public Halide::Internal::CodeGen_X86 { } public: - void optimize_module() { + void optimize_module(const llvm::TargetMachine& targetMachine) { LOG_IF(INFO, FLAGS_llvm_dump_before_opt) << "[LLVM-IR] Before optimization:\n" << toString(module.get()); - llvm::legacy::FunctionPassManager functionPassManager(module.get()); - llvm::legacy::PassManager modulePassManager; + std::unique_ptr targetMachineWithOptions( + targetMachine.getTarget().createTargetMachine( + targetMachine.getTargetTriple().str(), + targetMachine.getTargetCPU(), + targetMachine.getTargetFeatureString(), + makeTargetOptions(), + llvm::Reloc::PIC_, + llvm::CodeModel::Small, + llvm::CodeGenOpt::Aggressive)); - std::unique_ptr targetMachine = - Halide::Internal::make_target_machine(*module); + llvm::legacy::PassManager modulePassManager; modulePassManager.add(llvm::createTargetTransformInfoWrapperPass( - targetMachine ? targetMachine->getTargetIRAnalysis() - : llvm::TargetIRAnalysis())); + targetMachineWithOptions->getTargetIRAnalysis())); + + llvm::legacy::FunctionPassManager functionPassManager(module.get()); functionPassManager.add(llvm::createTargetTransformInfoWrapperPass( - targetMachine ? 
targetMachine->getTargetIRAnalysis() - : llvm::TargetIRAnalysis())); + targetMachineWithOptions->getTargetIRAnalysis())); llvm::PassManagerBuilder b; b.OptLevel = kOptLevel; b.Inliner = llvm::createFunctionInliningPass(b.OptLevel, 0, false); b.LoopVectorize = true; b.SLPVectorize = true; - - if (targetMachine) { - targetMachine->adjustPassManager(b); - } - + targetMachineWithOptions->adjustPassManager(b); b.populateFunctionPassManager(functionPassManager); b.populateModulePassManager(modulePassManager); @@ -229,7 +319,6 @@ class CodeGen_TC : public Halide::Internal::CodeGen_X86 { for (llvm::Module::iterator i = module->begin(); i != module->end(); i++) { functionPassManager.run(*i); } - functionPassManager.doFinalization(); modulePassManager.run(*module); @@ -291,6 +380,71 @@ Halide::Expr CodeGen_TC::makeHalideExpr(isl::ast_expr expr) { } class LLVMCodegen { + public: + LLVMCodegen( + const std::string& specializedName, + const Scop& scop, + const llvm::TargetMachine& targetMachine) + : scop_(scop), + islCg_(codegenISL(scop_)), + iteratorMaps_(islCg_.iteratorMaps), + stmtSubscripts_(islCg_.stmtSubscripts), + targetMachine(targetMachine), + // we don't use Halide to tinker with llvm::Module optimization so + // the Halide target can be whatever. + halide_cg(Halide::get_host_target()) { + halide_cg.set_context(llvmCtx); + halide_cg.init_module(); + halide_cg.get_module()->setDataLayout(targetMachine.createDataLayout()); + halide_cg.get_module()->setTargetTriple( + targetMachine.getTargetTriple().str()); + auto entry = createSignature( + scop.halide.inputs, + scop.halide.outputs, + scop.halide.params, + specializedName); + auto exit = emitAst(islCg_.astNode, entry); + halide_cg.get_builder().SetInsertPoint(exit); + halide_cg.get_builder().CreateRetVoid(); + + TC_CHECK(!llvm::verifyModule(*halide_cg.get_module())) + << "LLVM generated module is invalid." 
<< str().c_str(); + + halide_cg.optimize_module(targetMachine); + + if (FLAGS_llvm_dump_asm) { + std::string pat("/tmp/tcXXXXXX"); + std::vector ifn(pat.begin(), pat.end()); + TC_CHECK_GE(mkstemp(ifn.data()), 0); // string.c_str is const char* + std::string fileName(ifn.begin(), ifn.end()); + std::string optFile = fileName + "-opt.ll"; + std::string asmFile = fileName + ".s"; + // cstdio's std::remove to delete files + tc::ScopeGuard sgi([&]() { + std::remove(optFile.c_str()); + std::remove(asmFile.c_str()); + }); + { + std::ofstream ostream(optFile, std::ios::binary); + ostream << str(); + } + utils::checkedSystemCall( + std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/llc", + {FLAGS_llvm_dump_asm_options, + utils::CPUID::llcFlags(), + optFile, + std::string("-o ") + asmFile}); + + std::ifstream is(asmFile); + std::string str( + (std::istreambuf_iterator(is)), + std::istreambuf_iterator()); + LOG(INFO) << "Dumping asm for: " << utils::CPUID::llcFlags() << "\n" + << str; + } + } + + private: void collectTensor(const Halide::OutputImageParam& t) { auto sizes = getTensorSizesWithoutLeadingDim(t, scop_.context()); if (not sizes.empty()) { @@ -321,21 +475,16 @@ class LLVMCodegen { } } - public: - LLVMCodegen( - const Scop& scop, - const IteratorMapsType& iteratorMaps, - const StmtSubscriptExprMapType& stmtSubscripts) - : scop_(scop), - iteratorMaps_(iteratorMaps), - stmtSubscripts_(stmtSubscripts), - halide_cg(Halide::Target( - Halide::Target::OSUnknown, - Halide::Target::X86, - 64)) { - halide_cg.set_context(llvmCtx); - - halide_cg.init_module(); + llvm::Type* makePtrToArrayType( + llvm::Type* baseTy, + const std::vector& sizes) { + TC_CHECK_GE(sizes.size(), 1u); + TC_CHECK(baseTy); + llvm::Type* arrTy = llvm::ArrayType::get(baseTy, sizes.back()); + for (auto s = sizes.rbegin() + 1; s != sizes.rend(); ++s) { + arrTy = llvm::ArrayType::get(arrTy, *s); + } + return arrTy->getPointerTo(); } // This creates a signature of the form: @@ -416,19 +565,6 @@ class LLVMCodegen { 
return nullptr; } - private: - llvm::Type* makePtrToArrayType( - llvm::Type* baseTy, - const std::vector& sizes) { - TC_CHECK_GE(sizes.size(), 1u); - TC_CHECK(baseTy); - llvm::Type* arrTy = llvm::ArrayType::get(baseTy, sizes.back()); - for (auto s = sizes.rbegin() + 1; s != sizes.rend(); ++s) { - arrTy = llvm::ArrayType::get(arrTy, *s); - } - return arrTy->getPointerTo(); - } - llvm::BasicBlock* emitIf( isl::ast_node_if node, llvm::BasicBlock* insertionPoint) { @@ -547,6 +683,7 @@ class LLVMCodegen { private: const Scop& scop_; + const IslCodegenRes islCg_; const IteratorMapsType& iteratorMaps_; const StmtSubscriptExprMapType& stmtSubscripts_; @@ -554,92 +691,16 @@ class LLVMCodegen { std::vector argNames_; public: + const llvm::TargetMachine& targetMachine; CodeGen_TC halide_cg; }; - -struct IslCodegenRes { - IteratorMapsType iteratorMaps; - StmtSubscriptExprMapType stmtSubscripts; - isl::ast_node astNode; -}; - -isl::ast_node collectIteratorMaps( - isl::ast_node node, - isl::ast_build build, - IteratorMapsType& iteratorMaps, - const Scop& scop, - StmtSubscriptExprMapType& stmtSubscripts) { - auto user = node.as(); - TC_CHECK(user); - auto expr = user.get_expr().as(); - auto schedule = build.get_schedule(); - auto scheduleMap = isl::map::from_union_map(schedule); - - auto stmtId = expr.get_arg(0).as().get_id(); - TC_CHECK_EQ(0u, iteratorMaps.count(stmtId)) << "entry exists: " << stmtId; - auto iteratorMap = isl::pw_multi_aff(scheduleMap.reverse()); - auto tuple = scop.halide.domains.at(stmtId).tuple; - auto& stmtIteratorMap = iteratorMaps[stmtId]; - for (int i = 0; i < tuple.size(); ++i) { - auto expr = build.expr_from(iteratorMap.get_pw_aff(i)); - stmtIteratorMap.emplace(tuple.get_id(i).get_name(), expr); - } - auto& subscripts = stmtSubscripts[stmtId]; - auto provide = - scop.halide.statements.at(stmtId).as(); - for (auto e : provide->args) { - const auto& map = iteratorMap; - auto aff = scop.makeIslAffFromStmtExpr(stmtId, e); - auto pulled = 
isl::pw_aff(aff).pullback(map); - TC_CHECK_EQ(pulled.n_piece(), 1); - subscripts.push_back(build.expr_from(pulled)); - } - return node.set_annotation(stmtId); -} - -IslCodegenRes codegenISL(const Scop& scop) { - IteratorMapsType iteratorMaps; - StmtSubscriptExprMapType stmtSubscripts; - auto collect = [&iteratorMaps, &scop, &stmtSubscripts]( - isl::ast_node n, isl::ast_build b) -> isl::ast_node { - auto& uv = iteratorMaps; - return collectIteratorMaps(n, b, uv, scop, stmtSubscripts); - }; - - auto schedule = detail::toIslSchedule(scop.scheduleRoot()); - auto astBuild = isl::ast_build(schedule.get_ctx()); - astBuild = astBuild.set_at_each_domain(collect); - auto root = scop.scheduleRoot(); - astBuild = astBuild.set_iterators(Codegen::makeLoopIterators(root)); - auto astNode = astBuild.node_from(schedule); - return { - std::move(iteratorMaps), std::move(stmtSubscripts), std::move(astNode)}; -} - } // namespace std::unique_ptr emitLLVMKernel( const std::string& specializedName, const Scop& scop, - const llvm::DataLayout& dataLayout) { - auto islCg = codegenISL(scop); - LLVMCodegen cg(scop, islCg.iteratorMaps, islCg.stmtSubscripts); - cg.halide_cg.get_module()->setDataLayout(dataLayout); - cg.halide_cg.get_module()->setTargetTriple( - llvm::EngineBuilder().selectTarget()->getTargetTriple().str()); - auto entry = cg.createSignature( - scop.halide.inputs, - scop.halide.outputs, - scop.halide.params, - specializedName); - auto exit = cg.emitAst(islCg.astNode, entry); - cg.halide_cg.get_builder().SetInsertPoint(exit); - cg.halide_cg.get_builder().CreateRetVoid(); - - TC_CHECK(!llvm::verifyModule(*cg.halide_cg.get_module())) - << "LLVM generated module is invalid." 
<< cg.str().c_str(); - - cg.halide_cg.optimize_module(); + const llvm::TargetMachine& targetMachine) { + LLVMCodegen cg(specializedName, scop, targetMachine); return cg.halide_cg.move_module(); } diff --git a/tc/core/polyhedral/codegen_llvm.h b/tc/core/polyhedral/codegen_llvm.h index 08c5a9d31..3e84b7aee 100644 --- a/tc/core/polyhedral/codegen_llvm.h +++ b/tc/core/polyhedral/codegen_llvm.h @@ -55,7 +55,7 @@ struct Scop; std::unique_ptr emitLLVMKernel( const std::string& specializedName, const Scop& scop, - const llvm::DataLayout& dataLayout); + const llvm::TargetMachine& targetMachine); // TODO: I want to do something like the following, but compilation was unhappy // using initialize_llvm = Halide::Internal::CodeGen_LLVM::initialize_llvm; diff --git a/tc/core/polyhedral/llvm_jit.cc b/tc/core/polyhedral/llvm_jit.cc index ef80fb6dc..00f3f20ec 100644 --- a/tc/core/polyhedral/llvm_jit.cc +++ b/tc/core/polyhedral/llvm_jit.cc @@ -31,11 +31,11 @@ #include "tc/core/check.h" #include "tc/core/flags.h" #include "tc/core/polyhedral/codegen_llvm.h" +#include "tc/core/utils/cpu.h" using namespace llvm; namespace tc { - Jit::Jit() : ES(), Resolver(llvm::orc::createLegacyLookupResolver( @@ -51,7 +51,7 @@ Jit::Jit() return nullptr; }, [](Error err) { throw std::runtime_error("Lookup failed!"); })), - TM_(EngineBuilder().selectTarget()), + TM_(EngineBuilder().setMCPU(utils::CPUID::mcpu()).selectTarget()), DL_(TM_->createDataLayout()), objectLayer_( ES, @@ -71,8 +71,8 @@ void Jit::addModule(std::shared_ptr M) { std::shared_ptr Jit::codegenScop( const std::string& specializedName, const polyhedral::Scop& scop) { - std::shared_ptr mod = emitLLVMKernel( - specializedName, scop, getTargetMachine().createDataLayout()); + std::shared_ptr mod = + emitLLVMKernel(specializedName, scop, getTargetMachine()); addModule(mod); return mod; } diff --git a/tc/core/polyhedral/llvm_jit.h b/tc/core/polyhedral/llvm_jit.h index 75031d106..843f30b95 100644 --- a/tc/core/polyhedral/llvm_jit.h +++ 
b/tc/core/polyhedral/llvm_jit.h @@ -16,15 +16,12 @@ #pragma once #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/Target/TargetMachine.h" -#if LLVM_VERSION_MAJOR > 6 -#include "llvm/ExecutionEngine/Orc/Core.h" -#endif - namespace tc { namespace polyhedral { @@ -33,10 +30,8 @@ class Scop; class Jit { private: -#if LLVM_VERSION_MAJOR > 6 llvm::orc::ExecutionSession ES; std::shared_ptr Resolver; -#endif std::unique_ptr TM_; const llvm::DataLayout DL_; llvm::orc::RTDyldObjectLinkingLayer objectLayer_; diff --git a/tc/core/utils/cpu.h b/tc/core/utils/cpu.h new file mode 100644 index 000000000..81a7e65dd --- /dev/null +++ b/tc/core/utils/cpu.h @@ -0,0 +1,153 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include + +#include "tc/core/flags.h" +#include "tc/core/utils/cpu.h" + +namespace tc { +namespace utils { + +#define INTEL_ebx 0x756e6547 +#define INTEL_ecx 0x6c65746e +#define INTEL_edx 0x49656e69 + +/** + * We start with a reasonable subset of the processors listed in the result + * of running the command: + * llvm-as < /dev/null | llc -march=x86-64 -mcpu=help + */ +struct CPUID { + public: + CPUID() : eax(0), ebx(0), ecx(0), edx(0) { + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + } + + static bool isIntel() { + unsigned int a, b, c, d; + __get_cpuid(0, &a, &b, &c, &d); + return b == INTEL_ebx && c == INTEL_ecx && d == INTEL_edx; + } + + using Stepping = unsigned char; + using Model = unsigned char; + using Family = unsigned char; + using ProcessorType = unsigned char; + using ExtendedModel = unsigned char; + using ExtendedFamily = unsigned short; + struct FullModel { + FullModel(Model m, ExtendedModel em) { + val = (em << 4) + m; + } + operator unsigned short() { + return val; + } + operator const unsigned short&() const { + return val; + } + unsigned short val; + }; + + static const std::unordered_map& + intelFamily6ExtendedFamily0() { + static std::unordered_map m{ + {FullModel(0xD, 0x3), "broadwell"}, // client + {FullModel(0x7, 0x4), "broadwell"}, // client + {FullModel(0xF, 0x4), "broadwell"}, // server + {FullModel(0x6, 0x5), "broadwell"}, // server + {FullModel(0x6, 0x6), "cannonlake"}, // client + {FullModel(0x6, 0x4), "haswell"}, // client + {FullModel(0x5, 0x4), "haswell"}, // client + {FullModel(0xC, 0x3), "haswell"}, // client + {FullModel(0xF, 0x3), "haswell"}, // server + {FullModel(0xA, 0x3), "ivybridge"}, // client + {FullModel(0xE, 0x3), "ivybridge"}, // server + {FullModel(0xA, 0x2), "sandybridge"}, // client + {FullModel(0xD, 0x2), "sandybridge"}, // server + {FullModel(0xE, 0x4), "skylake"}, // client + {FullModel(0xE, 0x5), "skylake"}, // client + {FullModel(0x5, 0x5), "skylake-avx512"}, // server + 
{FullModel(0x5, 0x2), "westmere"}, // client + {FullModel(0xC, 0x2), "westmere"}, // server + {FullModel(0xF, 0x2), "westmere"}, // server + }; + return m; + }; + + static std::tuple< + Stepping, + Model, + Family, + ProcessorType, + ExtendedModel, + ExtendedFamily> + parseCPU() { + CPUID id; + return std::make_tuple( + static_cast(id.eax & 0x0000000F), // 3:0 + static_cast((id.eax >> 4) & 0x0000000F), // 7:4 + static_cast((id.eax >> 8) & 0x0000000F), // 11:8 + static_cast((id.eax >> 12) & 0x00000003), // 13:12 + static_cast((id.eax >> 16) & 0x0000000F), // 19:16 + static_cast((id.eax >> 20) & 0x000000FF) // 27:20 + ); + } + +#define INTEL_FAMILY_6 0x6 +#define INTEL_EXTENDED_FAMILY_0 0x0 + static std::string mcpu() { + if (FLAGS_mcpu.size() > 0) { + return FLAGS_mcpu; + } + + TC_CHECK(CPUID::isIntel()); + auto parsedCPU = CPUID::parseCPU(); + auto model = std::get<1>(parsedCPU); + auto family = std::get<2>(parsedCPU); + auto extendedModel = std::get<4>(parsedCPU); + auto extendedFamily = std::get<5>(parsedCPU); + if (family == INTEL_FAMILY_6 && extendedFamily == INTEL_EXTENDED_FAMILY_0) { + if (intelFamily6ExtendedFamily0().count(FullModel(model, extendedModel)) > + 0) { + return intelFamily6ExtendedFamily0().at( + FullModel(model, extendedModel)); + } + LOG(ERROR) << "FullModel: " + << (unsigned short)FullModel(model, extendedModel) + << " -> unspecified x86-64"; + return "x86-64"; + } + TC_CHECK(false) << "Unsupported family/model/extendedmodel: " << family + << "/" << model << "/" << extendedModel; + return ""; + } + + static std::string llcFlags() { + return std::string("-march=x86-64 -mcpu=") + CPUID::mcpu(); + } + + public: + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; +}; +} // namespace utils +} // namespace tc diff --git a/tc/core/utils/system.h b/tc/core/utils/system.h new file mode 100644 index 000000000..2f72a5268 --- /dev/null +++ b/tc/core/utils/system.h @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2017-present, Facebook, 
Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include "tc/core/check.h" + +namespace tc { +namespace utils { +inline void checkedSystemCall( + const std::string& cmd, + const std::vector& args) { + std::stringstream command; + command << cmd << " "; + for (const auto& s : args) { + command << s << " "; + } + TC_CHECK_EQ(std::system(command.str().c_str()), 0) << command.str(); +} +} // namespace utils +} // namespace tc