Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit ccd3662

Browse files
Introduce CPUID
This commit introduces and uses cpuid information to pass the proper llc `mcpu` flag.
1 parent 0079a95 commit ccd3662

File tree

3 files changed

+143
-2
lines changed

3 files changed

+143
-2
lines changed

tc/core/flags.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ DEFINE_bool(llvm_dump_after_opt, false, "Print IR after optimization");
5959
DEFINE_bool(llvm_dump_asm, false, "Print asm");
6060
DEFINE_string(
6161
llvm_dump_asm_options,
62-
"-march=x86-64 -mcpu=broadwell -filetype=asm",
62+
"-filetype=asm",
6363
"Options used when dumping asm");
6464

6565
DEFINE_uint32(

tc/core/polyhedral/codegen_llvm.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "tc/core/polyhedral/schedule_isl_conversion.h"
4343
#include "tc/core/polyhedral/scop.h"
4444
#include "tc/core/scope_guard.h"
45+
#include "tc/core/utils/cpu.h"
4546
#include "tc/core/utils/system.h"
4647
#include "tc/external/isl.h"
4748
#include "tc/tc_config.h"
@@ -661,7 +662,7 @@ std::unique_ptr<llvm::Module> emitLLVMKernel(
661662
}
662663
utils::checkedSystemCall(
663664
std::string(TC_STRINGIFY(TC_LLVM_BIN_DIR)) + "/llc",
664-
{FLAGS_llvm_dump_asm_options, optFile, std::string("-o ") + asmFile});
665+
{FLAGS_llvm_dump_asm_options, utils::CPUID::llcFlags(), optFile, std::string("-o ") + asmFile});
665666
{
666667
std::ifstream is(asmFile);
667668
std::string str(

tc/core/utils/cpu.h

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <cpuid.h>
19+
20+
#include "tc/core/flags.h"
21+
#include "tc/core/utils/cpu.h"
22+
23+
namespace tc {
24+
namespace utils {
25+
26+
// Vendor identification for "GenuineIntel" as returned by CPUID leaf 0: each
// register carries four ASCII characters, least-significant byte first
// (EBX = "Genu", EDX = "ineI", ECX = "ntel"). CPUID::isIntel() compares the
// three registers against these values.
#define INTEL_ebx 0x756e6547
27+
#define INTEL_ecx 0x6c65746e
28+
#define INTEL_edx 0x49656e69
29+
30+
/**
31+
* We start with a reasonable subset of the processors listed in the result
32+
* of running the command:
33+
* llvm-as < /dev/null | llc -march=x86-64 -mcpu=help
34+
*/
35+
struct CPUID {
36+
public:
37+
CPUID() : eax(0), ebx(0), ecx(0), edx(0) {
38+
__get_cpuid(1, &eax, &ebx, &ecx, &edx);
39+
}
40+
41+
static bool isIntel() {
42+
unsigned int a, b, c, d;
43+
__get_cpuid(0, &a, &b, &c, &d);
44+
return b == INTEL_ebx && c == INTEL_ecx && d == INTEL_edx;
45+
}
46+
47+
using Stepping = unsigned char;
48+
using Model = unsigned char;
49+
using Family = unsigned char;
50+
using ProcessorType = unsigned char;
51+
using ExtendedModel = unsigned char;
52+
using ExtendedFamily = unsigned short;
53+
using FullModel = std::tuple<Model, ExtendedModel>;
54+
55+
struct LessCmp {
56+
bool operator()(const FullModel& a, const FullModel& b) const {
57+
return std::get<0>(a) < std::get<0>(b) && std::get<1>(a) < std::get<1>(b);
58+
}
59+
};
60+
static const std::map<FullModel, std::string, LessCmp>&
61+
intelFamily6ExtendedFamily0() {
62+
static std::map<FullModel, std::string, LessCmp> map{
63+
{FullModel(0xD, 0x3), "broadwell"},
64+
{FullModel(0x7, 0x4), "broadwell"},
65+
{FullModel(0x6, 0x6), "cannonlake"},
66+
{FullModel(0xC, 0x3), "haswell"},
67+
{FullModel(0xF, 0x3), "haswell"},
68+
{FullModel(0x5, 0x4), "haswell"},
69+
{FullModel(0x6, 0x4), "haswell"},
70+
{FullModel(0xA, 0x3), "ivybridge"},
71+
{FullModel(0xE, 0x3), "ivybridge"},
72+
{FullModel(0xA, 0x2), "sandybridge"},
73+
{FullModel(0xD, 0x2), "sandybridge"},
74+
{FullModel(0xE, 0x4), "skylake"},
75+
{FullModel(0xE, 0x5), "skylake"},
76+
{FullModel(0x5, 0x5), "skylake-avx512"},
77+
{FullModel(0x5, 0x2), "westmere"},
78+
{FullModel(0xC, 0x2), "westmere"},
79+
{FullModel(0xF, 0x2), "westmere"},
80+
};
81+
return map;
82+
};
83+
84+
static std::tuple<
85+
Stepping,
86+
Model,
87+
Family,
88+
ProcessorType,
89+
ExtendedModel,
90+
ExtendedFamily>
91+
parseCPU() {
92+
CPUID id;
93+
return std::make_tuple(
94+
static_cast<Stepping>(id.eax & 0x0000000F), // 3:0
95+
static_cast<Model>((id.eax >> 4) & 0x0000000F), // 7:4
96+
static_cast<Family>((id.eax >> 8) & 0x0000000F), // 11:8
97+
static_cast<ProcessorType>((id.eax >> 12) & 0x00000003), // 13:12
98+
static_cast<ExtendedModel>((id.eax >> 16) & 0x0000000F), // 19:16
99+
static_cast<ExtendedFamily>((id.eax >> 20) & 0x000000FF) // 27:20
100+
);
101+
}
102+
103+
#define INTEL_FAMILY_6 0x6
104+
#define INTEL_EXTENDED_FAMILY_0 0x0
105+
static std::string mcpu() {
106+
if (FLAGS_mcpu.size() > 0) {
107+
return FLAGS_mcpu;
108+
}
109+
110+
TC_CHECK(CPUID::isIntel());
111+
auto parsedCPU = CPUID::parseCPU();
112+
auto model = std::get<1>(parsedCPU);
113+
auto family = std::get<2>(parsedCPU);
114+
auto extendedModel = std::get<4>(parsedCPU);
115+
auto extendedFamily = std::get<5>(parsedCPU);
116+
if (family == INTEL_FAMILY_6 && extendedFamily == INTEL_EXTENDED_FAMILY_0) {
117+
if (intelFamily6ExtendedFamily0().count(FullModel(model, extendedModel)) >
118+
0) {
119+
return intelFamily6ExtendedFamily0().at(
120+
FullModel(model, extendedModel));
121+
}
122+
return "x86-64";
123+
}
124+
TC_CHECK(false) << "Unsupported family/model/extendedmodel: " << family
125+
<< "/" << model << "/" << extendedModel;
126+
return "";
127+
}
128+
129+
static std::string llcFlags() {
130+
return std::string("-march=x86-64 -mcpu=") + CPUID::mcpu();
131+
}
132+
133+
public:
134+
unsigned int eax;
135+
unsigned int ebx;
136+
unsigned int ecx;
137+
unsigned int edx;
138+
};
139+
} // namespace utils
140+
} // namespace tc

0 commit comments

Comments
 (0)