|
1 | 1 | #include "comgr-cache-command.h"
|
| 2 | +#include "comgr-cache.h" |
| 3 | +#include "comgr-device-libs.h" |
| 4 | +#include "comgr-env.h" |
| 5 | +#include "comgr.h" |
| 6 | + |
| 7 | +#include <clang/Basic/Version.h> |
| 8 | +#include <clang/Driver/Job.h> |
| 9 | +#include <llvm/ADT/StringExtras.h> |
| 10 | +#include <llvm/ADT/StringSet.h> |
| 11 | + |
| 12 | +#include <optional> |
2 | 13 |
|
3 | 14 | namespace COMGR {
|
4 | 15 | using namespace llvm;
|
5 | 16 | using namespace clang;
|
6 | 17 |
|
| 18 | +namespace { |
// Locale-independent replacement for std::isalnum.
// std::isalnum is locale dependent and can have issues depending on the
// stdlib version and application, so we prefer to avoid it.
//
// Returns true only for the ASCII characters '0'-'9', 'a'-'z' and 'A'-'Z'.
// Declared constexpr so it can also be evaluated at compile time and so that
// no lookup tables have to be materialised on every call.
constexpr bool isalnum(char c) {
  return ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') ||
         ('A' <= c && c <= 'Z');
}
| 30 | + |
| 31 | +std::optional<size_t> searchComgrTmpModel(StringRef S) { |
| 32 | + // Ideally, we would use std::regex_search with the regex |
| 33 | + // "comgr-[[:alnum:]]{6}". However, due to a bug in stdlibc++ |
| 34 | + // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85824) we have to roll our |
| 35 | + // own search of this regular expression. This bug resulted in a crash in |
| 36 | + // luxmarkv3, during the std::regex constructor. |
| 37 | + const StringRef Prefix = "comgr-"; |
| 38 | + const size_t AlnumCount = 6; |
| 39 | + |
| 40 | + size_t N = S.size(); |
| 41 | + size_t Pos = S.find(Prefix); |
| 42 | + |
| 43 | + size_t AlnumStart = Pos + Prefix.size(); |
| 44 | + size_t AlnumEnd = AlnumStart + AlnumCount; |
| 45 | + if (Pos == StringRef::npos || N < AlnumEnd) |
| 46 | + return std::nullopt; |
| 47 | + |
| 48 | + for (size_t i = AlnumStart; i < AlnumEnd; ++i) { |
| 49 | + if (!isalnum(S[i])) |
| 50 | + return std::nullopt; |
| 51 | + } |
| 52 | + |
| 53 | + return Pos; |
| 54 | +} |
| 55 | + |
| 56 | +bool hasDebugOrProfileInfo(ArrayRef<const char *> Args) { |
| 57 | + // These are too difficult to handle since they generate debug info that |
| 58 | + // refers to the temporary paths used by comgr. |
| 59 | + const StringRef Flags[] = {"-fdebug-info-kind", "-fprofile", "-coverage", |
| 60 | + "-ftime-trace"}; |
| 61 | + |
| 62 | + for (StringRef Arg : Args) { |
| 63 | + for (StringRef Flag : Flags) { |
| 64 | + if (Arg.starts_with(Flag)) |
| 65 | + return true; |
| 66 | + } |
| 67 | + } |
| 68 | + return false; |
| 69 | +} |
| 70 | + |
| 71 | +void addString(CachedCommandAdaptor::HashAlgorithm &H, StringRef S) { |
| 72 | + // hash size + contents to avoid collisions |
| 73 | + // for example, we have to ensure that the result of hashing "AA" "BB" is |
| 74 | + // different from "A" "ABB" |
| 75 | + H.update(S.size()); |
| 76 | + H.update(S); |
| 77 | +} |
| 78 | + |
| 79 | +void addFileContents(CachedCommandAdaptor::HashAlgorithm &H, StringRef Buf) { |
| 80 | + // this is a workaround temporary paths getting in the output files of the |
| 81 | + // different commands in #line directives in preprocessed files, and the |
| 82 | + // ModuleID or source_filename in the bitcode. |
| 83 | + while (!Buf.empty()) { |
| 84 | + std::optional<size_t> ComgrTmpPos = searchComgrTmpModel(Buf); |
| 85 | + if (!ComgrTmpPos) { |
| 86 | + addString(H, Buf); |
| 87 | + break; |
| 88 | + } |
| 89 | + |
| 90 | + StringRef ToHash = Buf.substr(0, *ComgrTmpPos); |
| 91 | + addString(H, ToHash); |
| 92 | + Buf = Buf.substr(ToHash.size() + StringRef("comgr-xxxxxx").size()); |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +Error addFile(CachedCommandAdaptor::HashAlgorithm &H, StringRef Path) { |
| 97 | + auto BufOrError = MemoryBuffer::getFile(Path); |
| 98 | + if (std::error_code EC = BufOrError.getError()) { |
| 99 | + return errorCodeToError(EC); |
| 100 | + } |
| 101 | + StringRef Buf = BufOrError.get()->getBuffer(); |
| 102 | + |
| 103 | + addFileContents(H, Buf); |
| 104 | + |
| 105 | + return Error::success(); |
| 106 | +} |
| 107 | + |
| 108 | +template <typename IteratorTy> |
| 109 | +bool skipProblematicFlag(IteratorTy &It, const IteratorTy &End) { |
| 110 | + // Skip include paths, these should have been handled by preprocessing the |
| 111 | + // source first. Sadly, these are passed also to the middle-end commands. Skip |
| 112 | + // debug related flags (they should be ignored) like -dumpdir (used for |
| 113 | + // profiling/coverage/split-dwarf) |
| 114 | + StringRef Arg = *It; |
| 115 | + static const StringSet<> FlagsWithPathArg = {"-I", "-dumpdir"}; |
| 116 | + bool IsFlagWithPathArg = It + 1 != End && FlagsWithPathArg.contains(Arg); |
| 117 | + if (IsFlagWithPathArg) { |
| 118 | + ++It; |
| 119 | + return true; |
| 120 | + } |
| 121 | + |
| 122 | + // Clang always appends the debug compilation dir, |
| 123 | + // even without debug info (in comgr it matches the current directory). We |
| 124 | + // only consider it if the user specified debug information |
| 125 | + bool IsFlagWithSingleArg = Arg.starts_with("-fdebug-compilation-dir="); |
| 126 | + if (IsFlagWithSingleArg) { |
| 127 | + return true; |
| 128 | + } |
| 129 | + |
| 130 | + return false; |
| 131 | +} |
| 132 | + |
| 133 | +SmallVector<StringRef, 1> getInputFiles(driver::Command &Command) { |
| 134 | + const auto &CommandInputs = Command.getInputInfos(); |
| 135 | + |
| 136 | + SmallVector<StringRef, 1> Paths; |
| 137 | + Paths.reserve(CommandInputs.size()); |
| 138 | + |
| 139 | + for (const auto &II : CommandInputs) { |
| 140 | + if (!II.isFilename()) |
| 141 | + continue; |
| 142 | + Paths.push_back(II.getFilename()); |
| 143 | + } |
| 144 | + |
| 145 | + return Paths; |
| 146 | +} |
| 147 | + |
| 148 | +bool isSourceCodeInput(const driver::InputInfo &II) { |
| 149 | + return driver::types::isSrcFile(II.getType()); |
| 150 | +} |
| 151 | +} // namespace |
| 152 | + |
| 153 | +Expected<CachedCommandAdaptor::Identifier> |
| 154 | +CachedCommandAdaptor::getIdentifier() const { |
| 155 | + CachedCommandAdaptor::HashAlgorithm H; |
| 156 | + H.update(getClass()); |
| 157 | + H.update(env::shouldEmitVerboseLogs()); |
| 158 | + addString(H, getClangFullVersion()); |
| 159 | + addString(H, getComgrHashIdentifier()); |
| 160 | + addString(H, getDeviceLibrariesIdentifier()); |
| 161 | + |
| 162 | + if (Error E = addInputIdentifier(H)) |
| 163 | + return E; |
| 164 | + |
| 165 | + addOptionsIdentifier(H); |
| 166 | + |
| 167 | + CachedCommandAdaptor::Identifier Id; |
| 168 | + toHex(H.final(), true, Id); |
| 169 | + return Id; |
| 170 | +} |
| 171 | + |
7 | 172 | CachedCommand::CachedCommand(driver::Command &Command,
|
8 | 173 | DiagnosticOptions &DiagOpts,
|
9 | 174 | llvm::vfs::FileSystem &VFS,
|
10 | 175 | ExecuteFnTy &&ExecuteImpl)
|
11 | 176 | : Command(Command), DiagOpts(DiagOpts), VFS(VFS),
|
12 | 177 | ExecuteImpl(std::move(ExecuteImpl)) {}
|
13 | 178 |
|
| 179 | +Error CachedCommand::addInputIdentifier(HashAlgorithm &H) const { |
| 180 | + auto Inputs(getInputFiles(Command)); |
| 181 | + for (StringRef Input : Inputs) { |
| 182 | + if (Error E = addFile(H, Input)) { |
| 183 | + // call Error's constructor again to silence copy elision warning |
| 184 | + return Error(std::move(E)); |
| 185 | + } |
| 186 | + } |
| 187 | + return Error::success(); |
| 188 | +} |
| 189 | + |
| 190 | +void CachedCommand::addOptionsIdentifier(HashAlgorithm &H) const { |
| 191 | + auto Inputs(getInputFiles(Command)); |
| 192 | + StringRef Output = Command.getOutputFilenames().front(); |
| 193 | + ArrayRef<const char *> Arguments = Command.getArguments(); |
| 194 | + for (auto It = Arguments.begin(), End = Arguments.end(); It != End; ++It) { |
| 195 | + if (skipProblematicFlag(It, End)) |
| 196 | + continue; |
| 197 | + |
| 198 | + StringRef Arg = *It; |
| 199 | + static const StringSet<> FlagsWithFileArgEmbededInComgr = { |
| 200 | + "-include-pch", "-mlink-builtin-bitcode"}; |
| 201 | + if (FlagsWithFileArgEmbededInComgr.contains(Arg)) { |
| 202 | + // The next argument is a path to a "secondary" input-file (pre-compiled |
| 203 | + // header or device-libs builtin) |
| 204 | + // These two files kinds of files are embedded in comgr at compile time, |
| 205 | + // and in normally their remain constant with comgr's build. The user is |
| 206 | + // not able to change them. |
| 207 | + ++It; |
| 208 | + if (It == End) |
| 209 | + break; |
| 210 | + continue; |
| 211 | + } |
| 212 | + |
| 213 | + // input files are considered by their content |
| 214 | + // output files should not be considered at all |
| 215 | + bool IsIOFile = Output == Arg || is_contained(Inputs, Arg); |
| 216 | + if (IsIOFile) |
| 217 | + continue; |
| 218 | + |
| 219 | +#ifndef NDEBUG |
| 220 | + bool IsComgrTmpPath = searchComgrTmpModel(Arg).has_value(); |
| 221 | + // On debug builds, fail on /tmp/comgr-xxxx/... paths. |
| 222 | + // Implicit dependencies should have been considered before. |
| 223 | + // On release builds, add them to the hash to force a cache miss. |
| 224 | + assert(!IsComgrTmpPath && |
| 225 | + "Unexpected flag and path to comgr temporary directory"); |
| 226 | +#endif |
| 227 | + |
| 228 | + addString(H, Arg); |
| 229 | + } |
| 230 | +} |
| 231 | + |
| 232 | +CachedCommand::ActionClass CachedCommand::getClass() const { |
| 233 | + return Command.getSource().getKind(); |
| 234 | +} |
| 235 | + |
| 236 | +bool CachedCommand::canCache() const { |
| 237 | + bool HasOneOutput = Command.getOutputFilenames().size() == 1; |
| 238 | + bool IsPreprocessorCommand = getClass() == driver::Action::PreprocessJobClass; |
| 239 | + |
| 240 | + // This reduces the applicability of the cache, but it helps us deliver |
| 241 | + // something now and deal with the PCH issues later. The cache would still |
| 242 | + // help for spirv compilation (e.g. bitcode->asm) and for intermediate |
| 243 | + // compilation steps |
| 244 | + bool HasSourceCodeInput = any_of(Command.getInputInfos(), isSourceCodeInput); |
| 245 | + |
| 246 | + return HasOneOutput && !IsPreprocessorCommand && !HasSourceCodeInput && |
| 247 | + !hasDebugOrProfileInfo(Command.getArguments()); |
| 248 | +} |
| 249 | + |
| 250 | +Error CachedCommand::writeExecuteOutput(StringRef CachedBuffer) { |
| 251 | + StringRef OutputFilename = Command.getOutputFilenames().front(); |
| 252 | + std::error_code EC; |
| 253 | + raw_fd_ostream Out(OutputFilename, EC); |
| 254 | + if (EC) { |
| 255 | + Error E = createStringError(EC, Twine("Failed to open ") + OutputFilename + |
| 256 | + " : " + EC.message() + "\n"); |
| 257 | + return E; |
| 258 | + } |
| 259 | + |
| 260 | + Out.write(CachedBuffer.data(), CachedBuffer.size()); |
| 261 | + Out.close(); |
| 262 | + if (Out.has_error()) { |
| 263 | + Error E = createStringError(EC, Twine("Failed to write ") + OutputFilename + |
| 264 | + " : " + EC.message() + "\n"); |
| 265 | + return E; |
| 266 | + } |
| 267 | + |
| 268 | + return Error::success(); |
| 269 | +} |
| 270 | + |
| 271 | +Expected<StringRef> CachedCommand::readExecuteOutput() { |
| 272 | + StringRef OutputFilename = Command.getOutputFilenames().front(); |
| 273 | + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = |
| 274 | + MemoryBuffer::getFile(OutputFilename); |
| 275 | + if (!MBOrErr) { |
| 276 | + std::error_code EC = MBOrErr.getError(); |
| 277 | + return createStringError(EC, Twine("Failed to open ") + OutputFilename + |
| 278 | + " : " + EC.message() + "\n"); |
| 279 | + } |
| 280 | + Output = std::move(*MBOrErr); |
| 281 | + return Output->getBuffer(); |
| 282 | +} |
| 283 | + |
14 | 284 | amd_comgr_status_t CachedCommand::execute(llvm::raw_ostream &LogS) {
|
15 | 285 | return ExecuteImpl(Command, LogS, DiagOpts, VFS);
|
16 | 286 | }
|
|
0 commit comments