Skip to content

[lld][ELF] linker script lexer #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
85ea00c
[lld][ELF] Added tokenizer file for linker script
hongyu-dev Jul 4, 2024
9c30902
[lld][ELF] update Linkerscript lexer
hongyu-dev Jul 6, 2024
98ea972
[lld][ELF] added new files for new linker script lexer
hongyu-dev Jul 7, 2024
571c9b4
[lld][ELF] update switch current char
hongyu-dev Jul 7, 2024
ba0cdf3
[lld][ELF] updated token and lexer
hongyu-dev Jul 8, 2024
252e217
[lld][ELF] added llvm source manager and SMDiagnostic
hongyu-dev Jul 8, 2024
4cabbbb
[lld][ELF] updated slicing token from char to StringRef
hongyu-dev Jul 9, 2024
4cafc77
[lld][ELF] update lexer and tokens
hongyu-dev Jul 10, 2024
df59067
[lld][ELF] added more switch case for generating tokens
hongyu-dev Jul 11, 2024
4b0583f
[lld][ELF] Added TokenInfo struct and unittest files
hongyu-dev Jul 13, 2024
2116448
[lld][ELF] Lexer update
hongyu-dev Jul 14, 2024
c6e9ff2
[lld][ELF] updated unittest
hongyu-dev Jul 14, 2024
c5463eb
[lld][ELF] update lexer unittest
hongyu-dev Jul 15, 2024
1fde417
[lld][ELF] added more unittest cases for linker script lexer
hongyu-dev Jul 15, 2024
05c781c
[lld][ELF] added * and = cases in new lexer
hongyu-dev Jul 15, 2024
200afda
[lld][ELF] change leading undercase case as identifier
hongyu-dev Jul 15, 2024
7077d18
[lld][ELF] NFC update identify to identifier
hongyu-dev Jul 15, 2024
ef9b874
[lld][ELF] added test case for PROVIDE
hongyu-dev Jul 15, 2024
5960675
[lld][ELF] added test case and change "." tokens
hongyu-dev Jul 15, 2024
882c59f
[lld][ELF] fixed pos error in getDigits
hongyu-dev Jul 15, 2024
b1a46ae
[lld][ELF] missing '-' and '+' case
hongyu-dev Jul 16, 2024
bcccd0a
[lld][ELF] Added test for addr-zero
hongyu-dev Jul 16, 2024
75fa6a5
[lld][ELF] Add unit test for addr.test
hongyu-dev Jul 16, 2024
eecdc0b
[lld][ELF] Add unittest for align-empty.test
hongyu-dev Jul 16, 2024
21b84b8
[lld][ELF] Add test case for Memory
hongyu-dev Jul 16, 2024
42af510
[lld][ELF] add unittest case from bss-fill.test
hongyu-dev Jul 16, 2024
e25a2b4
[lld][ELF] Add unit test case for CONSTRUCTORS
hongyu-dev Jul 17, 2024
b4ec7d7
[lld][ELF] Add unit test based on data-commands2.test
hongyu-dev Jul 17, 2024
1f9979d
[lld][ELF] Add unittest for DEFINED
hongyu-dev Jul 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/ELF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ add_lld_library(lldELF
InputSection.cpp
LTO.cpp
LinkerScript.cpp
LinkerScriptLexer.cpp
MapFile.cpp
MarkLive.cpp
OutputSections.cpp
Expand Down
331 changes: 331 additions & 0 deletions lld/ELF/LinkerScriptLexer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
//===- LinkerScriptLexer.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "LinkerScriptLexer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;
using namespace lld;
using namespace lld::elf;

// LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM,
// llvm::SMDiagnostic &Err)
// : MB(MB), ErrorInfo(Err), SM(SM) {
// curStringRef = MB.getBuffer();
// }

// Builds a lexer over the contents of MB. The buffer reference is kept in the
// MB member, and the unconsumed-input cursor starts at the whole buffer.
LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) : MB(MB) {
  // Nothing has been consumed yet, so the cursor covers the entire buffer.
  curStringRef = this->MB.getBuffer();
}

/*
bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const {
ErrorInfo = SM.GetMessage(ErrorLoc, llvm::SourceMgr::DK_Error, Msg);
return true;
}

void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const {
SM.PrintMessage(WarningLoc, llvm::SourceMgr::DK_Warning, Msg);
}
*/

// bool LinkerScriptLexer::expect(ScriptToken token) { return token == tok1; }

void LinkerScriptLexer::advanceLexer() { curToken = getTokenInfo(); }

// Consumes the first `pos` characters of the remaining input and returns them
// as a token of the given kind. `pos` defaults to 1 for single-character
// tokens. Callers in this file may pass a length larger than what is left
// (e.g. StringRef::npos); the StringRef slicing helpers clamp to the end of
// the input, so in that case everything that remains is consumed.
inline LinkerScriptLexer::TokenInfo
LinkerScriptLexer::advanceTokenInfo(ScriptToken kind, size_t pos = 1) {
  // TODO: special case for kind == ScriptToken::Error
  const llvm::StringRef tokenText = curStringRef.take_front(pos);
  // Move the cursor past the text that was just handed out.
  curStringRef = curStringRef.substr(pos);
  TokenInfo info{kind, tokenText};
  return info;
}

// Lexes and returns the next token from the remaining input.
//
// Comments and leading whitespace are skipped first. The first remaining
// character then selects the sub-lexer:
//   - nothing left  -> an Eof token,
//   - a digit       -> getDigits(),
//   - a letter      -> getCommandOrIdentifier(),
//   - anything else -> getSymbolToken().
LinkerScriptLexer::TokenInfo LinkerScriptLexer::getTokenInfo() {
  curStringRef = skipComments();

  // TODO: make sure the empty situation is not an error
  if (curStringRef.empty())
    return advanceTokenInfo(ScriptToken::Eof);

  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a plain (possibly negative) char for bytes >= 0x80 is undefined
  // behaviour, so convert first.
  const auto c = static_cast<unsigned char>(curStringRef.front());
  if (std::isdigit(c))
    return getDigits();
  if (std::isalpha(c))
    return getCommandOrIdentifier();
  return getSymbolToken();
}

// Advances curStringRef past any run of block comments ("/* ... */"), line
// comments ("# ..." up to and including the newline) and leading whitespace,
// and returns the resulting remaining input.
//
// If a block comment is never closed, the rest of the input is discarded and
// an empty remainder is returned. The empty remainder still points at the end
// of the current buffer (rather than at an unrelated string literal), and
// curStringRef is updated to match the returned value.
llvm::StringRef LinkerScriptLexer::skipComments() {
  // this code now is copied from ScriptLexer.cpp
  // and modified so it can use SourceMgr
  for (;;) {
    if (curStringRef.starts_with("/*")) {
      const size_t close = curStringRef.find("*/", 2);
      if (close == llvm::StringRef::npos) {
        // TODO: Error("Unclosed comment in a linker script");
        curStringRef = curStringRef.substr(curStringRef.size());
        return curStringRef;
      }
      curStringRef = curStringRef.substr(close + 2);
      continue;
    }

    if (curStringRef.starts_with("#")) {
      const size_t eol = curStringRef.find('\n', 1);
      // A line comment without a trailing newline runs to the end of input.
      curStringRef = eol == llvm::StringRef::npos
                         ? curStringRef.substr(curStringRef.size())
                         : curStringRef.substr(eol + 1);
      continue;
    }

    // Strip leading whitespace; if there was none, nothing is left to skip.
    const llvm::StringRef trimmed = curStringRef.ltrim();
    if (trimmed.size() == curStringRef.size())
      return curStringRef;
    curStringRef = trimmed;
  }
}

// Lexes a token whose first character is neither a digit nor a letter.
//
// Multi-character operators are matched longest-first so that, for example,
// "<<=" is not split into "<<" and "=". A leading '"' is handed to
// getQuotedToken(); a leading '_' or '.' is handed to
// getCommandOrIdentifier(). Any character not listed here produces a
// one-character Error token.
//
// End of input is detected by checking for an empty remainder. The previous
// `case EOF:` compared a char against -1, which matched the byte 0xFF on
// signed-char targets and could never match on unsigned-char targets.
LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() {
  if (curStringRef.empty())
    return advanceTokenInfo(ScriptToken::Eof);

  switch (curStringRef.front()) {
  case '"':
    return getQuotedToken();
  case '_':
  case '.':
    return getCommandOrIdentifier();

  // Single-character punctuation.
  case '(':
    return advanceTokenInfo(ScriptToken::BracektBegin);
  case ')':
    return advanceTokenInfo(ScriptToken::BracektEnd);
  case '{':
    return advanceTokenInfo(ScriptToken::CurlyBegin);
  case '}':
    return advanceTokenInfo(ScriptToken::CurlyEnd);
  case ';':
    return advanceTokenInfo(ScriptToken::Semicolon);
  case ',':
    return advanceTokenInfo(ScriptToken::Comma);
  case ':':
    return advanceTokenInfo(ScriptToken::Colon);
  case '*':
    return advanceTokenInfo(ScriptToken::Asterisk);
  case '=':
    return advanceTokenInfo(ScriptToken::Assign);
  case '?':
    return advanceTokenInfo(ScriptToken::QuestionMark);

  // Operators that may be followed by '=' or doubled.
  case '+':
    if (curStringRef.starts_with("+="))
      return advanceTokenInfo(ScriptToken::PlusAssign, 2);
    return advanceTokenInfo(ScriptToken::Plus);
  case '-':
    if (curStringRef.starts_with("-="))
      return advanceTokenInfo(ScriptToken::MinusAssign, 2);
    return advanceTokenInfo(ScriptToken::Minus);
  case '<':
    // "<<=" is a LEFT shift assignment (this was previously swapped with the
    // right-shift token).
    if (curStringRef.starts_with("<<="))
      return advanceTokenInfo(ScriptToken::LeftShiftAssign, 3);
    if (curStringRef.starts_with("<="))
      return advanceTokenInfo(ScriptToken::LessEqual, 2);
    if (curStringRef.starts_with("<<"))
      return advanceTokenInfo(ScriptToken::LeftShift, 2);
    return advanceTokenInfo(ScriptToken::Less);
  case '>':
    // ">>=" is a RIGHT shift assignment (this was previously swapped with the
    // left-shift token).
    if (curStringRef.starts_with(">>="))
      return advanceTokenInfo(ScriptToken::RightShiftAssign, 3);
    if (curStringRef.starts_with(">="))
      return advanceTokenInfo(ScriptToken::GreaterEqual, 2);
    if (curStringRef.starts_with(">>"))
      return advanceTokenInfo(ScriptToken::RightShift, 2);
    return advanceTokenInfo(ScriptToken::Greater);
  case '&':
    if (curStringRef.starts_with("&="))
      return advanceTokenInfo(ScriptToken::AndAssign, 2);
    if (curStringRef.starts_with("&&"))
      return advanceTokenInfo(ScriptToken::AndGate, 2);
    return advanceTokenInfo(ScriptToken::Bitwise);
  case '^':
    // NOTE(review): "^=" yields AndAssign, the same token as "&=". This looks
    // like a copy-paste slip; switch to a dedicated xor-assign token if
    // ScriptToken declares one.
    if (curStringRef.starts_with("^="))
      return advanceTokenInfo(ScriptToken::AndAssign, 2);
    return advanceTokenInfo(ScriptToken::Xor);
  case '|':
    if (curStringRef.starts_with("|="))
      return advanceTokenInfo(ScriptToken::OrAssign, 2);
    if (curStringRef.starts_with("||"))
      return advanceTokenInfo(ScriptToken::OrGate, 2);
    return advanceTokenInfo(ScriptToken::Or);

  default:
    return advanceTokenInfo(ScriptToken::Error);
  }
}

// Lexes a double-quoted token; the remaining input is expected to start with
// the opening quote.
//
// Note that double-quote characters are parts of a token because, in a glob
// match context, only unquoted tokens are interpreted as glob patterns.
// Double-quoted tokens are literal patterns in that context.
//
// If the closing quote is missing, the whole remaining input is consumed and
// returned as an Error token.
LinkerScriptLexer::TokenInfo LinkerScriptLexer::getQuotedToken() {
  // Start searching at index 1 so the opening quote is not matched.
  const size_t closing = curStringRef.find('"', 1);
  if (closing == StringRef::npos) {
    // TODO: Error(MB.getBufferIdentifier() + ": unclosed quote");
    return advanceTokenInfo(ScriptToken::Error, curStringRef.size());
  }
  // Include both quote characters in the token text.
  return advanceTokenInfo(ScriptToken::Quote, closing + 1);
}

// Lexes a numeric literal; the remaining input is expected to start with a
// digit.
//
// The literal is the longest leading run of ASCII letters and digits. It is
// classified as:
//   - Hexdecimal    when it starts with "0x"/"0X",
//   - Hexdecimal_H  when it ends in 'h'/'H',
//   - Decimal_K     when it ends in 'k'/'K',
//   - Decimal_M     when it ends in 'm'/'M',
//   - Decimal       otherwise.
LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() {
  size_t pos = curStringRef.find_first_not_of(
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
      "0123456789");
  // The literal may run to the very end of the input. Clamp so that the
  // suffix lookup below does not index with npos - 1.
  if (pos == StringRef::npos)
    pos = curStringRef.size();
  // Defensive: no alphanumeric prefix at all means there is no number here.
  if (pos == 0)
    return advanceTokenInfo(ScriptToken::Error);

  if (curStringRef.starts_with_insensitive("0x"))
    return advanceTokenInfo(ScriptToken::Hexdecimal, pos);

  // The last character of the literal selects the suffix form.
  switch (curStringRef[pos - 1]) {
  case 'H':
  case 'h':
    return advanceTokenInfo(ScriptToken::Hexdecimal_H, pos);
  case 'K':
  case 'k':
    return advanceTokenInfo(ScriptToken::Decimal_K, pos);
  case 'M':
  case 'm':
    return advanceTokenInfo(ScriptToken::Decimal_M, pos);
  default:
    return advanceTokenInfo(ScriptToken::Decimal, pos);
  }
}

// Lexes an unquoted word: a keyword, an identifier, or a lone ".".
//
// Unquoted token. This is more relaxed than tokens in C-like language,
// so that you can write "file-name.cpp" as one bare token, for example.
//
// When inExpression is set, the word is additionally cut at the first operator
// character it contains (unless that operator is the very first character) and
// the part before it is returned as an Identifier.
LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentifier() {
  const size_t pos = curStringRef.find_first_not_of(
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
      "0123456789_.$/\\~=+[]*?-!^:");
  const StringRef word = curStringRef.substr(0, pos);

  // Quoted strings are literal strings, so we don't want to split it.
  if (inExpression && !curStringRef.starts_with("\"")) {
    const StringRef ops = "!~*/+-<>?^:="; // List of operators
    // Search only inside the current word. Scanning the whole remaining input
    // would find operators that belong to later tokens and make this
    // identifier swallow whitespace and the tokens in between.
    const size_t e = word.find_first_of(ops);
    if (e != StringRef::npos && e != 0)
      return advanceTokenInfo(ScriptToken::Identifier, e);
  }

  // A "." standing on its own gets a dedicated Dot token.
  if (pos == 1 && curStringRef[0] == '.')
    return advanceTokenInfo(ScriptToken::Dot);

  return advanceTokenInfo(getTokenfromKeyword(word), pos);
}

// Maps the text of an unquoted word to its token kind.
//
// The comparison is exact and case-sensitive. Upper-case linker script
// keywords map to the ScriptToken enumerator named LS_<KEYWORD>; the
// lower-case words "local", "global" and "extern" map to LS_Local, LS_Global
// and LS_Extern. Any other text is an Identifier.
//
// NOTE(review): OUTPUT_FORMAT used to be listed twice; the second entry was
// unreachable and has been dropped. It may have been meant to be another
// keyword (INPUT is not in the list) -- confirm against the ScriptToken enum.
ScriptToken
LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const {
#define KEYWORD(STR)                                                           \
  do {                                                                         \
    if (keyword == #STR)                                                       \
      return ScriptToken::LS_##STR;                                            \
  } while (false)

  KEYWORD(ENTRY);
  KEYWORD(INCLUDE);
  KEYWORD(GROUP);
  KEYWORD(MEMORY);
  KEYWORD(OUTPUT);
  KEYWORD(SEARCH_DIR);
  KEYWORD(STARTUP);
  KEYWORD(INSERT);
  KEYWORD(AFTER);
  KEYWORD(OUTPUT_FORMAT);
  KEYWORD(TARGET);
  KEYWORD(ASSERT);
  KEYWORD(CONSTANT);
  KEYWORD(EXTERN);
  KEYWORD(OUTPUT_ARCH);
  KEYWORD(PROVIDE);
  KEYWORD(HIDDEN);
  KEYWORD(PROVIDE_HIDDEN);
  KEYWORD(SECTIONS);
  KEYWORD(BEFORE);
  KEYWORD(EXCLUDE_FILE);
  KEYWORD(KEEP);
  KEYWORD(INPUT_SECTION_FLAGS);
  KEYWORD(OVERLAY);
  KEYWORD(NOLOAD);
  KEYWORD(COPY);
  KEYWORD(INFO);
  KEYWORD(OVERWRITE_SECTIONS);
  KEYWORD(SUBALIGN);
  KEYWORD(ONLY_IF_RO);
  KEYWORD(ONLY_IF_RW);
  KEYWORD(FILL);
  KEYWORD(SORT);
  KEYWORD(ABSOLUTE);
  KEYWORD(ADDR);
  KEYWORD(ALIGN);
  KEYWORD(ALIGNOF);
  KEYWORD(DATA_SEGMENT_ALIGN);
  KEYWORD(DATA_SEGMENT_END);
  KEYWORD(DEFINED);
  KEYWORD(LENGTH);
  KEYWORD(LOADADDR);
  KEYWORD(LOG2CEIL);
  KEYWORD(MAX);
  KEYWORD(MIN);
  KEYWORD(ORIGIN);
  KEYWORD(SEGMENT_START);
  KEYWORD(SIZEOF);
  KEYWORD(SIZEOF_HEADERS);
  KEYWORD(FILEHDR);
  KEYWORD(PHDRS);
  KEYWORD(AT);
  KEYWORD(FLAGS);
  KEYWORD(VERSION);
  KEYWORD(REGION_ALIAS);
  KEYWORD(AS_NEEDED);
  KEYWORD(CONSTRUCTORS);
  KEYWORD(MAXPAGESIZE);
  KEYWORD(COMMONPAGESIZE);
  KEYWORD(BYTE);
  KEYWORD(SHORT);
  KEYWORD(LONG);
  KEYWORD(QUAD);

#undef KEYWORD

  // Lower-case words whose enumerator names do not follow the LS_<TEXT>
  // pattern used by the macro above.
  if (keyword == "local")
    return ScriptToken::LS_Local;
  if (keyword == "global")
    return ScriptToken::LS_Global;
  if (keyword == "extern")
    return ScriptToken::LS_Extern;

  return ScriptToken::Identifier;
}
Loading