Skip to content

Commit 7a6d3fa

Browse files
committed
Refactor legacy assemble to also use RAII instruction location construction
1 parent 287b66a commit 7a6d3fa

File tree

1 file changed

+58
-101
lines changed

1 file changed

+58
-101
lines changed

libevmasm/Assembly.cpp

Lines changed: 58 additions & 101 deletions
Original file line number | Diff line number | Diff line change
@@ -1329,163 +1329,120 @@ LinkerObject const& Assembly::assembleLegacy() const
13291329
uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
13301330

13311331
LinkerObject::CodeSectionLocation codeSectionLocation;
1332+
codeSectionLocation.instructionLocations.reserve(items.size());
13321333
codeSectionLocation.start = 0;
1333-
size_t assemblyItemIndex = 0;
1334-
auto assembleInstruction = [&](auto&& _addInstruction) {
1335-
size_t start = ret.bytecode.size();
1336-
_addInstruction();
1337-
size_t end = ret.bytecode.size();
1338-
codeSectionLocation.instructionLocations.emplace_back(
1339-
LinkerObject::InstructionLocation{
1340-
.start = start,
1341-
.end = end,
1342-
.assemblyItemIndex = assemblyItemIndex
1343-
}
1344-
);
1345-
};
1346-
for (AssemblyItem const& item: items)
1334+
for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate)
13471335
{
1336+
// collect instruction locations via side effects
1337+
InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex);
13481338
// store position of the invalid jump destination
13491339
if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
13501340
m_tagPositionsInBytecode[0] = ret.bytecode.size();
13511341

13521342
switch (item.type())
13531343
{
13541344
case Operation:
1355-
assembleInstruction([&](){
1356-
ret.bytecode += assembleOperation(item);
1357-
});
1345+
ret.bytecode += assembleOperation(item);
13581346
break;
13591347
case Push:
1360-
assembleInstruction([&](){
1361-
ret.bytecode += assemblePush(item);
1362-
});
1348+
ret.bytecode += assemblePush(item);
13631349
break;
13641350
case PushTag:
1365-
{
1366-
assembleInstruction([&](){
1367-
ret.bytecode.push_back(tagPush);
1368-
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1369-
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1370-
});
1351+
ret.bytecode.push_back(tagPush);
1352+
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1353+
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
13711354
break;
1372-
}
13731355
case PushData:
1374-
assembleInstruction([&]() {
1375-
ret.bytecode.push_back(dataRefPush);
1376-
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1377-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1378-
});
1356+
ret.bytecode.push_back(dataRefPush);
1357+
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1358+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13791359
break;
13801360
case PushSub:
1381-
assembleInstruction([&]() {
1382-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1383-
ret.bytecode.push_back(dataRefPush);
1384-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1385-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1386-
});
1361+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1362+
ret.bytecode.push_back(dataRefPush);
1363+
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1364+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13871365
break;
13881366
case PushSubSize:
13891367
{
1390-
assembleInstruction([&](){
1391-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1392-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1393-
item.setPushedValue(u256(s));
1394-
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1395-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1396-
ret.bytecode.resize(ret.bytecode.size() + b);
1397-
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1398-
toBigEndian(s, byr);
1399-
});
1368+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1369+
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1370+
item.setPushedValue(u256(s));
1371+
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1372+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1373+
ret.bytecode.resize(ret.bytecode.size() + b);
1374+
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1375+
toBigEndian(s, byr);
14001376
break;
14011377
}
14021378
case PushProgramSize:
1403-
{
1404-
assembleInstruction([&](){
1405-
ret.bytecode.push_back(dataRefPush);
1406-
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1407-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1408-
});
1379+
ret.bytecode.push_back(dataRefPush);
1380+
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1381+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
14091382
break;
1410-
}
14111383
case PushLibraryAddress:
14121384
{
1413-
assembleInstruction([&]() {
1414-
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1415-
ret.bytecode += bytecode;
1416-
ret.linkReferences.insert(linkRef);
1417-
});
1385+
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1386+
ret.bytecode += bytecode;
1387+
ret.linkReferences.insert(linkRef);
14181388
break;
14191389
}
14201390
case PushImmutable:
1421-
assembleInstruction([&]() {
1422-
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1423-
// Maps keccak back to the "identifier" std::string of that immutable.
1424-
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1425-
// Record the bytecode offset of the PUSH32 argument.
1426-
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1427-
// Advance bytecode by 32 bytes (default initialized).
1428-
ret.bytecode.resize(ret.bytecode.size() + 32);
1429-
});
1391+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1392+
// Maps keccak back to the "identifier" std::string of that immutable.
1393+
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1394+
// Record the bytecode offset of the PUSH32 argument.
1395+
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1396+
// Advance bytecode by 32 bytes (default initialized).
1397+
ret.bytecode.resize(ret.bytecode.size() + 32);
14301398
break;
14311399
case VerbatimBytecode:
14321400
ret.bytecode += assembleVerbatimBytecode(item);
14331401
break;
14341402
case AssignImmutable:
14351403
{
1404+
// This item type decomposes into multiple evm instructions, so we manually call emit()
1405+
14361406
// Expect 2 elements on stack (source, dest_base)
14371407
auto const& offsets = immutableReferencesBySub[item.data()].second;
14381408
for (size_t i = 0; i < offsets.size(); ++i)
14391409
{
14401410
if (i != offsets.size() - 1)
14411411
{
1442-
assembleInstruction([&]() {
1443-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1444-
});
1445-
assembleInstruction([&]() {
1446-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1447-
});
1412+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1413+
instructionLocationEmitter.emit();
1414+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1415+
instructionLocationEmitter.emit();
14481416
}
1449-
assembleInstruction([&]() {
1450-
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1451-
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1452-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1453-
ret.bytecode += offsetBytes;
1454-
});
1455-
assembleInstruction([&]() {
1456-
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1457-
});
1458-
assembleInstruction([&]() {
1459-
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1460-
});
1417+
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1418+
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1419+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1420+
ret.bytecode += offsetBytes;
1421+
instructionLocationEmitter.emit();
1422+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD));
1423+
instructionLocationEmitter.emit();
1424+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE));
1425+
// no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
14611426
}
14621427
if (offsets.empty())
14631428
{
1464-
assembleInstruction([&]() {
1465-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1466-
});
1467-
assembleInstruction([&]() {
1468-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1469-
});
1429+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1430+
instructionLocationEmitter.emit();
1431+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1432+
// no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
14701433
}
14711434
immutableReferencesBySub.erase(item.data());
14721435
break;
14731436
}
14741437
case PushDeployTimeAddress:
1475-
assembleInstruction([&]() {
1476-
ret.bytecode += assemblePushDeployTimeAddress();
1477-
});
1438+
ret.bytecode += assemblePushDeployTimeAddress();
14781439
break;
14791440
case Tag:
1480-
assembleInstruction([&](){
1481-
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1482-
});
1441+
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
14831442
break;
14841443
default:
14851444
solAssert(false, "Unexpected opcode while assembling.");
14861445
}
1487-
1488-
++assemblyItemIndex;
14891446
}
14901447

14911448
codeSectionLocation.end = ret.bytecode.size();

0 commit comments

Comments
 (0)