@@ -53,6 +53,54 @@ using namespace solidity::evmasm;
53
53
using namespace solidity ::langutil;
54
54
using namespace solidity ::util;
55
55
56
+ namespace
57
+ {
58
+
59
+ // / Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode,
60
+ // / this class can be instantiated in that scope. It will record the current bytecode size (before addition)
61
+ // / and, at destruction time, record the new bytecode size. This information is then added to an external
62
+ // / instruction locations vector.
63
+ // / If the instruction decomposes into multiple individual evm instructions, `emit` can be
64
+ // / called for all but the last one (which will be emitted by the destructor).
65
+ class InstructionLocationEmitter
66
+ {
67
+ public:
68
+ InstructionLocationEmitter (
69
+ std::vector<LinkerObject::InstructionLocation>& _instructionLocations,
70
+ bytes const & _bytecode,
71
+ size_t const _assemblyItemIndex
72
+ ):
73
+ m_instructionLocations (_instructionLocations),
74
+ m_bytecode (_bytecode),
75
+ m_assemblyItemIndex (_assemblyItemIndex),
76
+ m_instructionLocationStart (_bytecode.size())
77
+ {}
78
+
79
+ ~InstructionLocationEmitter ()
80
+ {
81
+ emit ();
82
+ }
83
+
84
+ void emit ()
85
+ {
86
+ auto const end = m_bytecode.size ();
87
+ m_instructionLocations.push_back (LinkerObject::InstructionLocation{
88
+ .start = m_instructionLocationStart,
89
+ .end = end,
90
+ .assemblyItemIndex = m_assemblyItemIndex
91
+ });
92
+ m_instructionLocationStart = end;
93
+ }
94
+
95
+ private:
96
+ std::vector<LinkerObject::InstructionLocation>& m_instructionLocations;
97
+ bytes const & m_bytecode;
98
+ size_t const m_assemblyItemIndex{};
99
+ size_t m_instructionLocationStart{};
100
+ };
101
+
102
+ }
103
+
56
104
std::map<std::string, std::shared_ptr<std::string const >> Assembly::s_sharedSourceNames;
57
105
58
106
AssemblyItem const & Assembly::append (AssemblyItem _i)
@@ -1281,104 +1329,72 @@ LinkerObject const& Assembly::assembleLegacy() const
1281
1329
uint8_t dataRefPush = static_cast <uint8_t >(pushInstruction (bytesPerDataRef));
1282
1330
1283
1331
LinkerObject::CodeSectionLocation codeSectionLocation;
1332
+ codeSectionLocation.instructionLocations .reserve (items.size ());
1284
1333
codeSectionLocation.start = 0 ;
1285
- size_t assemblyItemIndex = 0 ;
1286
- auto assembleInstruction = [&](auto && _addInstruction) {
1287
- size_t start = ret.bytecode .size ();
1288
- _addInstruction ();
1289
- size_t end = ret.bytecode .size ();
1290
- codeSectionLocation.instructionLocations .emplace_back (
1291
- LinkerObject::InstructionLocation{
1292
- .start = start,
1293
- .end = end,
1294
- .assemblyItemIndex = assemblyItemIndex
1295
- }
1296
- );
1297
- };
1298
- for (AssemblyItem const & item: items)
1334
+ for (auto const & [assemblyItemIndex, item]: items | ranges::views::enumerate)
1299
1335
{
1336
+ // collect instruction locations via side effects
1337
+ InstructionLocationEmitter instructionLocationEmitter (codeSectionLocation.instructionLocations , ret.bytecode , assemblyItemIndex);
1300
1338
// store position of the invalid jump destination
1301
1339
if (item.type () != Tag && m_tagPositionsInBytecode[0 ] == std::numeric_limits<size_t >::max ())
1302
1340
m_tagPositionsInBytecode[0 ] = ret.bytecode .size ();
1303
1341
1304
1342
switch (item.type ())
1305
1343
{
1306
1344
case Operation:
1307
- assembleInstruction ([&](){
1308
- ret.bytecode += assembleOperation (item);
1309
- });
1345
+ ret.bytecode += assembleOperation (item);
1310
1346
break ;
1311
1347
case Push:
1312
- assembleInstruction ([&](){
1313
- ret.bytecode += assemblePush (item);
1314
- });
1348
+ ret.bytecode += assemblePush (item);
1315
1349
break ;
1316
1350
case PushTag:
1317
- {
1318
- assembleInstruction ([&](){
1319
- ret.bytecode .push_back (tagPush);
1320
- tagRefs[ret.bytecode .size ()] = item.splitForeignPushTag ();
1321
- ret.bytecode .resize (ret.bytecode .size () + bytesPerTag);
1322
- });
1351
+ ret.bytecode .push_back (tagPush);
1352
+ tagRefs[ret.bytecode .size ()] = item.splitForeignPushTag ();
1353
+ ret.bytecode .resize (ret.bytecode .size () + bytesPerTag);
1323
1354
break ;
1324
- }
1325
1355
case PushData:
1326
- assembleInstruction ([&]() {
1327
- ret.bytecode .push_back (dataRefPush);
1328
- dataRefs.insert (std::make_pair (h256 (item.data ()), ret.bytecode .size ()));
1329
- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1330
- });
1356
+ ret.bytecode .push_back (dataRefPush);
1357
+ dataRefs.insert (std::make_pair (h256 (item.data ()), ret.bytecode .size ()));
1358
+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1331
1359
break ;
1332
1360
case PushSub:
1333
- assembleInstruction ([&]() {
1334
- assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1335
- ret.bytecode .push_back (dataRefPush);
1336
- subRefs.insert (std::make_pair (static_cast <size_t >(item.data ()), ret.bytecode .size ()));
1337
- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1338
- });
1361
+ assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1362
+ ret.bytecode .push_back (dataRefPush);
1363
+ subRefs.insert (std::make_pair (static_cast <size_t >(item.data ()), ret.bytecode .size ()));
1364
+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1339
1365
break ;
1340
1366
case PushSubSize:
1341
1367
{
1342
- assembleInstruction ([&](){
1343
- assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1344
- auto s = subAssemblyById (static_cast <size_t >(item.data ()))->assemble ().bytecode .size ();
1345
- item.setPushedValue (u256 (s));
1346
- unsigned b = std::max<unsigned >(1 , numberEncodingSize (s));
1347
- ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (b)));
1348
- ret.bytecode .resize (ret.bytecode .size () + b);
1349
- bytesRef byr (&ret.bytecode .back () + 1 - b, b);
1350
- toBigEndian (s, byr);
1351
- });
1368
+ assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1369
+ auto s = subAssemblyById (static_cast <size_t >(item.data ()))->assemble ().bytecode .size ();
1370
+ item.setPushedValue (u256 (s));
1371
+ unsigned b = std::max<unsigned >(1 , numberEncodingSize (s));
1372
+ ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (b)));
1373
+ ret.bytecode .resize (ret.bytecode .size () + b);
1374
+ bytesRef byr (&ret.bytecode .back () + 1 - b, b);
1375
+ toBigEndian (s, byr);
1352
1376
break ;
1353
1377
}
1354
1378
case PushProgramSize:
1355
- {
1356
- assembleInstruction ([&](){
1357
- ret.bytecode .push_back (dataRefPush);
1358
- sizeRefs.push_back (static_cast <unsigned >(ret.bytecode .size ()));
1359
- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1360
- });
1379
+ ret.bytecode .push_back (dataRefPush);
1380
+ sizeRefs.push_back (static_cast <unsigned >(ret.bytecode .size ()));
1381
+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1361
1382
break ;
1362
- }
1363
1383
case PushLibraryAddress:
1364
1384
{
1365
- assembleInstruction ([&]() {
1366
- auto const [bytecode, linkRef] = assemblePushLibraryAddress (item, ret.bytecode .size ());
1367
- ret.bytecode += bytecode;
1368
- ret.linkReferences .insert (linkRef);
1369
- });
1385
+ auto const [bytecode, linkRef] = assemblePushLibraryAddress (item, ret.bytecode .size ());
1386
+ ret.bytecode += bytecode;
1387
+ ret.linkReferences .insert (linkRef);
1370
1388
break ;
1371
1389
}
1372
1390
case PushImmutable:
1373
- assembleInstruction ([&]() {
1374
- ret.bytecode .push_back (static_cast <uint8_t >(Instruction::PUSH32));
1375
- // Maps keccak back to the "identifier" std::string of that immutable.
1376
- ret.immutableReferences [item.data ()].first = m_immutables.at (item.data ());
1377
- // Record the bytecode offset of the PUSH32 argument.
1378
- ret.immutableReferences [item.data ()].second .emplace_back (ret.bytecode .size ());
1379
- // Advance bytecode by 32 bytes (default initialized).
1380
- ret.bytecode .resize (ret.bytecode .size () + 32 );
1381
- });
1391
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::PUSH32));
1392
+ // Maps keccak back to the "identifier" std::string of that immutable.
1393
+ ret.immutableReferences [item.data ()].first = m_immutables.at (item.data ());
1394
+ // Record the bytecode offset of the PUSH32 argument.
1395
+ ret.immutableReferences [item.data ()].second .emplace_back (ret.bytecode .size ());
1396
+ // Advance bytecode by 32 bytes (default initialized).
1397
+ ret.bytecode .resize (ret.bytecode .size () + 32 );
1382
1398
break ;
1383
1399
case VerbatimBytecode:
1384
1400
ret.bytecode += assembleVerbatimBytecode (item);
@@ -1391,53 +1407,41 @@ LinkerObject const& Assembly::assembleLegacy() const
1391
1407
{
1392
1408
if (i != offsets.size () - 1 )
1393
1409
{
1394
- assembleInstruction ([&]() {
1395
- ret.bytecode .push_back (uint8_t (Instruction::DUP2));
1396
- });
1397
- assembleInstruction ([&]() {
1398
- ret.bytecode .push_back (uint8_t (Instruction::DUP2));
1399
- });
1410
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::DUP2));
1411
+ // This item type decomposes into multiple evm instructions, so we manually call emit()
1412
+ instructionLocationEmitter.emit ();
1413
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::DUP2));
1414
+ instructionLocationEmitter.emit ();
1400
1415
}
1401
- assembleInstruction ([&]() {
1402
- // TODO: should we make use of the constant optimizer methods for pushing the offsets?
1403
- bytes offsetBytes = toCompactBigEndian (u256 (offsets[i]));
1404
- ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (static_cast <unsigned >(offsetBytes.size ()))));
1405
- ret.bytecode += offsetBytes;
1406
- });
1407
- assembleInstruction ([&]() {
1408
- ret.bytecode .push_back (uint8_t (Instruction::ADD));
1409
- });
1410
- assembleInstruction ([&]() {
1411
- ret.bytecode .push_back (uint8_t (Instruction::MSTORE));
1412
- });
1416
+ // TODO: should we make use of the constant optimizer methods for pushing the offsets?
1417
+ bytes offsetBytes = toCompactBigEndian (u256 (offsets[i]));
1418
+ ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (static_cast <unsigned >(offsetBytes.size ()))));
1419
+ ret.bytecode += offsetBytes;
1420
+ instructionLocationEmitter.emit ();
1421
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::ADD));
1422
+ instructionLocationEmitter.emit ();
1423
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::MSTORE));
1424
+ // No emit needed here, it's taken care of by the destructor of instructionLocationEmitter.
1413
1425
}
1414
1426
if (offsets.empty ())
1415
1427
{
1416
- assembleInstruction ([&]() {
1417
- ret.bytecode .push_back (uint8_t (Instruction::POP));
1418
- });
1419
- assembleInstruction ([&]() {
1420
- ret.bytecode .push_back (uint8_t (Instruction::POP));
1421
- });
1428
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::POP));
1429
+ instructionLocationEmitter.emit ();
1430
+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::POP));
1431
+ // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
1422
1432
}
1423
1433
immutableReferencesBySub.erase (item.data ());
1424
1434
break ;
1425
1435
}
1426
1436
case PushDeployTimeAddress:
1427
- assembleInstruction ([&]() {
1428
- ret.bytecode += assemblePushDeployTimeAddress ();
1429
- });
1437
+ ret.bytecode += assemblePushDeployTimeAddress ();
1430
1438
break ;
1431
1439
case Tag:
1432
- assembleInstruction ([&](){
1433
- ret.bytecode += assembleTag (item, ret.bytecode .size (), true );
1434
- });
1440
+ ret.bytecode += assembleTag (item, ret.bytecode .size (), true );
1435
1441
break ;
1436
1442
default :
1437
1443
solAssert (false , " Unexpected opcode while assembling." );
1438
1444
}
1439
-
1440
- ++assemblyItemIndex;
1441
1445
}
1442
1446
1443
1447
codeSectionLocation.end = ret.bytecode .size ();
@@ -1606,9 +1610,17 @@ LinkerObject const& Assembly::assembleEOF() const
1606
1610
for (auto && [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate)
1607
1611
{
1608
1612
auto const sectionStart = ret.bytecode .size ();
1613
+
1614
+ std::vector<LinkerObject::InstructionLocation> instructionLocations;
1615
+ instructionLocations.reserve (codeSection.items .size ());
1616
+
1609
1617
solAssert (!codeSection.items .empty (), " Empty code section." );
1610
- for (AssemblyItem const & item: codeSection.items )
1618
+
1619
+ for (auto const & [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate)
1611
1620
{
1621
+ // collect instruction locations via side effects
1622
+ InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode , assemblyItemIndex};
1623
+
1612
1624
// store position of the invalid jump destination
1613
1625
if (item.type () != Tag && m_tagPositionsInBytecode[0 ] == std::numeric_limits<size_t >::max ())
1614
1626
m_tagPositionsInBytecode[0 ] = ret.bytecode .size ();
@@ -1724,6 +1736,12 @@ LinkerObject const& Assembly::assembleEOF() const
1724
1736
" Code section too large for EOF."
1725
1737
);
1726
1738
setBigEndianUint16 (ret.bytecode , codeSectionSizePositions[codeSectionIndex], ret.bytecode .size () - sectionStart);
1739
+
1740
+ ret.codeSectionLocations .push_back (LinkerObject::CodeSectionLocation{
1741
+ .start = sectionStart,
1742
+ .end = ret.bytecode .size (),
1743
+ .instructionLocations = std::move (instructionLocations)
1744
+ });
1727
1745
}
1728
1746
1729
1747
for (auto const & [refPos, tagId]: tagRef)
0 commit comments