Skip to content

Commit ede68b7

Browse files
committed
lexer: Add ability to lex strings directly
By allowing the lexer to read from a string directly, rather than always requiring a file, we are now able to reuse the parser and lexer in various places of the compiler. This is useful for -frust-cfg, but may also come in handy for other compiler mechanics such as the include!() builtin macro, where we do not actually want location info but just a stream of tokens.
1 parent bf92a10 commit ede68b7

File tree

5 files changed

+84
-51
lines changed

5 files changed

+84
-51
lines changed

gcc/rust/lex/rust-lex.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap)
128128
token_queue (TokenSource (this))
129129
{
130130
// inform line_table that file is being entered and is in line 1
131-
line_map->start_file (filename, current_line);
131+
if (linemap)
132+
line_map->start_file (filename, current_line);
132133
}
133134

134135
Lexer::~Lexer ()
@@ -152,7 +153,11 @@ Lexer::~Lexer ()
152153
Location
153154
Lexer::get_current_location ()
154155
{
155-
return line_map->get_location (current_column);
156+
if (line_map)
157+
return line_map->get_location (current_column);
158+
else
159+
// If we have no linemap, we're lexing something without proper locations
160+
return Location ();
156161
}
157162

158163
int

gcc/rust/lex/rust-lex.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "rust-buffered-queue.h"
2424
#include "rust-token.h"
2525

26+
#include <cstdio>
2627
#include <utility>
2728
#include <tuple>
2829

@@ -49,6 +50,13 @@ struct RAIIFile
4950
file = fopen (filename, "r");
5051
}
5152

53+
/**
54+
* Create a RAIIFile from an existing instance of FILE*
55+
*/
56+
RAIIFile (FILE *raw, const char *filename = nullptr)
57+
: file (raw), filename (filename)
58+
{}
59+
5260
RAIIFile (const RAIIFile &other) = delete;
5361
RAIIFile &operator= (const RAIIFile &other) = delete;
5462

@@ -57,6 +65,7 @@ struct RAIIFile
5765
{
5866
other.file = nullptr;
5967
}
68+
6069
RAIIFile &operator= (RAIIFile &&other)
6170
{
6271
close ();
@@ -132,6 +141,19 @@ class Lexer
132141
Lexer (const char *filename, RAIIFile input, Linemap *linemap);
133142
~Lexer ();
134143

144+
/**
145+
* Lex the contents of a string instead of a file
146+
*/
147+
static Lexer lex_string (std::string &input)
148+
{
149+
// fmemopen () takes a non-const buffer, which is why the string is passed
150+
// by non-const reference. The stream is opened read-only ("r"), so the
151+
// string's contents are never modified.
152+
auto string_file = fmemopen (&input[0], input.length (), "r");
153+
154+
return Lexer (nullptr, RAIIFile (string_file), nullptr);
155+
}
156+
135157
// don't allow copy semantics (for now, at least)
136158
Lexer (const Lexer &other) = delete;
137159
Lexer &operator= (const Lexer &other) = delete;

gcc/rust/rust-session-manager.cc

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -366,9 +366,11 @@ Session::handle_option (
366366
Compile::Mangler::set_mangling (flag_rust_mangling);
367367
break;
368368

369-
case OPT_frust_cfg_:
370-
ret = handle_cfg_option (std::string (arg));
371-
break;
369+
case OPT_frust_cfg_: {
370+
auto string_arg = std::string (arg);
371+
ret = handle_cfg_option (string_arg);
372+
break;
373+
}
372374

373375
default:
374376
break;
@@ -378,7 +380,7 @@ Session::handle_option (
378380
}
379381

380382
bool
381-
Session::handle_cfg_option (const std::string &input)
383+
Session::handle_cfg_option (std::string &input)
382384
{
383385
std::string key;
384386
std::string value;
@@ -402,8 +404,8 @@ Session::handle_cfg_option (const std::string &input)
402404
return true;
403405
}
404406

405-
/* Enables a certain dump depending on the name passed in. Returns true if name
406-
* is valid, false otherwise. */
407+
/* Enables a certain dump depending on the name passed in. Returns true if
408+
* name is valid, false otherwise. */
407409
bool
408410
Session::enable_dump (std::string arg)
409411
{
@@ -532,8 +534,8 @@ Session::parse_file (const char *filename)
532534
* line into crate root)
533535
* - injection (some lint checks or dummy, register builtin macros, crate
534536
* injection)
535-
* - expansion (expands all macros, maybe build test harness, AST validation,
536-
* maybe macro crate)
537+
* - expansion (expands all macros, maybe build test harness, AST
538+
* validation, maybe macro crate)
537539
* - resolution (name resolution, type resolution, maybe feature checking,
538540
* maybe buffered lints)
539541
* TODO not done */
@@ -603,8 +605,8 @@ Session::parse_file (const char *filename)
603605
if (saw_errors ())
604606
return;
605607

606-
// scan unused has to be done after type resolution since methods are resolved
607-
// at that point
608+
// scan unused has to be done after type resolution since methods are
609+
// resolved at that point
608610
Resolver::ScanUnused::Scan ();
609611

610612
if (saw_errors ())
@@ -638,11 +640,11 @@ Session::debug_dump_load_crates (Parser<Lexer> &parser)
638640

639641
/* TODO: search through inner attrs and see whether any of those attr paths
640642
* contain "no_core", "no_std", "compiler_builtins". If so/not, save certain
641-
* crate names. In these names, insert items at beginning of crate items. This
642-
* is crate injection. Also, inject prelude use decl at beginning (first name
643-
* is assumed to be prelude - prelude is a use decl automatically generated to
644-
* enable using Option and Copy without qualifying it or importing it via
645-
* 'use' manually) */
643+
* crate names. In these names, insert items at beginning of crate items.
644+
* This is crate injection. Also, inject prelude use decl at beginning
645+
* (first name is assumed to be prelude - prelude is a use decl
646+
* automatically generated to enable using Option and Copy without
647+
* qualifying it or importing it via 'use' manually) */
646648

647649
std::vector<std::string> crate_names;
648650
for (const auto &item : crate.items)
@@ -695,8 +697,8 @@ Session::injection (AST::Crate &crate)
695697

696698
// register builtin macros
697699
/* In rustc, builtin macros are divided into 3 categories depending on use -
698-
* "bang" macros, "attr" macros, and "derive" macros. I think the meanings of
699-
* these categories should be fairly obvious to anyone who has used rust.
700+
* "bang" macros, "attr" macros, and "derive" macros. I think the meanings
701+
* of these categories should be fairly obvious to anyone who has used rust.
700702
* Builtin macro list by category: Bang
701703
* - asm
702704
* - assert
@@ -739,8 +741,8 @@ Session::injection (AST::Crate &crate)
739741
* rustc also has a "quote" macro that is defined differently and is
740742
* supposedly not stable so eh. */
741743
/* TODO: actually implement injection of these macros. In particular, derive
742-
* macros, cfg, and test should be prioritised since they seem to be used the
743-
* most. */
744+
* macros, cfg, and test should be prioritised since they seem to be used
745+
* the most. */
744746

745747
// crate injection
746748
std::vector<std::string> names;
@@ -804,11 +806,11 @@ Session::injection (AST::Crate &crate)
804806
crate.items.insert (crate.items.begin (), std::move (use_decl));
805807

806808
/* TODO: potentially add checking attribute crate type? I can't figure out
807-
* what this does currently comment says "Unconditionally collect crate types
808-
* from attributes to make them used", which presumably refers to checking the
809-
* linkage info by "crate_type". It also seems to ensure that an invalid crate
810-
* type is not specified, so maybe just do that. Valid crate types: bin lib
811-
* dylib staticlib cdylib rlib proc-macro */
809+
* what this does currently comment says "Unconditionally collect crate
810+
* types from attributes to make them used", which presumably refers to
811+
* checking the linkage info by "crate_type". It also seems to ensure that
812+
* an invalid crate type is not specified, so maybe just do that. Valid
813+
* crate types: bin lib dylib staticlib cdylib rlib proc-macro */
812814

813815
rust_debug ("finished injection");
814816
}
@@ -818,8 +820,8 @@ Session::expansion (AST::Crate &crate)
818820
{
819821
rust_debug ("started expansion");
820822

821-
/* rustc has a modification to windows PATH temporarily here, which may end up
822-
* being required */
823+
/* rustc has a modification to windows PATH temporarily here, which may end
824+
* up being required */
823825

824826
// create macro expansion config?
825827
// if not, would at least have to configure recursion_limit
@@ -1036,10 +1038,10 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
10361038
* [types/values] or absolute paths)
10371039
* - HIR lower (convert modified AST to simpler HIR [both expressions and
10381040
* module tree])
1039-
* - resolve type aliases (replace any usages of type aliases with actual type
1040-
* [except associated types])
1041-
* - resolve bind (iterate HIR tree and set binding annotations on all concrete
1042-
* types [avoids path lookups later])
1041+
* - resolve type aliases (replace any usages of type aliases with actual
1042+
* type [except associated types])
1043+
* - resolve bind (iterate HIR tree and set binding annotations on all
1044+
* concrete types [avoids path lookups later])
10431045
* - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...]
10441046
* for all types
10451047
* - sort impls (small pass - sort impls into groups)
@@ -1059,8 +1061,8 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
10591061
* function calls)
10601062
* - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of
10611063
* 'v'])
1062-
* - expand HIR erasedtype (replace all erased types 'impl Trait' with the true
1063-
* type)
1064+
* - expand HIR erasedtype (replace all erased types 'impl Trait' with the
1065+
* true type)
10641066
* - typecheck expressions (validate - double check that previous passes
10651067
* haven't broken type system rules)
10661068
* - lower MIR (convert HIR exprs into a control-flow graph [MIR])
@@ -1071,24 +1073,25 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
10711073
* - MIR optimise (perform various simple optimisations on the MIR - constant
10721074
* propagation, dead code elimination, borrow elimination, some inlining)
10731075
* - MIR validate PO (re-validate the MIR)
1074-
* - MIR validate full (optionally: perform expensive state-tracking validation
1075-
* on MIR)
1076-
* - trans enumerate (enumerate all items needed for code generation, primarily
1077-
* types used for generics)
1078-
* - trans auto impls (create magic trait impls as enumerated in previous pass)
1076+
* - MIR validate full (optionally: perform expensive state-tracking
1077+
* validation on MIR)
1078+
* - trans enumerate (enumerate all items needed for code generation,
1079+
* primarily types used for generics)
1080+
* - trans auto impls (create magic trait impls as enumerated in previous
1081+
* pass)
10791082
* - trans monomorph (generate monomorphised copies of all functions [with
10801083
* generics replaced with real types])
1081-
* - MIR optimise inline (run optimisation again, this time with full type info
1082-
* [primarily for inlining])
1084+
* - MIR optimise inline (run optimisation again, this time with full type
1085+
* info [primarily for inlining])
10831086
* - HIR serialise (write out HIR dump [module tree and generic/inline MIR])
10841087
* - trans codegen (generate final output file: emit C source file and call C
10851088
* compiler) */
10861089

10871090
/* rustc compile pipeline (basic, in way less detail):
10881091
* - parse input (parse .rs to AST)
10891092
* - name resolution, macro expansion, and configuration (process AST
1090-
* recursively, resolving paths, expanding macros, processing #[cfg] nodes [i.e.
1091-
* maybe stripping stuff from AST])
1093+
* recursively, resolving paths, expanding macros, processing #[cfg] nodes
1094+
* [i.e. maybe stripping stuff from AST])
10921095
* - lower to HIR
10931096
* - type check and other analyses (e.g. privacy checking)
10941097
* - lower to MIR and post-processing (and do stuff like borrow checking)
@@ -1100,14 +1103,14 @@ TargetOptions::enable_implicit_feature_reqs (std::string feature)
11001103
* - register plugins (attributes injection, set various options, register
11011104
* lints, load plugins)
11021105
* - expansion/configure and expand (initial 'cfg' processing, 'loading
1103-
* compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis of
1104-
* a test harness if required, injection of any std lib dependency and prelude,
1105-
* and name resolution) - actually documented inline
1106+
* compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis
1107+
* of a test harness if required, injection of any std lib dependency and
1108+
* prelude, and name resolution) - actually documented inline
11061109
* - seeming pieced-together order: pre-AST expansion lint checks,
11071110
* registering builtin macros, crate injection, then expand all macros, then
1108-
* maybe build test harness, AST validation, maybe create a macro crate (if not
1109-
* rustdoc), name resolution, complete gated feature checking, add all buffered
1110-
* lints
1111+
* maybe build test harness, AST validation, maybe create a macro crate (if
1112+
* not rustdoc), name resolution, complete gated feature checking, add all
1113+
* buffered lints
11111114
* - create global context (lower to HIR)
11121115
* - analysis on global context (HIR optimisations? create MIR?)
11131116
* - code generation

gcc/rust/rust-session-manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ struct Session
288288
void expansion (AST::Crate &crate);
289289

290290
// handle cfg_option
291-
bool handle_cfg_option (const std::string &data);
291+
bool handle_cfg_option (std::string &data);
292292
};
293293
} // namespace Rust
294294

gcc/rust/util/rust-hir-map.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,10 @@ NodeId
139139
Mappings::get_next_node_id (CrateNum crateNum)
140140
{
141141
auto it = nodeIdIter.find (crateNum);
142-
rust_assert (it != nodeIdIter.end ());
142+
// We're probably *not* parsing actual rust code... but mostly reusing
143+
// the parser in another way. Return 0
144+
if (it == nodeIdIter.end ())
145+
return 0;
143146

144147
auto id = it->second + 1;
145148
nodeIdIter[crateNum] = id;

0 commit comments

Comments
 (0)