26
26
MessageField ,
27
27
Option ,
28
28
Proto ,
29
+ Reference ,
29
30
Scope ,
30
31
Type ,
31
32
)
@@ -189,12 +190,9 @@ def util_parse_sequence(self, p: P) -> None:
189
190
p [0 ] = []
190
191
191
192
def copy_p_tracking(self, p: P, from_: int = 1, to: int = 0) -> None:
    """Copy position info (lexpos and lineno) between symbols of a production.

    By default, Ply tracks positions only for lexed tokens, not for every
    grammar symbol. We either enable parse(tracking=True), or copy the
    positions manually wherever they are needed.

    The lexpos and lineno of symbol ``from_`` are read from ``p`` and
    written onto symbol ``to``.
    """
    # Read both values from the source symbol first, then write them out.
    source_lexpos, source_lineno = p.lexpos(from_), p.lineno(from_)
    p.set_lexpos(to, source_lexpos)
    p.set_lineno(to, source_lineno)
@@ -213,13 +211,20 @@ def p_open_global_scope(self, p: P) -> None:
213
211
filepath = self .current_filepath (),
214
212
_bound = None ,
215
213
scope_stack = self .current_scope_stack (),
214
+ scope_start_lineno = 1 ,
215
+ scope_start_col = 1 ,
216
216
)
217
217
self .push_scope (proto )
218
218
219
219
@override_docstring (r_close_global_scope )
220
220
def p_close_global_scope (self , p : P ) -> None :
221
221
scope = self .pop_scope ()
222
222
proto = cast_or_raise (Proto , scope )
223
+
224
+ proto .scope_end_lineno = p .lexer .lexdata .count ("\n " ) # FIXME: slow?
225
+ lexpos = len (p .lexer .lexdata )
226
+ proto .scope_end_col = lexpos - p .lexer .lexdata .rfind ("\n " , 0 , lexpos )
227
+
223
228
if not proto .name :
224
229
raise ProtoNameUndefined (filepath = self .current_filepath ())
225
230
proto .freeze ()
@@ -334,6 +339,7 @@ def p_option(self, p: P) -> None:
334
339
filepath = self .current_filepath (),
335
340
lineno = p .lineno (2 ),
336
341
token = p [2 ],
342
+ token_col_start = self ._get_col (p , 2 ),
337
343
)
338
344
self .current_scope ().push_member (option )
339
345
@@ -357,6 +363,7 @@ def p_alias(self, p: P) -> None:
357
363
filepath = self .current_filepath (),
358
364
lineno = lineno ,
359
365
token = token ,
366
+ token_col_start = self ._get_col (p , 2 ) if len (p ) == 6 else self ._get_col (p , 3 ),
360
367
indent = self .current_indent (p ),
361
368
scope_stack = self .current_scope_stack (),
362
369
comment_block = self .collect_comment_block (),
@@ -384,6 +391,7 @@ def p_const(self, p: P) -> None:
384
391
_bound = self .current_proto (),
385
392
filepath = self .current_filepath (),
386
393
token = p [2 ],
394
+ token_col_start = self ._get_col (p , 2 ),
387
395
lineno = p .lineno (2 ),
388
396
)
389
397
self .current_scope ().push_member (constant )
@@ -466,6 +474,15 @@ def p_constant_reference(self, p: P) -> None:
466
474
p [0 ] = d
467
475
self .copy_p_tracking (p )
468
476
477
+ reference = Reference (
478
+ token = p [1 ],
479
+ lineno = p .lineno (1 ),
480
+ token_col_start = self ._get_col (p , 1 ),
481
+ filepath = self .current_filepath (),
482
+ referenced_definition = d ,
483
+ )
484
+ self .current_proto ().references .append (reference )
485
+
469
486
@override_docstring (r_type )
470
487
def p_type (self , p : P ) -> None :
471
488
p [0 ] = p [1 ]
@@ -498,9 +515,19 @@ def p_type_reference(self, p: P) -> None:
498
515
token = p [1 ],
499
516
lineno = p .lineno (1 ),
500
517
)
518
+
501
519
p [0 ] = d
502
520
self .copy_p_tracking (p )
503
521
522
+ reference = Reference (
523
+ token = p [1 ],
524
+ lineno = p .lineno (1 ),
525
+ token_col_start = self ._get_col (p , 1 ),
526
+ filepath = self .current_filepath (),
527
+ referenced_definition = d ,
528
+ )
529
+ self .current_proto ().references .append (reference )
530
+
504
531
@override_docstring (r_optional_extensible_flag )
505
532
def p_optional_extensible_flag (self , p : P ) -> None :
506
533
extensible = len (p ) == 2
@@ -517,6 +544,7 @@ def p_array_type(self, p: P) -> None:
517
544
cap = p [3 ],
518
545
extensible = p [5 ],
519
546
token = "{0}[{1}]" .format (p [1 ], p [3 ]),
547
+ token_col_start = self ._get_col (p , 1 ),
520
548
lineno = p .lineno (2 ),
521
549
filepath = self .current_filepath (),
522
550
)
@@ -552,12 +580,15 @@ def p_open_enum_scope(self, p: P) -> None:
552
580
name = p [2 ],
553
581
type = p [4 ],
554
582
token = p [2 ],
583
+ token_col_start = self ._get_col (p , 2 ),
555
584
lineno = p .lineno (2 ),
556
585
filepath = self .current_filepath (),
557
586
indent = self .current_indent (p ),
558
587
comment_block = self .collect_comment_block (),
559
588
scope_stack = self .current_scope_stack (),
560
589
_bound = self .current_proto (),
590
+ scope_start_lineno = p .lineno (5 ), # '{'
591
+ scope_start_col = self ._get_col (p , 5 ), # '{'
561
592
)
562
593
self .push_scope (enum )
563
594
@@ -567,7 +598,10 @@ def p_enum_scope(self, p: P) -> None:
567
598
568
599
@override_docstring (r_close_enum_scope )
569
600
def p_close_enum_scope(self, p: P) -> None:
    # Leave the current scope, stamp it with the position of symbol 1
    # (presumably the closing '}' -- confirm against r_close_enum_scope),
    # and only then freeze it.
    closed_scope = self.pop_scope()
    end_lineno = p.lineno(1)
    end_col = self._get_col(p, 1)
    closed_scope.scope_end_lineno = end_lineno
    closed_scope.scope_end_col = end_col
    closed_scope.freeze()
571
605
572
606
@override_docstring (r_enum_items )
573
607
def p_enum_items (self , p : P ) -> None :
@@ -605,6 +639,7 @@ def p_enum_field(self, p: P) -> None:
605
639
name = name ,
606
640
value = value ,
607
641
token = p [1 ],
642
+ token_col_start = self ._get_col (p , 1 ),
608
643
lineno = p .lineno (1 ),
609
644
indent = self .current_indent (p ),
610
645
filepath = self .current_filepath (),
@@ -626,18 +661,24 @@ def p_open_message_scope(self, p: P) -> None:
626
661
name = p [2 ],
627
662
extensible = p [3 ],
628
663
token = p [2 ],
664
+ token_col_start = self ._get_col (p , 2 ),
629
665
lineno = p .lineno (2 ),
630
666
filepath = self .current_filepath (),
631
667
indent = self .current_indent (p ),
632
668
comment_block = self .collect_comment_block (),
633
669
scope_stack = self .current_scope_stack (),
634
670
_bound = self .current_proto (),
671
+ scope_start_lineno = p .lineno (4 ), # '{'
672
+ scope_start_col = self ._get_col (p , 4 ), # '{'
635
673
)
636
674
self .push_scope (message )
637
675
638
676
@override_docstring (r_close_message_scope )
639
677
def p_close_message_scope(self, p: P) -> None:
    # Leave the current scope, stamp it with the position of the
    # closing '}' (symbol 1), and only then freeze it.
    closed_scope = self.pop_scope()
    end_lineno = p.lineno(1)  # '}'
    end_col = self._get_col(p, 1)  # '}'
    closed_scope.scope_end_lineno = end_lineno
    closed_scope.scope_end_col = end_col
    closed_scope.freeze()
641
682
642
683
@override_docstring (r_message_scope )
643
684
def p_message_scope (self , p : P ) -> None :
@@ -673,6 +714,7 @@ def p_message_field(self, p: P) -> None:
673
714
type = type ,
674
715
number = field_number ,
675
716
token = p [2 ],
717
+ token_col_start = self ._get_col (p , 2 ),
676
718
lineno = p .lineno (2 ),
677
719
filepath = self .current_filepath (),
678
720
comment_block = self .collect_comment_block (),
@@ -685,6 +727,7 @@ def p_message_field(self, p: P) -> None:
685
727
@override_docstring (r_message_field_name )
686
728
def p_message_field_name(self, p: P) -> None:
    # The result of this rule is symbol 1, passed through unchanged.
    p[0] = p[1]
    # Carry symbol 1's lexpos/lineno over to the result (symbol 0).
    self.copy_p_tracking(p, from_=1, to=0)
688
731
689
732
@override_docstring (r_boolean_literal )
690
733
def p_boolean_literal (self , p : P ) -> None :
@@ -700,7 +743,7 @@ def p_string_literal(self, p: P) -> None:
700
743
701
744
@override_docstring (r_dotted_identifier )
702
745
def p_dotted_identifier (self , p : P ) -> None :
703
- self .copy_p_tracking (p )
746
+ self .copy_p_tracking (p ) # from 1 => 0
704
747
if len (p ) == 4 :
705
748
p [0 ] = "." .join ([p [1 ], p [3 ]])
706
749
elif len (p ) == 2 :
@@ -716,6 +759,13 @@ def p_error(self, p: P) -> None:
716
759
raise GrammarError (filepath = filepath , token = p .value (1 ), lineno = p .lineno (1 ))
717
760
raise GrammarError ()
718
761
762
def _get_col(self, p: P, k: int) -> int:
    """Return the 1-based column at which symbol ``k`` of ``p`` starts.

    The column is the distance between the symbol's lexpos and the last
    newline found before it in the lexer's input (``p.lexer.lexdata``).
    """
    lexpos = p.lexpos(k)
    # We don't use `last_newline_pos` here, because the recursive parsing
    # may result in a deeper `last_newline_pos`.
    #
    # `rfind` returns -1 when no newline precedes the symbol, i.e. on the
    # first line. That -1 must NOT be clamped to 0: it stands for a virtual
    # newline just before offset 0, which makes first-line columns 1-based
    # like the columns on every other line.
    last_newline = p.lexer.lexdata.rfind("\n", 0, lexpos)
    return lexpos - last_newline
768
+
719
769
720
770
def parse (filepath : str , traditional_mode : bool = False ) -> Proto :
721
771
"""Parse a bitproto from given filepath.
@@ -726,3 +776,14 @@ def parse(filepath: str, traditional_mode: bool = False) -> Proto:
726
776
extensible grammar is used in traditional mode.
727
777
"""
728
778
return Parser (traditional_mode = traditional_mode ).parse (filepath )
779
+
780
+
781
def parse_string(
    content: str, traditional_mode: bool = False, filepath: str = ""
) -> Proto:
    """Parse a bitproto from the given string.

    A new Parser is created with the given ``traditional_mode``, and
    ``content`` is handed to its ``parse_string`` together with ``filepath``.
    """
    parser = Parser(traditional_mode=traditional_mode)
    return parser.parse_string(content, filepath=filepath)
0 commit comments