@@ -4,77 +4,90 @@ use crate::constants::{ACTION_DELETED_FILE, ACTION_FILE};
44use crate :: core_types:: ActionType ;
55use regex:: Captures ;
66
7- /// Extracts action word and path string from HEADER_REGEX captures.
7+ /// Extracts action word and path string from HEADER_REGEX captures, ignoring trailing text.
8+ /// Relies on simplified regex capture groups and performs more parsing here.
89pub ( crate ) fn extract_action_path_from_captures ( caps : & Captures ) -> Option < ( String , String ) > {
910 let mut action_word: Option < String > = None ;
10- let mut header_path: Option < String > = None ;
11- let mut content_str: Option < & str > = None ;
11+ let mut final_path: Option < String > = None ;
1212
13- // Extract based on named capture groups
14- if let ( Some ( aw) , Some ( c) ) = ( caps. name ( "action_word_bold" ) , caps. name ( "content_bold" ) ) {
15- action_word = Some ( aw. as_str ( ) . to_string ( ) ) ;
16- content_str = Some ( c. as_str ( ) ) ;
17- } else if let ( Some ( aw) , Some ( c) ) = ( caps. name ( "action_word_hash" ) , caps. name ( "content_hash" ) ) {
18- action_word = Some ( aw. as_str ( ) . to_string ( ) ) ;
19- content_str = Some ( c. as_str ( ) ) ;
13+ // --- Determine Action Word and Raw Content/Path ---
14+
15+ // Check specific backtick path captures first (cleanest case)
16+ if let Some ( p) = caps. name ( "path_hash_backtick" ) {
17+ action_word = Some ( ACTION_FILE . to_string ( ) ) ;
18+ final_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
2019 } else if let Some ( p) = caps. name ( "path_backtick_only" ) {
2120 action_word = Some ( ACTION_FILE . to_string ( ) ) ;
22- header_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
21+ final_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
2322 } else if let Some ( p) = caps. name ( "path_numbered_backtick" ) {
2423 action_word = Some ( ACTION_FILE . to_string ( ) ) ;
25- header_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
24+ final_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
2625 } else if let Some ( p) = caps. name ( "path_bold_backtick" ) {
2726 action_word = Some ( ACTION_FILE . to_string ( ) ) ;
28- header_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
29- } else if let Some ( p) = caps. name ( "path_hash_backtick" ) {
30- action_word = Some ( ACTION_FILE . to_string ( ) ) ;
31- header_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
27+ final_path = Some ( p. as_str ( ) . trim ( ) . to_string ( ) ) ;
3228 }
33-
34- // Process content_str for Bold/Hash Action formats to extract path
35- if let Some ( content) = content_str {
36- let stripped_content = content. trim ( ) ;
37- // Check if the stripped content is *only* backticks (e.g., `` ` `` or ``` `` ```)
38- // If so, treat it as an empty path.
39- let is_only_backticks = stripped_content. starts_with ( '`' )
40- && stripped_content. ends_with ( '`' )
41- && stripped_content
42- . chars ( )
43- . skip ( 1 )
44- . take ( stripped_content. len ( ) - 2 )
45- . all ( |c| c == '`' ) ;
46-
47- if is_only_backticks {
48- header_path = Some ( "" . to_string ( ) ) ; // Treat as empty path explicitly
49- } else {
50- // Prefer path inside backticks if present within the content part
51- header_path = Some (
52- if stripped_content. len ( ) > 1
53- && stripped_content. starts_with ( '`' )
54- && stripped_content. ends_with ( '`' )
55- {
56- stripped_content[ 1 ..stripped_content. len ( ) - 1 ]
57- . trim ( )
58- . to_string ( ) // Inside backticks
59- } else {
60- stripped_content. to_string ( ) // Whole content as path
61- } ,
62- ) ;
63- }
29+ // Check combined Action: content captures (need parsing)
30+ else if let ( Some ( aw) , Some ( c) ) = ( caps. name ( "action_word_bold" ) , caps. name ( "content_bold" ) ) {
31+ action_word = Some ( aw. as_str ( ) . to_string ( ) ) ;
32+ final_path = parse_content_for_path ( c. as_str ( ) ) ;
33+ } else if let ( Some ( aw) , Some ( c) ) = ( caps. name ( "action_word_hash" ) , caps. name ( "content_hash" ) ) {
34+ action_word = Some ( aw. as_str ( ) . to_string ( ) ) ;
35+ final_path = parse_content_for_path ( c. as_str ( ) ) ;
6436 }
6537
66- // Validate and return
67- match ( action_word, header_path) {
68- // Ensure the extracted path is not empty AFTER trimming potential backticks and whitespace
69- ( Some ( aw) , Some ( hp) ) => {
70- let final_path = hp. trim ( ) ; // Trim whitespace from final path string
71- if !final_path. is_empty ( ) {
72- Some ( ( aw, final_path. to_string ( ) ) )
38+ // --- Validate and Return ---
39+ match ( action_word, final_path) {
40+ // Ensure final path is not empty AFTER trimming potential backticks and whitespace
41+ ( Some ( aw) , Some ( fp) ) => {
42+ let final_trimmed_path = fp. trim ( ) ;
43+ // Add check: reject if path consists ONLY of backticks after trimming
44+ if !final_trimmed_path. is_empty ( ) && final_trimmed_path. chars ( ) . all ( |c| c == '`' ) {
45+ return None ;
46+ }
47+ if !final_trimmed_path. is_empty ( ) {
48+ Some ( ( aw, final_trimmed_path. to_string ( ) ) )
7349 } else {
7450 None
7551 }
7652 }
77- _ => None ,
53+ _ => None , // No action word, or path parsing failed/resulted in empty path
54+ }
55+ }
56+
/// Parses the raw captured content string to extract the path, ignoring trailing text.
///
/// Resolution order:
///
/// 1. If the content contains a backtick-quoted section, the text between the first backtick
///    and the *next* backtick is the path. Closing on the next backtick (rather than the last
///    one in the string) keeps backticks that appear in trailing text from being swallowed
///    into the path. An empty or whitespace-only quoted section yields `None`.
/// 2. Otherwise the path runs from the start of the content up to the earliest trailing-text
///    marker (`" ("` or `" #"`), or to the end of the content when neither marker is present.
///
/// Returns `None` when the resulting path is empty after trimming.
fn parse_content_for_path(raw_content: &str) -> Option<String> {
    let content = raw_content.trim();

    // Prefer a path wrapped in backticks.
    if let Some(open) = content.find('`') {
        // A backtick is one byte, so `open + 1` is always a valid char boundary.
        let after_open = &content[open + 1..];
        if let Some(close) = after_open.find('`') {
            let quoted = after_open[..close].trim();
            return (!quoted.is_empty()).then(|| quoted.to_string());
        }
        // A lone, unmatched backtick: fall through and treat the content as a plain path.
    }

    // Plain path: cut at whichever trailing-text marker occurs first, not at whichever
    // marker happens to be checked first.
    let path_end = [" (", " #"]
        .iter()
        .filter_map(|&marker| content.find(marker))
        .min()
        .unwrap_or(content.len());

    let path = content[..path_end].trim();
    (!path.is_empty()).then(|| path.to_string())
}
8093
0 commit comments