@@ -45,26 +45,29 @@ def _glycan_string_to_sugars_and_bonds(
45
45
glycan_string : str ,
46
46
) -> tuple [list [str ], list [GlycosidicBond ]]:
47
47
"""Parses the glycan string to its constituent sugars and bonds."""
48
+ glycan_string = glycan_string .strip () # Remove leading/trailing spaces
48
49
sugars : list [str ] = [] # Tracks all sugars
49
50
parent_sugar_idx : list [int ] = [] # Tracks the parent sugar for bond formation
50
51
bonds : list [GlycosidicBond ] = []
51
52
open_count , closed_count = 0 , 0
52
- for i in range (len (glycan_string )):
53
+ i = 0 # We increment unevenly so manually handle
54
+ while i < len (glycan_string ):
53
55
char = glycan_string [i ]
54
- if char == " " :
56
+ if char == " " : # Space; skip
57
+ i += 1
55
58
continue
56
- if char == "(" :
59
+ if char == "(" : # Open bracket
60
+ i += 1
57
61
open_count += 1
58
62
continue
59
- if char == ")" :
63
+ if char == ")" : # Close bracket
60
64
closed_count += 1
61
65
parent_sugar_idx .pop () # Remove
66
+ i += 1
62
67
continue
68
+ # Not a bracket or a space - should be either bond info or CCD
63
69
chunk = glycan_string [i : i + 3 ]
64
- if re .match (r"[0-9A-Z]{3}" , chunk ): # Match CCD codes (3 char, alphanumeric)
65
- sugars .append (chunk )
66
- parent_sugar_idx .append (len (sugars ) - 1 ) # latest sugar
67
- elif re .match (r"[1-6]{1}-[1-6]{1}" , chunk ):
70
+ if re .match (r"[1-6]{1}-[1-6]{1}" , chunk ):
68
71
s , d = chunk .split ("-" )
69
72
assert parent_sugar_idx
70
73
bonds .append (
@@ -75,6 +78,13 @@ def _glycan_string_to_sugars_and_bonds(
75
78
dst_atom = int (d ),
76
79
)
77
80
)
81
+ i += 3
82
+ elif re .match (r"[0-9A-Z]{3}" , chunk ):
83
+ sugars .append (chunk )
84
+ parent_sugar_idx .append (len (sugars ) - 1 ) # latest sugar
85
+ i += 3
86
+ else :
87
+ raise ValueError (f"Invalid glycan string: { glycan_string } " )
78
88
assert open_count == closed_count
79
89
return sugars , bonds
80
90
0 commit comments