|
| 1 | +CLASS zcl_highlighter DEFINITION |
| 2 | + PUBLIC |
| 3 | + ABSTRACT |
| 4 | + CREATE PUBLIC. |
| 5 | + |
| 6 | +************************************************************************ |
| 7 | +* Syntax Highlighter |
| 8 | +* |
| 9 | +* Copyright (c) 2014 abapGit Contributors |
| 10 | +* SPDX-License-Identifier: MIT |
| 11 | +************************************************************************ |
| 12 | + PUBLIC SECTION. |
| 13 | + |
| 14 | + CONSTANTS c_version TYPE string VALUE '1.0.0' ##NEEDED. |
| 15 | + |
| 16 | + METHODS process_line |
| 17 | + IMPORTING |
| 18 | + !iv_line TYPE string |
| 19 | + RETURNING |
| 20 | + VALUE(rv_line) TYPE string. |
| 21 | + |
| 22 | + METHODS set_hidden_chars |
| 23 | + IMPORTING |
| 24 | + !iv_hidden_chars TYPE abap_bool. |
| 25 | + |
| 26 | + PROTECTED SECTION. |
| 27 | + |
| 28 | + TYPES: |
| 29 | + BEGIN OF ty_match, |
| 30 | + token TYPE c LENGTH 1, " Type of matches |
| 31 | + offset TYPE i, " Beginning position of the string that should be formatted |
| 32 | + length TYPE i, " Length of the string that should be formatted |
| 33 | + text_tag TYPE string, " Type of text tag |
| 34 | + END OF ty_match, |
| 35 | + ty_match_tt TYPE STANDARD TABLE OF ty_match WITH DEFAULT KEY, |
| 36 | + BEGIN OF ty_rule, |
| 37 | + regex TYPE REF TO cl_abap_regex, |
| 38 | + token TYPE c LENGTH 1, |
| 39 | + style TYPE string, |
| 40 | + relevant_submatch TYPE i, |
| 41 | + END OF ty_rule. |
| 42 | + |
| 43 | + CONSTANTS c_token_none TYPE c VALUE '.'. |
| 44 | + |
| 45 | + DATA mt_rules TYPE STANDARD TABLE OF ty_rule. |
| 46 | + DATA mv_hidden_chars TYPE abap_bool. |
| 47 | + |
| 48 | + METHODS add_rule |
| 49 | + IMPORTING |
| 50 | + !iv_regex TYPE string |
| 51 | + !iv_token TYPE c |
| 52 | + !iv_style TYPE string |
| 53 | + !iv_submatch TYPE i OPTIONAL. |
| 54 | + |
| 55 | + METHODS parse_line |
| 56 | + IMPORTING |
| 57 | + !iv_line TYPE string |
| 58 | + RETURNING |
| 59 | + VALUE(rt_matches) TYPE ty_match_tt. |
| 60 | + |
| 61 | + METHODS order_matches |
| 62 | + IMPORTING |
| 63 | + !iv_line TYPE string |
| 64 | + CHANGING |
| 65 | + !ct_matches TYPE ty_match_tt. |
| 66 | + |
| 67 | + METHODS extend_matches |
| 68 | + IMPORTING |
| 69 | + !iv_line TYPE string |
| 70 | + CHANGING |
| 71 | + !ct_matches TYPE ty_match_tt. |
| 72 | + |
| 73 | + METHODS format_line |
| 74 | + IMPORTING |
| 75 | + !iv_line TYPE string |
| 76 | + !it_matches TYPE ty_match_tt |
| 77 | + RETURNING |
| 78 | + VALUE(rv_line) TYPE string. |
| 79 | + |
| 80 | + METHODS apply_style |
| 81 | + IMPORTING |
| 82 | + !iv_line TYPE string |
| 83 | + !iv_class TYPE string |
| 84 | + RETURNING |
| 85 | + VALUE(rv_line) TYPE string. |
| 86 | + |
| 87 | + METHODS is_whitespace |
| 88 | + IMPORTING |
| 89 | + !iv_string TYPE string |
| 90 | + RETURNING |
| 91 | + VALUE(rv_result) TYPE abap_bool. |
| 92 | + |
| 93 | + METHODS show_hidden_chars |
| 94 | + IMPORTING |
| 95 | + !iv_line TYPE string |
| 96 | + RETURNING |
| 97 | + VALUE(rv_line) TYPE string. |
| 98 | + |
| 99 | + PRIVATE SECTION. |
| 100 | +ENDCLASS. |
| 101 | + |
| 102 | + |
| 103 | + |
| 104 | +CLASS zcl_highlighter IMPLEMENTATION. |
| 105 | + |
| 106 | + |
| 107 | + METHOD add_rule. |
| 108 | + |
| 109 | + DATA ls_rule LIKE LINE OF mt_rules. |
| 110 | + |
| 111 | + IF NOT iv_regex IS INITIAL. |
| 112 | + CREATE OBJECT ls_rule-regex |
| 113 | + EXPORTING |
| 114 | + pattern = iv_regex |
| 115 | + ignore_case = abap_true. |
| 116 | + ENDIF. |
| 117 | + |
| 118 | + ls_rule-token = iv_token. |
| 119 | + ls_rule-style = iv_style. |
| 120 | + ls_rule-relevant_submatch = iv_submatch. |
| 121 | + APPEND ls_rule TO mt_rules. |
| 122 | + |
| 123 | + ENDMETHOD. |
| 124 | + |
| 125 | + |
| 126 | + METHOD apply_style. |
| 127 | + |
| 128 | + DATA lv_escaped TYPE string. |
| 129 | + |
| 130 | + lv_escaped = escape( val = iv_line |
| 131 | + format = cl_abap_format=>e_html_text ). |
| 132 | + |
| 133 | + lv_escaped = show_hidden_chars( lv_escaped ). |
| 134 | + |
| 135 | + IF iv_class IS NOT INITIAL. |
| 136 | + rv_line = |<span class="{ iv_class }">{ lv_escaped }</span>|. |
| 137 | + ELSE. |
| 138 | + rv_line = lv_escaped. |
| 139 | + ENDIF. |
| 140 | + |
| 141 | + ENDMETHOD. |
| 142 | + |
| 143 | + |
| 144 | + METHOD extend_matches. |
| 145 | + |
| 146 | + DATA: |
| 147 | + lv_line_len TYPE i, |
| 148 | + lv_last_pos TYPE i VALUE 0, |
| 149 | + lv_length TYPE i, |
| 150 | + ls_match TYPE ty_match. |
| 151 | + |
| 152 | + FIELD-SYMBOLS <ls_match> TYPE ty_match. |
| 153 | + |
| 154 | + lv_line_len = strlen( iv_line ). |
| 155 | + |
| 156 | + SORT ct_matches BY offset. |
| 157 | + |
| 158 | + " Add entries referring to parts of text that should not be formatted |
| 159 | + LOOP AT ct_matches ASSIGNING <ls_match>. |
| 160 | + IF <ls_match>-offset > lv_last_pos. |
| 161 | + lv_length = <ls_match>-offset - lv_last_pos. |
| 162 | + ls_match-token = c_token_none. |
| 163 | + ls_match-offset = lv_last_pos. |
| 164 | + ls_match-length = lv_length. |
| 165 | + INSERT ls_match INTO ct_matches INDEX sy-tabix. |
| 166 | + ENDIF. |
| 167 | + lv_last_pos = <ls_match>-offset + <ls_match>-length. |
| 168 | + ENDLOOP. |
| 169 | + |
| 170 | + " Add remainder of the string |
| 171 | + IF lv_line_len > lv_last_pos. |
| 172 | + lv_length = lv_line_len - lv_last_pos. |
| 173 | + ls_match-token = c_token_none. |
| 174 | + ls_match-offset = lv_last_pos. |
| 175 | + ls_match-length = lv_length. |
| 176 | + APPEND ls_match TO ct_matches. |
| 177 | + ENDIF. |
| 178 | + |
| 179 | + ENDMETHOD. |
| 180 | + |
| 181 | + |
| 182 | + METHOD format_line. |
| 183 | + |
| 184 | + DATA: |
| 185 | + lv_chunk TYPE string, |
| 186 | + ls_rule LIKE LINE OF mt_rules. |
| 187 | + |
| 188 | + FIELD-SYMBOLS <ls_match> TYPE ty_match. |
| 189 | + |
| 190 | + LOOP AT it_matches ASSIGNING <ls_match>. |
| 191 | + lv_chunk = substring( val = iv_line |
| 192 | + off = <ls_match>-offset |
| 193 | + len = <ls_match>-length ). |
| 194 | + |
| 195 | + CLEAR ls_rule. " Failed read equals no style |
| 196 | + READ TABLE mt_rules INTO ls_rule WITH KEY token = <ls_match>-token. |
| 197 | + |
| 198 | + lv_chunk = apply_style( iv_line = lv_chunk |
| 199 | + iv_class = ls_rule-style ). |
| 200 | + |
| 201 | + rv_line = rv_line && lv_chunk. |
| 202 | + ENDLOOP. |
| 203 | + |
| 204 | + ENDMETHOD. |
| 205 | + |
| 206 | + |
| 207 | + METHOD is_whitespace. |
| 208 | + |
| 209 | + DATA lv_whitespace TYPE string. |
| 210 | + |
| 211 | + "/^\s+$/ |
| 212 | + lv_whitespace = ` ` && cl_abap_char_utilities=>horizontal_tab && cl_abap_char_utilities=>cr_lf. |
| 213 | + |
| 214 | + rv_result = boolc( iv_string CO lv_whitespace ). |
| 215 | + |
| 216 | + ENDMETHOD. |
| 217 | + |
| 218 | + |
| 219 | + METHOD order_matches. |
| 220 | + ENDMETHOD. |
| 221 | + |
| 222 | + |
| 223 | + METHOD parse_line. |
| 224 | + |
| 225 | + DATA: |
| 226 | + lo_regex TYPE REF TO cl_abap_regex, |
| 227 | + lo_matcher TYPE REF TO cl_abap_matcher, |
| 228 | + lt_result TYPE match_result_tab, |
| 229 | + ls_match TYPE ty_match. |
| 230 | + |
| 231 | + FIELD-SYMBOLS: |
| 232 | + <ls_regex> LIKE LINE OF mt_rules, |
| 233 | + <ls_result> TYPE match_result, |
| 234 | + <ls_submatch> LIKE LINE OF <ls_result>-submatches. |
| 235 | + |
| 236 | + |
| 237 | + " Process syntax-dependent regex table and find all matches |
| 238 | + LOOP AT mt_rules ASSIGNING <ls_regex> WHERE regex IS BOUND. |
| 239 | + lo_regex = <ls_regex>-regex. |
| 240 | + lo_matcher = lo_regex->create_matcher( text = iv_line ). |
| 241 | + lt_result = lo_matcher->find_all( ). |
| 242 | + |
| 243 | + " Save matches into custom table with predefined tokens |
| 244 | + LOOP AT lt_result ASSIGNING <ls_result>. |
| 245 | + CLEAR: ls_match. |
| 246 | + IF <ls_regex>-relevant_submatch = 0. |
| 247 | + ls_match-token = <ls_regex>-token. |
| 248 | + ls_match-offset = <ls_result>-offset. |
| 249 | + ls_match-length = <ls_result>-length. |
| 250 | + APPEND ls_match TO rt_matches. |
| 251 | + ELSE. |
| 252 | + READ TABLE <ls_result>-submatches ASSIGNING <ls_submatch> INDEX <ls_regex>-relevant_submatch. |
| 253 | + "submatch might be empty if only discarded parts matched |
| 254 | + IF sy-subrc = 0 AND <ls_submatch>-offset >= 0 AND <ls_submatch>-length > 0. |
| 255 | + ls_match-token = <ls_regex>-token. |
| 256 | + ls_match-offset = <ls_submatch>-offset. |
| 257 | + ls_match-length = <ls_submatch>-length. |
| 258 | + APPEND ls_match TO rt_matches. |
| 259 | + ENDIF. |
| 260 | + ENDIF. |
| 261 | + ENDLOOP. |
| 262 | + ENDLOOP. |
| 263 | + |
| 264 | + ENDMETHOD. |
| 265 | + |
| 266 | + |
| 267 | + METHOD process_line. |
| 268 | + |
| 269 | + DATA lt_matches TYPE ty_match_tt. |
| 270 | + |
| 271 | + IF iv_line IS INITIAL OR is_whitespace( iv_line ) = abap_true. |
| 272 | + rv_line = show_hidden_chars( iv_line ). |
| 273 | + RETURN. |
| 274 | + ENDIF. |
| 275 | + |
| 276 | + lt_matches = parse_line( iv_line ). |
| 277 | + |
| 278 | + order_matches( EXPORTING iv_line = iv_line |
| 279 | + CHANGING ct_matches = lt_matches ). |
| 280 | + |
| 281 | + extend_matches( EXPORTING iv_line = iv_line |
| 282 | + CHANGING ct_matches = lt_matches ). |
| 283 | + |
| 284 | + rv_line = format_line( iv_line = iv_line |
| 285 | + it_matches = lt_matches ). |
| 286 | + |
| 287 | + ENDMETHOD. |
| 288 | + |
| 289 | + |
| 290 | + METHOD set_hidden_chars. |
| 291 | + mv_hidden_chars = iv_hidden_chars. |
| 292 | + ENDMETHOD. |
| 293 | + |
| 294 | + |
| 295 | + METHOD show_hidden_chars. |
| 296 | + |
| 297 | + DATA lv_bom TYPE x LENGTH 3. |
| 298 | + |
| 299 | + rv_line = iv_line. |
| 300 | + |
| 301 | + IF mv_hidden_chars = abap_true. |
| 302 | + REPLACE ALL OCCURRENCES OF cl_abap_char_utilities=>horizontal_tab IN rv_line WITH ' → '. |
| 303 | + REPLACE ALL OCCURRENCES OF cl_abap_char_utilities=>cr_lf(1) IN rv_line WITH '¶'. |
| 304 | + REPLACE ALL OCCURRENCES OF ` ` IN rv_line WITH '·'. |
| 305 | + REPLACE ALL OCCURRENCES OF cl_abap_char_utilities=>form_feed IN rv_line |
| 306 | + WITH '<span class="red">⊝</span>'. |
| 307 | + |
| 308 | + IF strlen( rv_line ) BETWEEN 1 AND 2. |
| 309 | + TRY. |
| 310 | + lv_bom = lcl_out=>convert( rv_line ). |
| 311 | + CATCH zcx_abapgit_exception ##NO_HANDLER. |
| 312 | + ENDTRY. |
| 313 | + IF lv_bom(2) = cl_abap_char_utilities=>byte_order_mark_big. |
| 314 | + rv_line = '<span class="red">▪</span>'. " UTF-16 big-endian (FE FF) |
| 315 | + ENDIF. |
| 316 | + IF lv_bom(2) = cl_abap_char_utilities=>byte_order_mark_little. |
| 317 | + rv_line = '<span class="red">∘</span>'. " UTF-16 little-endian (FF FE) |
| 318 | + ENDIF. |
| 319 | + IF lv_bom(3) = cl_abap_char_utilities=>byte_order_mark_utf8. |
| 320 | + rv_line = '<span class="red">¤</span>'. " UTF-8 (EF BB BF) |
| 321 | + ENDIF. |
| 322 | + ENDIF. |
| 323 | + ENDIF. |
| 324 | + |
| 325 | + ENDMETHOD. |
| 326 | +ENDCLASS. |
0 commit comments