diff --git a/src/actions/transformations/js_decode.cc b/src/actions/transformations/js_decode.cc old mode 100644 new mode 100755 index be8fce45b7..e9742aee96 --- a/src/actions/transformations/js_decode.cc +++ b/src/actions/transformations/js_decode.cc @@ -71,14 +71,21 @@ int JsDecode::inplace(unsigned char *input, uint64_t input_len) { && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5]))) { /* \uHHHH */ - /* Use only the lower byte. */ - *d = utils::string::x2c(&input[i + 4]); + unsigned char lowestByte = utils::string::x2c(&input[i + 4]); - /* Full width ASCII (ff01 - ff5e) needs 0x20 added */ - if ((*d > 0x00) && (*d < 0x5f) + if ((lowestByte > 0x00) && (lowestByte < 0x5f) && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) - && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) { - (*d) += 0x20; + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) + { + /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */ + /* This is because the first printable char in ASCII is 0x20, and corresponds to 0xFF00. */ + *d = lowestByte + 0x20; + } + else + { + /* There was no good ASCII character to map this unicode character to. */ + /* Put a placeholder that is hopefully as innocent as the unicode character. */ + *d = 'x'; } d++; diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc old mode 100644 new mode 100755 index 894eaf6c75..c268afee6f --- a/src/actions/transformations/url_decode_uni.cc +++ b/src/actions/transformations/url_decode_uni.cc @@ -75,7 +75,7 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len, if (input[i] == '%') { if ((i + 1 < input_len) && ((input[i + 1] == 'u') || (input[i + 1] == 'U'))) { - /* Character is a percent sign. */ + /* Character is a percent sign. */ /* IIS-specific %u encoding. */ if (i + 5 < input_len) { /* We have at least 4 data bytes. */ @@ -113,18 +113,21 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len, if (hmap != -1) { *d = hmap; } else { - /* We first make use of the lower byte here, - * ignoring the higher byte. */ - *d = utils::string::x2c(&input[i + 4]); - - /* Full width ASCII (ff01 - ff5e) - * needs 0x20 added */ - if ((*d > 0x00) && (*d < 0x5f) - && ((input[i + 2] == 'f') - || (input[i + 2] == 'F')) - && ((input[i + 3] == 'f') - || (input[i + 3] == 'F'))) { - (*d) += 0x20; + unsigned char lowestByte = utils::string::x2c(&input[i + 4]); + + if ((lowestByte > 0x00) && (lowestByte < 0x5f) + && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) + { + /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */ + /* This is because the first printable char in ASCII is 0x20, and corresponds to 0xFF00. */ + *d = lowestByte + 0x20; + } + else + { + /* There was no good ASCII character to map this unicode character to. */ + /* Put a placeholder that is hopefully as innocent as the unicode character. */ + *d = 'x'; } } d++;