From 103abf5517dde2d57d1e14c2bbaf48af360b289c Mon Sep 17 00:00:00 2001 From: Allan Boll Date: Thu, 2 Nov 2017 01:14:50 +0000 Subject: [PATCH 1/2] Unicode chars without an ASCII mapping should not default to whatever random byte is the lowest in the unicode code point --- src/actions/transformations/url_decode_uni.cc | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) mode change 100644 => 100755 src/actions/transformations/url_decode_uni.cc diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc old mode 100644 new mode 100755 index 894eaf6c75..d41fab77a5 --- a/src/actions/transformations/url_decode_uni.cc +++ b/src/actions/transformations/url_decode_uni.cc @@ -75,7 +75,7 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len, if (input[i] == '%') { if ((i + 1 < input_len) && ((input[i + 1] == 'u') || (input[i + 1] == 'U'))) { - /* Character is a percent sign. */ + /* Character is a percent sign. */ /* IIS-specific %u encoding. */ if (i + 5 < input_len) { /* We have at least 4 data bytes. */ @@ -113,19 +113,9 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len, if (hmap != -1) { *d = hmap; } else { - /* We first make use of the lower byte here, - * ignoring the higher byte. */ - *d = utils::string::x2c(&input[i + 4]); - - /* Full width ASCII (ff01 - ff5e) - * needs 0x20 added */ - if ((*d > 0x00) && (*d < 0x5f) - && ((input[i + 2] == 'f') - || (input[i + 2] == 'F')) - && ((input[i + 3] == 'f') - || (input[i + 3] == 'F'))) { - (*d) += 0x20; - } + /* There was no ASCII character to map this unicode character to. */ + /* Put a placeholder that is hopefully as innocent as the unicode character. */ + *d = 'x'; } d++; count++; From 0e5129c43f2db18ca7dceea866be1ae23cb9cc03 Mon Sep 17 00:00:00 2001 From: Allan Boll Date: Fri, 3 Nov 2017 08:38:58 +0000 Subject: [PATCH 2/2] Fix unicode fallback in jsdecode and add back full width handling --- src/actions/transformations/js_decode.cc | 19 +++++++++++++------ src/actions/transformations/url_decode_uni.cc | 19 ++++++++++++++++--- 2 files changed, 29 insertions(+), 9 deletions(-) mode change 100644 => 100755 src/actions/transformations/js_decode.cc diff --git a/src/actions/transformations/js_decode.cc b/src/actions/transformations/js_decode.cc old mode 100644 new mode 100755 index be8fce45b7..e9742aee96 --- a/src/actions/transformations/js_decode.cc +++ b/src/actions/transformations/js_decode.cc @@ -71,14 +71,21 @@ int JsDecode::inplace(unsigned char *input, uint64_t input_len) { && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5]))) { /* \uHHHH */ - /* Use only the lower byte. */ - *d = utils::string::x2c(&input[i + 4]); + unsigned char lowestByte = utils::string::x2c(&input[i + 4]); - /* Full width ASCII (ff01 - ff5e) needs 0x20 added */ - if ((*d > 0x00) && (*d < 0x5f) + if ((lowestByte > 0x00) && (lowestByte < 0x5f) && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) - && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) { - (*d) += 0x20; + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) + { + /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */ + /* This is because the first printable char in ASCII is 0x20, and corresponds to 0xFF00. */ + *d = lowestByte + 0x20; + } + else + { + /* There was no good ASCII character to map this unicode character to. */ + /* Put a placeholder that is hopefully as innocent as the unicode character. */ + *d = 'x'; } d++; diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc index d41fab77a5..c268afee6f 100755 --- a/src/actions/transformations/url_decode_uni.cc +++ b/src/actions/transformations/url_decode_uni.cc @@ -113,9 +113,22 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len, if (hmap != -1) { *d = hmap; } else { - /* There was no ASCII character to map this unicode character to. */ - /* Put a placeholder that is hopefully as innocent as the unicode character. */ - *d = 'x'; + unsigned char lowestByte = utils::string::x2c(&input[i + 4]); + + if ((lowestByte > 0x00) && (lowestByte < 0x5f) + && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) + { + /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */ + /* This is because the first printable char in ASCII is 0x20, and corresponds to 0xFF00. */ + *d = lowestByte + 0x20; + } + else + { + /* There was no good ASCII character to map this unicode character to. */ + /* Put a placeholder that is hopefully as innocent as the unicode character. */ + *d = 'x'; + } } d++; count++;