From 103abf5517dde2d57d1e14c2bbaf48af360b289c Mon Sep 17 00:00:00 2001
From: Allan Boll <allanbo@microsoft.com>
Date: Thu, 2 Nov 2017 01:14:50 +0000
Subject: [PATCH 1/2] Unicode chars without an ASCII mapping should not default
 to the low byte of the Unicode code point

---
 src/actions/transformations/url_decode_uni.cc | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)
 mode change 100644 => 100755 src/actions/transformations/url_decode_uni.cc

diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc
old mode 100644
new mode 100755
index 894eaf6c75..d41fab77a5
--- a/src/actions/transformations/url_decode_uni.cc
+++ b/src/actions/transformations/url_decode_uni.cc
@@ -75,7 +75,7 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len,
         if (input[i] == '%') {
             if ((i + 1 < input_len) &&
                 ((input[i + 1] == 'u') || (input[i + 1] == 'U'))) {
-            /* Character is a percent sign. */
+                /* Character is a percent sign. */
                 /* IIS-specific %u encoding. */
                 if (i + 5 < input_len) {
                     /* We have at least 4 data bytes. */
@@ -113,19 +113,9 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len,
                         if (hmap != -1)  {
                             *d = hmap;
                         } else {
-                            /* We first make use of the lower byte here,
-                             * ignoring the higher byte. */
-                            *d = utils::string::x2c(&input[i + 4]);
-
-                            /* Full width ASCII (ff01 - ff5e)
-                             * needs 0x20 added */
-                            if ((*d > 0x00) && (*d < 0x5f)
-                                    && ((input[i + 2] == 'f')
-                                    || (input[i + 2] == 'F'))
-                                    && ((input[i + 3] == 'f')
-                                    || (input[i + 3] == 'F'))) {
-                                (*d) += 0x20;
-                            }
+                            /* There was no ASCII character to map this unicode character to. */
+                            /* Put a placeholder that is hopefully as innocent as the unicode character. */
+                            *d = 'x';
                         }
                         d++;
                         count++;

From 0e5129c43f2db18ca7dceea866be1ae23cb9cc03 Mon Sep 17 00:00:00 2001
From: Allan Boll <allanbo@microsoft.com>
Date: Fri, 3 Nov 2017 08:38:58 +0000
Subject: [PATCH 2/2] Fix unicode fallback in jsdecode and add back full width
 handling

---
 src/actions/transformations/js_decode.cc      | 19 +++++++++++++------
 src/actions/transformations/url_decode_uni.cc | 19 ++++++++++++++++---
 2 files changed, 29 insertions(+), 9 deletions(-)
 mode change 100644 => 100755 src/actions/transformations/js_decode.cc

diff --git a/src/actions/transformations/js_decode.cc b/src/actions/transformations/js_decode.cc
old mode 100644
new mode 100755
index be8fce45b7..e9742aee96
--- a/src/actions/transformations/js_decode.cc
+++ b/src/actions/transformations/js_decode.cc
@@ -71,14 +71,21 @@ int JsDecode::inplace(unsigned char *input, uint64_t input_len) {
                 && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5]))) {
                 /* \uHHHH */
 
-                /* Use only the lower byte. */
-                *d = utils::string::x2c(&input[i + 4]);
+                unsigned char lowestByte = utils::string::x2c(&input[i + 4]);
 
-                /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
-                if ((*d > 0x00) && (*d < 0x5f)
+                if ((lowestByte > 0x00) && (lowestByte < 0x5f)
                     && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
-                    && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) {
-                    (*d) += 0x20;
+                    && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
+                {
+                    /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */
+                    /* Fullwidth forms U+FF01..U+FF5E mirror ASCII 0x21..0x7E, so the low byte is offset by 0x20. */
+                    *d = lowestByte + 0x20;
+                }
+                else
+                {
+                    /* There was no good ASCII character to map this unicode character to. */
+                    /* Put a placeholder that is hopefully as innocent as the unicode character. */
+                    *d = 'x';
                 }
 
                 d++;
diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc
index d41fab77a5..c268afee6f 100755
--- a/src/actions/transformations/url_decode_uni.cc
+++ b/src/actions/transformations/url_decode_uni.cc
@@ -113,9 +113,22 @@ int UrlDecodeUni::inplace(unsigned char *input, uint64_t input_len,
                         if (hmap != -1)  {
                             *d = hmap;
                         } else {
-                            /* There was no ASCII character to map this unicode character to. */
-                            /* Put a placeholder that is hopefully as innocent as the unicode character. */
-                            *d = 'x';
+                             unsigned char lowestByte = utils::string::x2c(&input[i + 4]);
+
+                            if ((lowestByte > 0x00) && (lowestByte < 0x5f)
+                                && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
+                                && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
+                            {
+                                /* Full width ASCII (ff01 - ff5e) needs 0x20 added. */
+                                /* Fullwidth forms U+FF01..U+FF5E mirror ASCII 0x21..0x7E, so the low byte is offset by 0x20. */
+                                *d = lowestByte + 0x20;
+                            }
+                            else
+                            {
+                                /* There was no good ASCII character to map this unicode character to. */
+                                /* Put a placeholder that is hopefully as innocent as the unicode character. */
+                                *d = 'x';
+                            }
                         }
                         d++;
                         count++;