utils: add functions for uint/int and sign conversions

bettio · bettio · commit 774cb3290d7b · 2025-04-29T13:38:13.000+02:00
Add functions that do not rely on undefined behavior for converting
unsigned integers to negative signed integers (and vice versa), for checking
whether such a conversion overflows, and for conditionally negating a value.

Start using the newly introduced utilities in both intn and externalterm
(the old REMOVE_SIGN macro is removed).

Signed-off-by: Davide Bettio <davide@uninstall.it>
diff --git a/src/libAtomVM/externalterm.c b/src/libAtomVM/externalterm.c
@@ -59,10 +59,6 @@
 #define MAP_EXT_BASE_SIZE 5
 #define SMALL_ATOM_EXT_BASE_SIZE 2
 
-// Assuming two's-complement implementation of signed integers
-#define REMOVE_SIGN(val, unsigned_type)                                                            \
-    ((val) < 0 ? ~((unsigned_type) (val)) + 1 : (unsigned_type) (val))
-
 // MAINTENANCE NOTE.  Range checking on the external term buffer is only performed in
 // the calculate_heap_usage function, which will fail with an invalid term if there is
 // insufficient space in the external term buffer (preventing reading off the end of the
@@ -250,12 +246,13 @@ static int serialize_term(uint8_t *buf, term t, GlobalContext *glb)
             }
             return INTEGER_EXT_SIZE;
         } else {
-            avm_uint64_t unsigned_val = REMOVE_SIGN(val, avm_uint64_t);
+            bool is_negative;
+            avm_uint64_t unsigned_val = int64_safe_unsigned_abs_set_flag(val, &is_negative);
             uint8_t num_bytes = get_num_bytes(unsigned_val);
             if (buf != NULL) {
                 buf[0] = SMALL_BIG_EXT;
                 buf[1] = num_bytes;
-                buf[2] = val < 0 ? 0x01 : 0x00;
+                buf[2] = is_negative ? 0x01 : 0x00;
                 write_bytes(buf + 3, unsigned_val);
             }
             return SMALL_BIG_EXT_BASE_SIZE + num_bytes;
@@ -439,13 +436,9 @@ static term parse_external_terms(const uint8_t *external_term_buf, size_t *eterm
             // NB due to call to calculate_heap_usage, there is no loss of precision:
             // 1. 0 <= unsigned_value <= INT64_MAX if sign is 0
             // 2. 0 <= unsigned_value <= INT64_MAX + 1 if sign is not 0
-            avm_int64_t value = 0;
-            if (sign != 0x00) {
-                value = -((avm_int64_t) unsigned_value);
-            } else {
-                value = (avm_int64_t) unsigned_value;
-            }
+            avm_int64_t value = int64_cond_neg_unsigned(sign != 0x00, unsigned_value);
             *eterm_size = SMALL_BIG_EXT_BASE_SIZE + num_bytes;
+
             return term_make_maybe_boxed_int64(value, heap);
         }
 
@@ -700,12 +693,7 @@ static int calculate_heap_usage(const uint8_t *external_term_buf, size_t remaini
             }
             // Compute the size with the sign as -2^27 or -2^59 can be encoded
             // on 1 term while 2^27 and 2^59 respectively (32/64 bits) cannot.
-            avm_int64_t value = 0;
-            if (sign != 0x00) {
-                value = -((avm_int64_t) unsigned_value);
-            } else {
-                value = (avm_int64_t) unsigned_value;
-            }
+            avm_int64_t value = int64_cond_neg_unsigned(sign != 0x00, unsigned_value);
             return term_boxed_integer_size(value);
         }
 
diff --git a/src/libAtomVM/intn.h b/src/libAtomVM/intn.h
@@ -77,16 +77,8 @@ static inline void intn_copy(
     memset(out + num_len, 0, (extend_to - num_len) * sizeof(intn_digit_t));
 }
 
-static inline void int64_to_intn_2(int64_t i64, uint32_t out[], intn_integer_sign_t *out_sign)
+static inline void intn_u64_to_digits(uint64_t absu64, uint32_t out[])
 {
-    uint64_t absu64;
-    if (i64 < 0) {
-        absu64 = -i64;
-        *out_sign = IntNNegativeInteger;
-    } else {
-        absu64 = i64;
-        *out_sign = IntNPositiveInteger;
-    }
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     memcpy(out, &absu64, sizeof(absu64));
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -98,24 +90,47 @@ static inline void int64_to_intn_2(int64_t i64, uint32_t out[], intn_integer_sig
 #endif
 }
 
+// Splits a signed 64-bit integer into a sign and an unsigned 64-bit magnitude, then stores
+// the magnitude into out[] through intn_u64_to_digits (out must have room for 8 bytes).
+// The magnitude comes from int64_safe_unsigned_abs_set_flag, so INT64_MIN does not trigger
+// signed overflow. Zero is reported as IntNPositiveInteger.
+static inline void int64_to_intn_2(int64_t i64, uint32_t out[], intn_integer_sign_t *out_sign)
+{
+    bool is_negative;
+    uint64_t absu64 = int64_safe_unsigned_abs_set_flag(i64, &is_negative);
+    *out_sign = is_negative ? IntNNegativeInteger : IntNPositiveInteger;
+    intn_u64_to_digits(absu64, out);
+}
+
+// Builds a uint64_t from the first two digits of num: num[0] supplies the low 32 bits and
+// num[1] the high 32 bits. num must hold at least two digits.
+static inline uint64_t intn_digits_to_u64(const intn_digit_t num[])
+{
+    uint64_t utmp;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    memcpy(&utmp, num, sizeof(uint64_t));
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    utmp = (((uint64_t) num[1] << 32) | (uint64_t) num[0]);
+#else
+#error "Unsupported endianess"
+#endif
+
+    return utmp;
+}
+
 static inline int64_t intn_2_digits_to_int64(
     const intn_digit_t num[], size_t len, intn_integer_sign_t sign)
 {
     switch (len) {
         case 0:
             return 0;
         case 1:
-            return (sign == IntNPositiveInteger) ? (int32_t) num[0] : -((int32_t) num[0]);
+            // A 32-bit digit can hold magnitudes above INT32_MAX: widen to 64 bits before
+            // applying the sign, so the value is not truncated through int32_t.
+            return int64_cond_neg_unsigned(sign == IntNNegativeInteger, num[0]);
         case 2: {
-            int64_t ret;
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-            memcpy(&ret, num, sizeof(int64_t));
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-            ret = (((uint64_t) num[1] << 32) | (uint64_t) num[0]);
-#else
-#error "Unsupported endianess"
-#endif
-            return (sign == IntNPositiveInteger) ? ret : -ret;
+            uint64_t utmp = intn_digits_to_u64(num);
+            return int64_cond_neg_unsigned(sign == IntNNegativeInteger, utmp);
         }
         default:
             UNREACHABLE();
@@ -127,12 +134,8 @@ static inline bool intn_fits_int64(const intn_digit_t num[], size_t len, intn_in
     if (len < INTN_INT64_LEN) {
         return true;
     } else if (len == INTN_INT64_LEN) {
-        uint64_t u64 = (((uint64_t) num[1]) << 32) | (num[0]);
-        if (sign == IntNPositiveInteger) {
-            return u64 <= ((uint64_t) INT64_MAX);
-        } else {
-            return u64 <= ((uint64_t) INT64_MAX) + 1;
-        }
+        uint64_t u64 = intn_digits_to_u64(num);
+        return !uint64_does_overflow_int64(u64, sign == IntNNegativeInteger);
     }
     return false;
 }
diff --git a/src/libAtomVM/utils.c b/src/libAtomVM/utils.c
@@ -207,17 +207,6 @@ size_t int64_write_to_ascii_buf(int64_t n, unsigned int base, char *out_end)
 
 #endif
 
-static inline int64_t int64_safe_neg_unsigned(uint64_t u64)
-{
-    return (-((int64_t) (u64 - 1)) - 1);
-}
-
-static inline int64_t uint64_does_overflow_int64(uint64_t val, bool is_negative)
-{
-    return ((is_negative && (val > ((uint64_t) INT64_MAX) + 1))
-        || (!is_negative && (val > ((uint64_t) INT64_MAX))));
-}
-
 static inline bool is_base_10_digit(char c)
 {
     return (c >= '0') && (c <= '9');
@@ -265,7 +254,7 @@ static int buf10_to_int64(
         utmp /= 10;
         pos--;
     }
-    *out = is_negative ? int64_safe_neg_unsigned(utmp) : (int64_t) utmp;
+    *out = int64_cond_neg_unsigned(is_negative, utmp);
     return pos;
 
 #elif INTPTR_MAX == INT32_MAX
@@ -379,7 +368,7 @@ static int buf16_to_int64(
         utmp >>= 4;
         pos--;
     }
-    *out = is_negative ? int64_safe_neg_unsigned(utmp) : (int64_t) utmp;
+    *out = int64_cond_neg_unsigned(is_negative, utmp);
     return pos;
 
 #elif INTPTR_MAX == INT32_MAX
@@ -407,7 +396,7 @@ static int buf16_to_int64(
         pos--;
     }
     // this trick is useful to avoid any intermediate undefined/overflow
-    *out = is_negative ? int64_safe_neg_unsigned(combined) : (int64_t) combined;
+    *out = int64_cond_neg_unsigned(is_negative, combined);
 
     return pos;
 #else
diff --git a/src/libAtomVM/utils.h b/src/libAtomVM/utils.h
@@ -29,6 +29,7 @@
 #define _UTILS_H_
 
 #include <inttypes.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -348,6 +349,109 @@ static inline __attribute__((always_inline)) func_ptr_t cast_void_to_func_ptr(vo
 #define ASSUME(...)
 #endif
 
+// Negates an unsigned 32-bit magnitude and returns it as int32_t. The subtraction is done on
+// unsigned operands, so it wraps around instead of overflowing a signed type.
+static inline int32_t int32_neg_unsigned(uint32_t u32)
+{
+    uint32_t wrapped = UINT32_C(0) - u32;
+    return (int32_t) wrapped;
+}
+
+// 64-bit counterpart of int32_neg_unsigned.
+static inline int64_t int64_neg_unsigned(uint64_t u64)
+{
+    uint64_t wrapped = UINT64_C(0) - u64;
+    return (int64_t) wrapped;
+}
+
+// Converts the magnitude u32 to int32_t, negating it when negative is true.
+static inline int32_t int32_cond_neg_unsigned(bool negative, uint32_t u32)
+{
+    if (negative) {
+        return int32_neg_unsigned(u32);
+    }
+    return (int32_t) u32;
+}
+
+// Converts the magnitude u64 to int64_t, negating it when negative is true.
+static inline int64_t int64_cond_neg_unsigned(bool negative, uint64_t u64)
+{
+    if (negative) {
+        return int64_neg_unsigned(u64);
+    }
+    return (int64_t) u64;
+}
+
+// Returns true when the magnitude u32, taken with the given sign, is outside the int32_t range.
+// A negative value may be as large as INT32_MAX + 1 (the magnitude of INT32_MIN).
+static inline bool uint32_does_overflow_int32(uint32_t u32, bool is_negative)
+{
+    uint32_t max_magnitude = (uint32_t) INT32_MAX;
+    if (is_negative) {
+        max_magnitude += 1;
+    }
+    return u32 > max_magnitude;
+}
+
+// Returns true when the magnitude u64, taken with the given sign, is outside the int64_t range.
+// A negative value may be as large as INT64_MAX + 1 (the magnitude of INT64_MIN).
+static inline bool uint64_does_overflow_int64(uint64_t u64, bool is_negative)
+{
+    uint64_t max_magnitude = (uint64_t) INT64_MAX;
+    if (is_negative) {
+        max_magnitude += 1;
+    }
+    return u64 > max_magnitude;
+}
+
+// Returns the absolute value of i32 as uint32_t, including for INT32_MIN.
+static inline uint32_t int32_safe_unsigned_abs(int32_t i32)
+{
+    if (i32 >= 0) {
+        return (uint32_t) i32;
+    }
+    // -(i32 + 1) cannot overflow for any negative i32; the 1 is added back after the cast.
+    return ((uint32_t) (-(i32 + 1))) + 1;
+}
+
+// Returns the absolute value of i64 as uint64_t, including for INT64_MIN.
+static inline uint64_t int64_safe_unsigned_abs(int64_t i64)
+{
+    if (i64 >= 0) {
+        return (uint64_t) i64;
+    }
+    // -(i64 + 1) cannot overflow for any negative i64; the 1 is added back after the cast.
+    return ((uint64_t) (-(i64 + 1))) + 1;
+}
+
+// Returns true when i32 is negative.
+static inline bool int32_is_negative(int32_t i32)
+{
+    return i32 < 0;
+}
+
+// Returns true when i64 is negative.
+static inline bool int64_is_negative(int64_t i64)
+{
+    return i64 < 0;
+}
+
+// Returns the absolute value of i32 and stores whether i32 was negative into *is_negative.
+static inline uint32_t int32_safe_unsigned_abs_set_flag(int32_t i32, bool *is_negative)
+{
+    uint32_t magnitude = int32_safe_unsigned_abs(i32);
+    *is_negative = int32_is_negative(i32);
+    return magnitude;
+}
+
+// Returns the absolute value of i64 and stores whether i64 was negative into *is_negative.
+static inline uint64_t int64_safe_unsigned_abs_set_flag(int64_t i64, bool *is_negative)
+{
+    uint64_t magnitude = int64_safe_unsigned_abs(i64);
+    *is_negative = int64_is_negative(i64);
+    return magnitude;
+}
+
 #if INTPTR_MAX <= INT32_MAX
 #define INTPTR_WRITE_TO_ASCII_BUF_LEN (32 + 1)
 #elif INTPTR_MAX <= INT64_MAX