[m-rev.] for review: Replace MD4 implementation with MD5.

Peter Wang novalazy at gmail.com
Thu Jun 3 15:31:18 AEST 2021


We used MD4 digests in the implementation of the `--track-flags'
feature. While there is no particular need to change, IIRC the only
reason I didn't use the more common MD5 algorithm was that I did not
know of a small portable implementation at the time.

compiler/md5.m:
compiler/Mercury.options:
    Add an implementation of the MD5 digest algorithm, and move the
    --no-warn-unused-imports setting from libs.md4 to libs.md5.
    The C code is in the public domain, and apparently widely used.

compiler/md4.m:
    Delete this module.

compiler/libs.m:
    Include the new md5 module and stop including the old md4 module.

compiler/make.m:
    Calculate MD5 digest for `.track_flags' files instead of MD4.
---
 compiler/Mercury.options |   2 +-
 compiler/libs.m          |   2 +-
 compiler/make.m          |   4 +-
 compiler/md4.m           | 274 -----------------------------
 compiler/md5.m           | 368 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 372 insertions(+), 278 deletions(-)
 delete mode 100644 compiler/md4.m
 create mode 100644 compiler/md5.m

diff --git a/compiler/Mercury.options b/compiler/Mercury.options
index d1eae1bdc..e025612fd 100644
--- a/compiler/Mercury.options
+++ b/compiler/Mercury.options
@@ -69,7 +69,7 @@ MCFLAGS-transform_hlds.table_gen          = --no-optimize-constructor-last-call
 # In these files, some imports are needed only in some grades.
 # Until unused_imports.m can avoid generating messages about these,
 # disable the pass on these files.
-MCFLAGS-libs.md4                          = --no-warn-unused-imports
+MCFLAGS-libs.md5                          = --no-warn-unused-imports
 MCFLAGS-libs.process_util                 = --no-warn-unused-imports
 
 # process_util.m uses `kill' and `struct sigaction' from <signal.h>,
diff --git a/compiler/libs.m b/compiler/libs.m
index 815a6d6d7..5ae041228 100644
--- a/compiler/libs.m
+++ b/compiler/libs.m
@@ -37,7 +37,7 @@
 :- include_module file_util.
 :- include_module graph_colour.
 :- include_module int_emu.
-:- include_module md4.
+:- include_module md5.
 :- include_module pickle.
 :- include_module uint_emu.
 
diff --git a/compiler/make.m b/compiler/make.m
index b30890067..276f05148 100644
--- a/compiler/make.m
+++ b/compiler/make.m
@@ -70,7 +70,7 @@
 :- import_module backend_libs.
 :- import_module backend_libs.compile_target_code.
 :- import_module libs.handle_options.
-:- import_module libs.md4.
+:- import_module libs.md5.
 :- import_module libs.options.
 :- import_module libs.timestamp.
 :- import_module make.build.
@@ -700,7 +700,7 @@ option_table_hash(AllOptionArgs, Hash, !IO) :-
     list.filter(include_option_in_hash(InconsequentialOptions),
         OptionList, HashOptionList),
     globals.get_opt_tuple(AllOptionArgsGlobals, OptTuple),
-    Hash = md4sum(string({HashOptionList, OptTuple})).
+    Hash = md5sum(string({HashOptionList, OptTuple})).
 
 :- pred include_option_in_hash(set(option)::in,
     pair(option, option_data)::in) is semidet.
diff --git a/compiler/md4.m b/compiler/md4.m
deleted file mode 100644
index 64026b9bf..000000000
--- a/compiler/md4.m
+++ /dev/null
...
diff --git a/compiler/md5.m b/compiler/md5.m
new file mode 100644
index 000000000..e491cc59a
--- /dev/null
+++ b/compiler/md5.m
@@ -0,0 +1,368 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%---------------------------------------------------------------------------%
+% Copyright (C) 2021 The Mercury team.
+% This file may only be copied under the terms of the GNU General
+% Public License - see the file COPYING in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: md5.m.
+% Main author: wangp.
+%
+% This module contains an implementation of the MD5 message digest algorithm.
+%
+%---------------------------------------------------------------------------%
+
+:- module libs.md5.
+:- interface.
+
+:- func md5sum(string) = string.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module require.   % Required by non-C grades.
+
+%---------------------------------------------------------------------------%
+
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001.  No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * (This is a heavily cut-down "BSD license".)
+ *
+ * This differs from Colin Plumb's older public domain implementation in that
+ * no exactly 32-bit integer data type is required (any 32-bit or wider
+ * unsigned integer data type will do), there's no compile-time endianness
+ * configuration, and the function prototypes match OpenSSL's.  No code from
+ * Colin Plumb's implementation has been reused; this comment merely compares
+ * the properties of the two independent implementations.
+ *
+ * The primary goals of this implementation are portability and ease of use.
+ * It is meant to be fast, but not as fast as possible.  Some known
+ * optimizations are not included to reduce source code size and avoid
+ * compile-time configuration.
+ */
+
+:- pragma foreign_decl("C", "local", "
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef MR_uint_least32_t MD5_u32plus;
+
+typedef struct {
+    MD5_u32plus lo, hi;
+    MD5_u32plus a, b, c, d;
+    unsigned char buffer[64];
+    MD5_u32plus block[16];
+} MD5_CTX;
+
+static void MD5_Init(MD5_CTX *ctx);
+static void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
+static void MD5_Final(unsigned char *result, MD5_CTX *ctx);
+").
+
+:- pragma foreign_code("C", "
+/*
+ * The basic MD5 functions.
+ *
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ */
+#define F(x, y, z)          ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)          ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)          (((x) ^ (y)) ^ (z))
+#define H2(x, y, z)         ((x) ^ ((y) ^ (z)))
+#define I(x, y, z)          ((y) ^ ((x) | ~(z)))
+
+/*
+ * The MD5 transformation for all four rounds.
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+    (a) += f((b), (c), (d)) + (x) + (t); \
+    (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+    (a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them in a
+ * properly aligned word in host byte order.
+ *
+ * The check for little-endian architectures that tolerate unaligned memory
+ * accesses is just an optimization.  Nothing will break if it fails to detect
+ * a suitable architecture.
+ *
+ * Unfortunately, this optimization may be a C strict aliasing rules violation
+ * if the caller's data buffer has effective type that cannot be aliased by
+ * MD5_u32plus.  In practice, this problem may occur if these MD5 routines are
+ * inlined into a calling function, or with future and dangerously advanced
+ * link-time optimizations.  For the time being, keeping these MD5 routines in
+ * their own translation unit avoids the problem.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+    (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+    SET(n)
+#else
+#define SET(n) \
+    (ctx->block[(n)] = \
+    (MD5_u32plus)ptr[(n) * 4] | \
+    ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+    ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+    ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+    (ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update the bit
+ * counters.  There are no alignment requirements.
+ */
+static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+    const unsigned char *ptr;
+    MD5_u32plus a, b, c, d;
+    MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+    ptr = (const unsigned char *)data;
+
+    a = ctx->a;
+    b = ctx->b;
+    c = ctx->c;
+    d = ctx->d;
+
+    do {
+        saved_a = a;
+        saved_b = b;
+        saved_c = c;
+        saved_d = d;
+
+        /* Round 1 */
+        STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+        STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+        STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+        STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+        STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+        STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+        STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+        STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+        STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+        STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+        STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+        STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+        STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+        STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+        STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+        STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+        /* Round 2 */
+        STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+        STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+        STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+        STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+        STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+        STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+        STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+        STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+        STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+        STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+        STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+        STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+        STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+        STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+        STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+        STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+        /* Round 3 */
+        STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+        STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
+        STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+        STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
+        STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+        STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+        STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+        STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
+        STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+        STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
+        STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+        STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
+        STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+        STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
+        STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+        STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+        /* Round 4 */
+        STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+        STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+        STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+        STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+        STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+        STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+        STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+        STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+        STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+        STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+        STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+        STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+        STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+        STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+        STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+        STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+        a += saved_a;
+        b += saved_b;
+        c += saved_c;
+        d += saved_d;
+
+        ptr += 64;
+    } while (size -= 64);
+
+    ctx->a = a;
+    ctx->b = b;
+    ctx->c = c;
+    ctx->d = d;
+
+    return ptr;
+}
+
+static void MD5_Init(MD5_CTX *ctx)
+{
+    ctx->a = 0x67452301;
+    ctx->b = 0xefcdab89;
+    ctx->c = 0x98badcfe;
+    ctx->d = 0x10325476;
+
+    ctx->lo = 0;
+    ctx->hi = 0;
+}
+
+static void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+    MD5_u32plus saved_lo;
+    unsigned long used, available;
+
+    saved_lo = ctx->lo;
+    if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+        ctx->hi++;
+    ctx->hi += size >> 29;
+
+    used = saved_lo & 0x3f;
+
+    if (used) {
+        available = 64 - used;
+
+        if (size < available) {
+            memcpy(&ctx->buffer[used], data, size);
+            return;
+        }
+
+        memcpy(&ctx->buffer[used], data, available);
+        data = (const unsigned char *)data + available;
+        size -= available;
+        body(ctx, ctx->buffer, 64);
+    }
+
+    if (size >= 64) {
+        data = body(ctx, data, size & ~(unsigned long)0x3f);
+        size &= 0x3f;
+    }
+
+    memcpy(ctx->buffer, data, size);
+}
+
+#define OUT(dst, src) \
+    (dst)[0] = (unsigned char)(src); \
+    (dst)[1] = (unsigned char)((src) >> 8); \
+    (dst)[2] = (unsigned char)((src) >> 16); \
+    (dst)[3] = (unsigned char)((src) >> 24);
+
+static void MD5_Final(unsigned char *result, MD5_CTX *ctx)
+{
+    unsigned long used, available;
+
+    used = ctx->lo & 0x3f;
+
+    ctx->buffer[used++] = 0x80;
+
+    available = 64 - used;
+
+    if (available < 8) {
+        memset(&ctx->buffer[used], 0, available);
+        body(ctx, ctx->buffer, 64);
+        used = 0;
+        available = 64;
+    }
+
+    memset(&ctx->buffer[used], 0, available - 8);
+
+    ctx->lo <<= 3;
+    OUT(&ctx->buffer[56], ctx->lo)
+    OUT(&ctx->buffer[60], ctx->hi)
+
+    body(ctx, ctx->buffer, 64);
+
+    OUT(&result[0], ctx->a)
+    OUT(&result[4], ctx->b)
+    OUT(&result[8], ctx->c)
+    OUT(&result[12], ctx->d)
+
+    memset(ctx, 0, sizeof(*ctx));
+}
+").
+
+%---------------------------------------------------------------------------%
+
+:- pragma no_determinism_warning(md5sum/1).
+:- pragma foreign_proc("C",
+    md5sum(In::in) = (Digest::out),
+    [will_not_call_mercury, promise_pure, thread_safe, may_not_duplicate],
+"
+    MD5_CTX         ctx;
+    unsigned char   sum[16];
+    const char      hex[16] = ""0123456789abcdef"";
+    char            hexbuf[sizeof(sum) * 2 + 1];
+    char            *p;
+    size_t          i;
+
+    MD5_Init(&ctx);
+    MD5_Update(&ctx, (const unsigned char *) In, strlen(In));
+    MD5_Final(sum, &ctx);
+
+    /* Convert to hexadecimal string representation. */
+    p = hexbuf;
+    for (i = 0; i < sizeof(sum); i++) {
+        *p++ = hex[(sum[i] & 0xf0) >> 4];
+        *p++ = hex[(sum[i] & 0x0f)];
+    }
+    *p = '\\0';
+
+    MR_make_aligned_string_copy(Digest, hexbuf);
+").
+
+    % Implementation for non-C backends left as an exercise for the reader.
+    %
+md5sum(_) = _ :-
+    sorry($file, $pred).
+
+%---------------------------------------------------------------------------%
+:- end_module libs.md5.
+%---------------------------------------------------------------------------%
-- 
2.31.1



More information about the reviews mailing list