[m-rev.] for review: Replace MD4 implementation with MD5.
Peter Wang
novalazy at gmail.com
Thu Jun 3 15:31:18 AEST 2021
We used MD4 digests in the implementation of the `--track-flags'
feature. While there is no particular need to change, IIRC the only
reason I didn't use the more common MD5 algorithm is because I did not
know of a small portable implementation at the time.
compiler/md5.m:
compiler/Mercury.options:
Add implementation of MD5 digest algorithm.
The C code is public domain code, and apparently widely used.
compiler/md4.m:
Delete this module.
compiler/libs.m:
Include new module and delete old module.
compiler/make.m:
Calculate MD5 digest for `.track_flags' files instead of MD4.
---
compiler/Mercury.options | 2 +-
compiler/libs.m | 2 +-
compiler/make.m | 4 +-
compiler/md4.m | 274 -----------------------------
compiler/md5.m | 368 +++++++++++++++++++++++++++++++++++++++
5 files changed, 372 insertions(+), 278 deletions(-)
delete mode 100644 compiler/md4.m
create mode 100644 compiler/md5.m
diff --git a/compiler/Mercury.options b/compiler/Mercury.options
index d1eae1bdc..e025612fd 100644
--- a/compiler/Mercury.options
+++ b/compiler/Mercury.options
@@ -69,7 +69,7 @@ MCFLAGS-transform_hlds.table_gen = --no-optimize-constructor-last-call
# In these files, some imports are needed only in some grades.
# Until unused_imports.m can avoid generating messages about these,
# disable the pass on these files.
-MCFLAGS-libs.md4 = --no-warn-unused-imports
+MCFLAGS-libs.md5 = --no-warn-unused-imports
MCFLAGS-libs.process_util = --no-warn-unused-imports
# process_util.m uses `kill' and `struct sigaction' from <signal.h>,
diff --git a/compiler/libs.m b/compiler/libs.m
index 815a6d6d7..5ae041228 100644
--- a/compiler/libs.m
+++ b/compiler/libs.m
@@ -37,7 +37,7 @@
:- include_module file_util.
:- include_module graph_colour.
:- include_module int_emu.
-:- include_module md4.
+:- include_module md5.
:- include_module pickle.
:- include_module uint_emu.
diff --git a/compiler/make.m b/compiler/make.m
index b30890067..276f05148 100644
--- a/compiler/make.m
+++ b/compiler/make.m
@@ -70,7 +70,7 @@
:- import_module backend_libs.
:- import_module backend_libs.compile_target_code.
:- import_module libs.handle_options.
-:- import_module libs.md4.
+:- import_module libs.md5.
:- import_module libs.options.
:- import_module libs.timestamp.
:- import_module make.build.
@@ -700,7 +700,7 @@ option_table_hash(AllOptionArgs, Hash, !IO) :-
list.filter(include_option_in_hash(InconsequentialOptions),
OptionList, HashOptionList),
globals.get_opt_tuple(AllOptionArgsGlobals, OptTuple),
- Hash = md4sum(string({HashOptionList, OptTuple})).
+ Hash = md5sum(string({HashOptionList, OptTuple})).
:- pred include_option_in_hash(set(option)::in,
pair(option, option_data)::in) is semidet.
diff --git a/compiler/md4.m b/compiler/md4.m
deleted file mode 100644
index 64026b9bf..000000000
--- a/compiler/md4.m
+++ /dev/null
...
diff --git a/compiler/md5.m b/compiler/md5.m
new file mode 100644
index 000000000..e491cc59a
--- /dev/null
+++ b/compiler/md5.m
@@ -0,0 +1,368 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%---------------------------------------------------------------------------%
+% Copyright (C) 2021 The Mercury team.
+% This file may only be copied under the terms of the GNU General
+% Public License - see the file COPYING in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: md5.
+% Main author: wangp.
+%
+% This module contains an implementation of the MD5 message digest algorithm.
+%
+%---------------------------------------------------------------------------%
+
+:- module libs.md5.
+:- interface.
+
+:- func md5sum(string) = string.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module require. % Required by non-C grades.
+
+%---------------------------------------------------------------------------%
+
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001. No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * (This is a heavily cut-down "BSD license".)
+ *
+ * This differs from Colin Plumb's older public domain implementation in that
+ * no exactly 32-bit integer data type is required (any 32-bit or wider
+ * unsigned integer data type will do), there's no compile-time endianness
+ * configuration, and the function prototypes match OpenSSL's. No code from
+ * Colin Plumb's implementation has been reused; this comment merely compares
+ * the properties of the two independent implementations.
+ *
+ * The primary goals of this implementation are portability and ease of use.
+ * It is meant to be fast, but not as fast as possible. Some known
+ * optimizations are not included to reduce source code size and avoid
+ * compile-time configuration.
+ */
+
+:- pragma foreign_decl("C", "local", "
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef MR_uint_least32_t MD5_u32plus;
+
+typedef struct {
+ MD5_u32plus lo, hi;
+ MD5_u32plus a, b, c, d;
+ unsigned char buffer[64];
+ MD5_u32plus block[16];
+} MD5_CTX;
+
+static void MD5_Init(MD5_CTX *ctx);
+static void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
+static void MD5_Final(unsigned char *result, MD5_CTX *ctx);
+").
+
+:- pragma foreign_code("C", "
+/*
+ * The basic MD5 functions.
+ *
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z) (((x) ^ (y)) ^ (z))
+#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+
+/*
+ * The MD5 transformation for all four rounds.
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+ (a) += f((b), (c), (d)) + (x) + (t); \
+ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+ (a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them in a
+ * properly aligned word in host byte order.
+ *
+ * The check for little-endian architectures that tolerate unaligned memory
+ * accesses is just an optimization. Nothing will break if it fails to detect
+ * a suitable architecture.
+ *
+ * Unfortunately, this optimization may be a C strict aliasing rules violation
+ * if the caller's data buffer has effective type that cannot be aliased by
+ * MD5_u32plus. In practice, this problem may occur if these MD5 routines are
+ * inlined into a calling function, or with future and dangerously advanced
+ * link-time optimizations. For the time being, keeping these MD5 routines in
+ * their own translation unit avoids the problem.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+ (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+ SET(n)
+#else
+#define SET(n) \
+ (ctx->block[(n)] = \
+ (MD5_u32plus)ptr[(n) * 4] | \
+ ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+ (ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update the bit
+ * counters. There are no alignment requirements.
+ */
+static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ const unsigned char *ptr;
+ MD5_u32plus a, b, c, d;
+ MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+ ptr = (const unsigned char *)data;
+
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+ d = ctx->d;
+
+ do {
+ saved_a = a;
+ saved_b = b;
+ saved_c = c;
+ saved_d = d;
+
+ /* Round 1 */
+ STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+ STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+ STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+ STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+ STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+ STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+ STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+ STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+ STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+ STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+ STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+ STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+ STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+ STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+ STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+ STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+ /* Round 2 */
+ STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+ STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+ STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+ STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+ STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+ STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+ STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+ STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+ STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+ STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+ STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+ STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+ STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+ STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+ STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+ STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+ /* Round 3 */
+ STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+ STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
+ STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+ STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
+ STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+ STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+ STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+ STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
+ STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+ STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
+ STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+ STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
+ STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+ STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
+ STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+ STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+ /* Round 4 */
+ STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+ STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+ STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+ STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+ STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+ STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+ STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+ STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+ STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+ STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+ STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+ STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+ STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+ STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+ STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+ STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+ a += saved_a;
+ b += saved_b;
+ c += saved_c;
+ d += saved_d;
+
+ ptr += 64;
+ } while (size -= 64);
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+ ctx->d = d;
+
+ return ptr;
+}
+
+static void MD5_Init(MD5_CTX *ctx)
+{
+ ctx->a = 0x67452301;
+ ctx->b = 0xefcdab89;
+ ctx->c = 0x98badcfe;
+ ctx->d = 0x10325476;
+
+ ctx->lo = 0;
+ ctx->hi = 0;
+}
+
+static void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ MD5_u32plus saved_lo;
+ unsigned long used, available;
+
+ saved_lo = ctx->lo;
+ if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+ ctx->hi++;
+ ctx->hi += size >> 29;
+
+ used = saved_lo & 0x3f;
+
+ if (used) {
+ available = 64 - used;
+
+ if (size < available) {
+ memcpy(&ctx->buffer[used], data, size);
+ return;
+ }
+
+ memcpy(&ctx->buffer[used], data, available);
+ data = (const unsigned char *)data + available;
+ size -= available;
+ body(ctx, ctx->buffer, 64);
+ }
+
+ if (size >= 64) {
+ data = body(ctx, data, size & ~(unsigned long)0x3f);
+ size &= 0x3f;
+ }
+
+ memcpy(ctx->buffer, data, size);
+}
+
+#define OUT(dst, src) \
+ (dst)[0] = (unsigned char)(src); \
+ (dst)[1] = (unsigned char)((src) >> 8); \
+ (dst)[2] = (unsigned char)((src) >> 16); \
+ (dst)[3] = (unsigned char)((src) >> 24);
+
+static void MD5_Final(unsigned char *result, MD5_CTX *ctx)
+{
+ unsigned long used, available;
+
+ used = ctx->lo & 0x3f;
+
+ ctx->buffer[used++] = 0x80;
+
+ available = 64 - used;
+
+ if (available < 8) {
+ memset(&ctx->buffer[used], 0, available);
+ body(ctx, ctx->buffer, 64);
+ used = 0;
+ available = 64;
+ }
+
+ memset(&ctx->buffer[used], 0, available - 8);
+
+ ctx->lo <<= 3;
+ OUT(&ctx->buffer[56], ctx->lo)
+ OUT(&ctx->buffer[60], ctx->hi)
+
+ body(ctx, ctx->buffer, 64);
+
+ OUT(&result[0], ctx->a)
+ OUT(&result[4], ctx->b)
+ OUT(&result[8], ctx->c)
+ OUT(&result[12], ctx->d)
+
+ memset(ctx, 0, sizeof(*ctx));
+}
+").
+
+%---------------------------------------------------------------------------%
+
+:- pragma no_determinism_warning(md5sum/1).
+:- pragma foreign_proc("C",
+ md5sum(In::in) = (Digest::out),
+ [will_not_call_mercury, promise_pure, thread_safe, may_not_duplicate],
+"
+ MD5_CTX ctx;
+ unsigned char sum[16];
+ const char hex[16] = ""0123456789abcdef"";
+ char hexbuf[sizeof(sum) * 2 + 1];
+ char *p;
+ size_t i;
+
+ MD5_Init(&ctx);
+ MD5_Update(&ctx, (const unsigned char *) In, strlen(In));
+ MD5_Final(sum, &ctx);
+
+ /* Convert to hexadecimal string representation. */
+ p = hexbuf;
+ for (i = 0; i < sizeof(sum); i++) {
+ *p++ = hex[(sum[i] & 0xf0) >> 4];
+ *p++ = hex[(sum[i] & 0x0f)];
+ }
+ *p = '\\0';
+
+ MR_make_aligned_string_copy(Digest, hexbuf);
+").
+
+ % Implementation for non-C backends left as an exercise for the reader.
+ %
+md5sum(_) = _ :-
+ sorry($file, $pred).
+
+%---------------------------------------------------------------------------%
+:- end_module libs.md5.
+%---------------------------------------------------------------------------%
--
2.31.1
More information about the reviews
mailing list