[m-rev.] for review: read min_int

Peter Wang novalazy at gmail.com
Fri May 11 10:19:50 AEST 2012


Branches: main

lexer.m tokenises "-INTEGER" as two tokens, a minus sign followed by a
positive integer.  This fails when the overall negative value is min_int,
because its absolute value is max_int+1, which is too big to store in an int.

One less obvious consequence of the bug is that io.read could not parse some
plain Mercury terms written out by io.write.

library/lexer.m:
	Add a `token.big_integer' constructor to hold big integer literals in
	their string representation.  Currently this is only done for base 10
	literals that cannot fit in an int; the handling of literals in other
	bases is unchanged.

library/parser.m:
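	Parse the token sequence, minus sign followed by big_integer max_int+1,
	as the integer term with value min_int.  A big_integer token is not
	otherwise accepted as a term, because the term type does not yet
	support big integers.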

tests/hard_coded/Mmakefile:
tests/hard_coded/lexer_bigint.exp:
tests/hard_coded/lexer_bigint.exp2:
tests/hard_coded/lexer_bigint.inp:
tests/hard_coded/lexer_bigint.m:
tests/hard_coded/read_min_int.exp:
tests/hard_coded/read_min_int.exp2:
tests/hard_coded/read_min_int.inp:
tests/hard_coded/read_min_int.m:
	Add test cases.

diff --git a/library/lexer.m b/library/lexer.m
index 8c387b2..2579276 100644
--- a/library/lexer.m
+++ b/library/lexer.m
@@ -1,7 +1,7 @@
 %-----------------------------------------------------------------------------%
 % vim: ft=mercury ts=4 sw=4 et
 %-----------------------------------------------------------------------------%
-% Copyright (C) 1993-2000, 2003-2008, 2011 The University of Melbourne.
+% Copyright (C) 1993-2000, 2003-2008, 2011-2012 The University of Melbourne.
 % This file may only be copied under the terms of the GNU Library General
 % Public License - see the file COPYING.LIB in the Mercury distribution.
 %-----------------------------------------------------------------------------%
@@ -30,6 +30,7 @@
     --->    name(string)
     ;       variable(string)
     ;       integer(int)
+    ;       big_integer(string) % does not fit in int
     ;       float(float)
     ;       string(string)      % "...."
     ;       implementation_defined(string) % $name
@@ -160,6 +161,8 @@ token_to_string(variable(Var), String) :-
 token_to_string(integer(Int), String) :-
     string.int_to_string(Int, IntString),
     string.append_list(["integer `", IntString, "'"], String).
+token_to_string(big_integer(BigInt), String) :-
+    string.append_list(["big integer `", BigInt, "'"], String).
 token_to_string(float(Float), String) :-
     string.float_to_string(Float, FloatString),
     string.append_list(["float `", FloatString, "'"], String).
@@ -230,6 +233,7 @@ get_token_list_2(Stream, Token0, Context0, Tokens, !IO) :-
         ; Token0 = string(_)
         ; Token0 = variable(_)
         ; Token0 = integer(_)
+        ; Token0 = big_integer(_)
         ; Token0 = implementation_defined(_)
         ; Token0 = junk(_)
         ; Token0 = name(_)
@@ -268,6 +272,7 @@ string_get_token_list_max(String, Len, Tokens, !Posn) :-
         ; Token = string(_)
         ; Token = variable(_)
         ; Token = integer(_)
+        ; Token = big_integer(_)
         ; Token = integer_dot(_)
         ; Token = implementation_defined(_)
         ; Token = junk(_)
@@ -2449,6 +2454,8 @@ rev_char_list_to_int(RevChars, Base, Token) :-
 conv_string_to_int(String, Base, Token) :-
     ( string.base_string_to_int(Base, String, Int) ->
         Token = integer(Int)
+    ; Base = 10 ->
+        Token = big_integer(String)
     ;
         Token = error("invalid integer token")
     ).
diff --git a/library/parser.m b/library/parser.m
index 3552e96..0a263c4 100644
--- a/library/parser.m
+++ b/library/parser.m
@@ -252,6 +252,7 @@ check_for_errors(ok(Term), VarSet, Tokens, LeftOverTokens, Result) :-
 :- pred check_for_bad_token(token_list::in, string::out, int::out) is semidet.
 
 check_for_bad_token(token_cons(Token, LineNum0, Tokens), Message, LineNum) :-
+    require_complete_switch [Token]
     (
         Token = io_error(IO_Error),
         io.error_message(IO_Error, IO_ErrorMessage),
@@ -273,6 +274,7 @@ check_for_bad_token(token_cons(Token, LineNum0, Tokens), Message, LineNum) :-
         ( Token = name(_)
         ; Token = variable(_)
         ; Token = integer(_)
+        ; Token = big_integer(_)
         ; Token = float(_)
         ; Token = string(_)
         ; Token = implementation_defined(_)
@@ -366,10 +368,17 @@ parse_left_term(MaxPriority, TermKind, OpPriority, Term, !PS) :-
         (
             % Check for unary minus of integer.
             Token = name("-"),
-            parser_get_token_context(integer(X), _IntContext, !PS)
+            parser_get_token_context(IntToken, _IntContext, !PS),
+            (
+                IntToken = integer(X),
+                NegX = 0 - X
+            ;
+                IntToken = big_integer(BigString),
+                max_int_plus_1(int.bits_per_int, BigString),
+                NegX = int.min_int
+            )
         ->
             get_term_context(!.PS, Context, TermContext),
-            NegX = 0 - X,
             Term = ok(term.functor(term.integer(NegX), [], TermContext)),
             OpPriority = 0
         ;
@@ -652,6 +661,10 @@ parse_simple_term_2(integer(Int), Context, _, Term, !PS) :-
     get_term_context(!.PS, Context, TermContext),
     Term = ok(term.functor(term.integer(Int), [], TermContext)).
 
+parse_simple_term_2(big_integer(_), _Context, _, _Term, !PS) :-
+    % The term type does not yet support big integers.
+    fail.
+
 parse_simple_term_2(float(Float), Context, _, Term, !PS) :-
     get_term_context(!.PS, Context, TermContext),
     Term = ok(term.functor(term.float(Float), [], TermContext)).
@@ -980,6 +993,7 @@ make_error(ParserState, Message) = error(Message, Tokens) :-
 could_start_term(name(_), yes).
 could_start_term(variable(_), yes).
 could_start_term(integer(_), yes).
+could_start_term(big_integer(_), yes).
 could_start_term(float(_), yes).
 could_start_term(string(_), yes).
 could_start_term(implementation_defined(_), yes).
@@ -1001,6 +1015,13 @@ could_start_term(integer_dot(_), no).
 
 %-----------------------------------------------------------------------------%
 
+:- pred max_int_plus_1(int::in, string::in) is semidet.
+
+max_int_plus_1(32, "2147483648").
+max_int_plus_1(64, "9223372036854775808").
+
+%-----------------------------------------------------------------------------%
+
 :- pred init_parser_state(Ops::in, string::in, token_list::in,
     state(Ops, T)::out) is det <= op_table(Ops).
 
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 033412a..5727a42 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -169,6 +169,7 @@ ORDINARY_PROGS=	\
 	lco_mday_bug_2 \
 	lco_no_inline \
 	lco_pack_args \
+	lexer_bigint \
 	lexer_zero \
 	list_series_int \
 	lookup_disj \
@@ -236,6 +237,7 @@ ORDINARY_PROGS=	\
 	random_permutation \
 	random_simple \
 	rational_test \
+	read_min_int \
 	recursive_main \
 	redoip_clobber \
 	remove_file \
diff --git a/tests/hard_coded/lexer_bigint.exp b/tests/hard_coded/lexer_bigint.exp
new file mode 100644
index 0000000..05c1843
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.exp
@@ -0,0 +1,51 @@
+integer(2147483646)
+integer(2147483647)
+big_integer("2147483648")
+name("-")
+integer(2147483647)
+name("-")
+big_integer("2147483648")
+name("-")
+big_integer("2147483649")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("9223372036854775807")
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+big_integer("9223372036854775807")
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
+
+integer(2147483646)
+integer(2147483647)
+big_integer("2147483648")
+name("-")
+integer(2147483647)
+name("-")
+big_integer("2147483648")
+name("-")
+big_integer("2147483649")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("9223372036854775807")
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+big_integer("9223372036854775807")
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
diff --git a/tests/hard_coded/lexer_bigint.exp2 b/tests/hard_coded/lexer_bigint.exp2
new file mode 100644
index 0000000..fd58a93
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.exp2
@@ -0,0 +1,51 @@
+integer(2147483646)
+integer(2147483647)
+integer(2147483648)
+name("-")
+integer(2147483647)
+name("-")
+integer(2147483648)
+name("-")
+integer(2147483649)
+integer(4294967295)
+integer(4294967295)
+integer(4294967295)
+integer(9223372036854775807)
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+integer(9223372036854775807)
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
+
+integer(2147483646)
+integer(2147483647)
+integer(2147483648)
+name("-")
+integer(2147483647)
+name("-")
+integer(2147483648)
+name("-")
+integer(2147483649)
+integer(4294967295)
+integer(4294967295)
+integer(4294967295)
+integer(9223372036854775807)
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+integer(9223372036854775807)
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
diff --git a/tests/hard_coded/lexer_bigint.inp b/tests/hard_coded/lexer_bigint.inp
new file mode 100644
index 0000000..eaa6f90
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.inp
@@ -0,0 +1,31 @@
+% 32-bit
+
+2147483646
+2147483647
+2147483648
+
+-2147483647
+-2147483648
+-2147483649
+
+0b11111111111111111111111111111111
+0o37777777777
+0xffffffff
+
+% 64-bit
+
+9223372036854775807
+9223372036854775808
+9223372036854775809
+
+-9223372036854775807
+-9223372036854775808
+-9223372036854775809
+
+0b1111111111111111111111111111111111111111111111111111111111111111
+0o1777777777777777777777
+0xffffffffffffffff
+
+% big integer everywhere
+
+999999999999999999999999987654321
diff --git a/tests/hard_coded/lexer_bigint.m b/tests/hard_coded/lexer_bigint.m
new file mode 100644
index 0000000..6e9ff9b
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.m
@@ -0,0 +1,57 @@
+%-----------------------------------------------------------------------------%
+
+:- module lexer_bigint.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module list.
+:- import_module lexer.
+:- import_module string.
+
+%-----------------------------------------------------------------------------%
+
+main(!IO) :-
+    % Read from the current input stream.
+    lexer.get_token_list(Tokens, !IO),
+    write_token_list(Tokens, !IO),
+    io.nl(!IO),
+
+    % Read from a string.
+    io.open_input("lexer_bigint.inp", OpenRes, !IO),
+    (
+        OpenRes = ok(Stream),
+        io.read_file_as_string(Stream, ReadRes, !IO),
+        (
+            ReadRes = ok(String),
+            Posn0 = posn(1, 0, 0),
+            lexer.string_get_token_list(String, StringTokens, Posn0, _Posn),
+            write_token_list(StringTokens, !IO)
+        ;
+            ReadRes = error(_, Error),
+            io.write(Error, !IO),
+            io.nl(!IO)
+        )
+    ;
+        OpenRes = error(Error),
+        io.write(Error, !IO),
+        io.nl(!IO)
+    ).
+
+:- pred write_token_list(token_list::in, io::di, io::uo) is det.
+
+write_token_list(token_nil, !IO).
+write_token_list(token_cons(Token, _Context, List), !IO) :-
+    io.write(Token, !IO),
+    io.nl(!IO),
+    write_token_list(List, !IO).
+
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sts=4 sw=4 et
diff --git a/tests/hard_coded/read_min_int.exp b/tests/hard_coded/read_min_int.exp
new file mode 100644
index 0000000..86226d3
--- /dev/null
+++ b/tests/hard_coded/read_min_int.exp
@@ -0,0 +1,13 @@
+foo(-2147483648)
+foo(2147483647)
+Syntax error at big integer `2147483648': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775807': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+
+foo(-2147483648)
+foo(2147483647)
+Syntax error at big integer `2147483648': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775807': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
diff --git a/tests/hard_coded/read_min_int.exp2 b/tests/hard_coded/read_min_int.exp2
new file mode 100644
index 0000000..0f63cc5
--- /dev/null
+++ b/tests/hard_coded/read_min_int.exp2
@@ -0,0 +1,13 @@
+foo(-2147483648)
+foo(2147483647)
+foo(2147483648)
+foo(-9223372036854775808)
+foo(9223372036854775807)
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+
+foo(-2147483648)
+foo(2147483647)
+foo(2147483648)
+foo(-9223372036854775808)
+foo(9223372036854775807)
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
diff --git a/tests/hard_coded/read_min_int.inp b/tests/hard_coded/read_min_int.inp
new file mode 100644
index 0000000..3685321
--- /dev/null
+++ b/tests/hard_coded/read_min_int.inp
@@ -0,0 +1,11 @@
+% 32-bit
+
+foo(-2147483648).           % min_int (ok)
+foo(2147483647).            % max_int (ok)
+foo(2147483648).            % max_int+1 (unsupported)
+
+% 64-bit
+
+foo(-9223372036854775808).  % min_int (ok)
+foo(9223372036854775807).   % max_int (ok)
+foo(9223372036854775808).   % max_int+1 (unsupported)
diff --git a/tests/hard_coded/read_min_int.m b/tests/hard_coded/read_min_int.m
new file mode 100644
index 0000000..5096401
--- /dev/null
+++ b/tests/hard_coded/read_min_int.m
@@ -0,0 +1,88 @@
+% Test reading terms with min_int arguments.
+
+:- module read_min_int.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module list.
+:- import_module string.
+
+:- type foo
+    --->    foo(int).
+
+main(!IO) :-
+    % Test io.read.
+    test_stdin(!IO),
+    io.nl(!IO),
+
+    % Test io.read_from_string.
+    io.open_input("read_min_int.inp", OpenRes, !IO),
+    (
+        OpenRes = ok(Stream),
+        test_read_from_string(Stream, !IO),
+        io.close_input(Stream, !IO)
+    ;
+        OpenRes = error(Error),
+        io.write(Error, !IO),
+        io.nl(!IO)
+    ).
+
+:- pred test_stdin(io::di, io::uo) is det.
+
+test_stdin(!IO) :-
+    io.read(Res, !IO),
+    (
+        Res = ok(X : foo),
+        io.write(X, !IO),
+        io.nl(!IO),
+        test_stdin(!IO)
+    ;
+        Res = error(Error, _),
+        io.write_string(Error, !IO),
+        io.nl(!IO),
+        test_stdin(!IO)
+    ;
+        Res = eof
+    ).
+
+:- pred test_read_from_string(io.input_stream::in, io::di, io::uo) is det.
+
+test_read_from_string(Stream, !IO) :-
+    io.read_line_as_string(Stream, IORes, !IO),
+    (
+        IORes = ok(String),
+        FileName = "",
+        Posn0 = posn(1, 0, 0),
+        io.read_from_string(FileName, String, length(String), Res,
+            Posn0, _Posn),
+        (
+            Res = ok(X : foo),
+            io.write(X, !IO),
+            io.nl(!IO)
+        ;
+            Res = eof
+        ;
+            Res = error(Error, _),
+            io.write_string(Error, !IO),
+            io.nl(!IO)
+        ),
+        test_read_from_string(Stream, !IO)
+    ;
+        IORes = eof
+    ;
+        IORes = error(IOError),
+        io.write_string(io.error_message(IOError), !IO),
+        io.nl(!IO)
+    ).
+
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sts=4 sw=4 et

--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list