[m-rev.] for review: read min_int
Peter Wang
novalazy at gmail.com
Fri May 11 10:19:50 AEST 2012
Branches: main
lexer.m tokenises "-INTEGER" as two tokens, a minus sign and a positive
integer. This fails when the overall negative value is min_int, because the
positive integer token would then have to hold max_int+1, which is too big to
store in an int.
One less obvious consequence of the bug is that io.read could not parse some
plain Mercury terms written out by io.write, namely those containing min_int.
library/lexer.m:
Add a `token.big_integer' constructor to hold big integer literals in
their string representation. Currently this is only done for base 10
literals which cannot fit in an int.
library/parser.m:
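Parse the token sequence, minus sign followed by big_integer max_int+1,
as the integer term with value min_int. Any other big_integer token is
still reported as a syntax error, because the term type does not yet
support big integers.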
tests/hard_coded/Mmakefile:
tests/hard_coded/lexer_bigint.exp:
tests/hard_coded/lexer_bigint.exp2:
tests/hard_coded/lexer_bigint.inp:
tests/hard_coded/lexer_bigint.m:
tests/hard_coded/read_min_int.exp:
tests/hard_coded/read_min_int.exp2:
tests/hard_coded/read_min_int.inp:
tests/hard_coded/read_min_int.m:
Add test cases.
diff --git a/library/lexer.m b/library/lexer.m
index 8c387b2..2579276 100644
--- a/library/lexer.m
+++ b/library/lexer.m
@@ -1,7 +1,7 @@
%-----------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%-----------------------------------------------------------------------------%
-% Copyright (C) 1993-2000, 2003-2008, 2011 The University of Melbourne.
+% Copyright (C) 1993-2000, 2003-2008, 2011-2012 The University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%-----------------------------------------------------------------------------%
@@ -30,6 +30,7 @@
---> name(string)
; variable(string)
; integer(int)
+ ; big_integer(string) % does not fit in int
; float(float)
; string(string) % "...."
; implementation_defined(string) % $name
@@ -160,6 +161,8 @@ token_to_string(variable(Var), String) :-
token_to_string(integer(Int), String) :-
string.int_to_string(Int, IntString),
string.append_list(["integer `", IntString, "'"], String).
+token_to_string(big_integer(BigInt), String) :-
+ string.append_list(["big integer `", BigInt, "'"], String).
token_to_string(float(Float), String) :-
string.float_to_string(Float, FloatString),
string.append_list(["float `", FloatString, "'"], String).
@@ -230,6 +233,7 @@ get_token_list_2(Stream, Token0, Context0, Tokens, !IO) :-
; Token0 = string(_)
; Token0 = variable(_)
; Token0 = integer(_)
+ ; Token0 = big_integer(_)
; Token0 = implementation_defined(_)
; Token0 = junk(_)
; Token0 = name(_)
@@ -268,6 +272,7 @@ string_get_token_list_max(String, Len, Tokens, !Posn) :-
; Token = string(_)
; Token = variable(_)
; Token = integer(_)
+ ; Token = big_integer(_)
; Token = integer_dot(_)
; Token = implementation_defined(_)
; Token = junk(_)
@@ -2449,6 +2454,8 @@ rev_char_list_to_int(RevChars, Base, Token) :-
conv_string_to_int(String, Base, Token) :-
( string.base_string_to_int(Base, String, Int) ->
Token = integer(Int)
+ ; Base = 10 ->
+ Token = big_integer(String)
;
Token = error("invalid integer token")
).
diff --git a/library/parser.m b/library/parser.m
index 3552e96..0a263c4 100644
--- a/library/parser.m
+++ b/library/parser.m
@@ -252,6 +252,7 @@ check_for_errors(ok(Term), VarSet, Tokens, LeftOverTokens, Result) :-
:- pred check_for_bad_token(token_list::in, string::out, int::out) is semidet.
check_for_bad_token(token_cons(Token, LineNum0, Tokens), Message, LineNum) :-
+ require_complete_switch [Token]
(
Token = io_error(IO_Error),
io.error_message(IO_Error, IO_ErrorMessage),
@@ -273,6 +274,7 @@ check_for_bad_token(token_cons(Token, LineNum0, Tokens), Message, LineNum) :-
( Token = name(_)
; Token = variable(_)
; Token = integer(_)
+ ; Token = big_integer(_)
; Token = float(_)
; Token = string(_)
; Token = implementation_defined(_)
@@ -366,10 +368,17 @@ parse_left_term(MaxPriority, TermKind, OpPriority, Term, !PS) :-
(
% Check for unary minus of integer.
Token = name("-"),
- parser_get_token_context(integer(X), _IntContext, !PS)
+ parser_get_token_context(IntToken, _IntContext, !PS),
+ (
+ IntToken = integer(X),
+ NegX = 0 - X
+ ;
+ IntToken = big_integer(BigString),
+ max_int_plus_1(int.bits_per_int, BigString),
+ NegX = int.min_int
+ )
->
get_term_context(!.PS, Context, TermContext),
- NegX = 0 - X,
Term = ok(term.functor(term.integer(NegX), [], TermContext)),
OpPriority = 0
;
@@ -652,6 +661,10 @@ parse_simple_term_2(integer(Int), Context, _, Term, !PS) :-
get_term_context(!.PS, Context, TermContext),
Term = ok(term.functor(term.integer(Int), [], TermContext)).
+parse_simple_term_2(big_integer(_), _Context, _, _Term, !PS) :-
+ % The term type does not yet support big integers.
+ fail.
+
parse_simple_term_2(float(Float), Context, _, Term, !PS) :-
get_term_context(!.PS, Context, TermContext),
Term = ok(term.functor(term.float(Float), [], TermContext)).
@@ -980,6 +993,7 @@ make_error(ParserState, Message) = error(Message, Tokens) :-
could_start_term(name(_), yes).
could_start_term(variable(_), yes).
could_start_term(integer(_), yes).
+could_start_term(big_integer(_), yes).
could_start_term(float(_), yes).
could_start_term(string(_), yes).
could_start_term(implementation_defined(_), yes).
@@ -1001,6 +1015,13 @@ could_start_term(integer_dot(_), no).
%-----------------------------------------------------------------------------%
+:- pred max_int_plus_1(int::in, string::in) is semidet.
+
+max_int_plus_1(32, "2147483648").
+max_int_plus_1(64, "9223372036854775808").
+
+%-----------------------------------------------------------------------------%
+
:- pred init_parser_state(Ops::in, string::in, token_list::in,
state(Ops, T)::out) is det <= op_table(Ops).
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 033412a..5727a42 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -169,6 +169,7 @@ ORDINARY_PROGS= \
lco_mday_bug_2 \
lco_no_inline \
lco_pack_args \
+ lexer_bigint \
lexer_zero \
list_series_int \
lookup_disj \
@@ -236,6 +237,7 @@ ORDINARY_PROGS= \
random_permutation \
random_simple \
rational_test \
+ read_min_int \
recursive_main \
redoip_clobber \
remove_file \
diff --git a/tests/hard_coded/lexer_bigint.exp b/tests/hard_coded/lexer_bigint.exp
new file mode 100644
index 0000000..05c1843
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.exp
@@ -0,0 +1,51 @@
+integer(2147483646)
+integer(2147483647)
+big_integer("2147483648")
+name("-")
+integer(2147483647)
+name("-")
+big_integer("2147483648")
+name("-")
+big_integer("2147483649")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("9223372036854775807")
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+big_integer("9223372036854775807")
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
+
+integer(2147483646)
+integer(2147483647)
+big_integer("2147483648")
+name("-")
+integer(2147483647)
+name("-")
+big_integer("2147483648")
+name("-")
+big_integer("2147483649")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("9223372036854775807")
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+big_integer("9223372036854775807")
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
diff --git a/tests/hard_coded/lexer_bigint.exp2 b/tests/hard_coded/lexer_bigint.exp2
new file mode 100644
index 0000000..fd58a93
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.exp2
@@ -0,0 +1,51 @@
+integer(2147483646)
+integer(2147483647)
+integer(2147483648)
+name("-")
+integer(2147483647)
+name("-")
+integer(2147483648)
+name("-")
+integer(2147483649)
+integer(4294967295)
+integer(4294967295)
+integer(4294967295)
+integer(9223372036854775807)
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+integer(9223372036854775807)
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
+
+integer(2147483646)
+integer(2147483647)
+integer(2147483648)
+name("-")
+integer(2147483647)
+name("-")
+integer(2147483648)
+name("-")
+integer(2147483649)
+integer(4294967295)
+integer(4294967295)
+integer(4294967295)
+integer(9223372036854775807)
+big_integer("9223372036854775808")
+big_integer("9223372036854775809")
+name("-")
+integer(9223372036854775807)
+name("-")
+big_integer("9223372036854775808")
+name("-")
+big_integer("9223372036854775809")
+integer(-1)
+integer(-1)
+integer(-1)
+big_integer("999999999999999999999999987654321")
diff --git a/tests/hard_coded/lexer_bigint.inp b/tests/hard_coded/lexer_bigint.inp
new file mode 100644
index 0000000..eaa6f90
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.inp
@@ -0,0 +1,31 @@
+% 32-bit
+
+2147483646
+2147483647
+2147483648
+
+-2147483647
+-2147483648
+-2147483649
+
+0b11111111111111111111111111111111
+0o37777777777
+0xffffffff
+
+% 64-bit
+
+9223372036854775807
+9223372036854775808
+9223372036854775809
+
+-9223372036854775807
+-9223372036854775808
+-9223372036854775809
+
+0b1111111111111111111111111111111111111111111111111111111111111111
+0o1777777777777777777777
+0xffffffffffffffff
+
+% big integer everywhere
+
+999999999999999999999999987654321
diff --git a/tests/hard_coded/lexer_bigint.m b/tests/hard_coded/lexer_bigint.m
new file mode 100644
index 0000000..6e9ff9b
--- /dev/null
+++ b/tests/hard_coded/lexer_bigint.m
@@ -0,0 +1,57 @@
+%-----------------------------------------------------------------------------%
+
+:- module lexer_bigint.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module list.
+:- import_module lexer.
+:- import_module string.
+
+%-----------------------------------------------------------------------------%
+
+main(!IO) :-
+ % Read from the current input stream.
+ lexer.get_token_list(Tokens, !IO),
+ write_token_list(Tokens, !IO),
+ io.nl(!IO),
+
+ % Read from a string.
+ io.open_input("lexer_bigint.inp", OpenRes, !IO),
+ (
+ OpenRes = ok(Stream),
+ io.read_file_as_string(Stream, ReadRes, !IO),
+ (
+ ReadRes = ok(String),
+ Posn0 = posn(1, 0, 0),
+ lexer.string_get_token_list(String, StringTokens, Posn0, _Posn),
+ write_token_list(StringTokens, !IO)
+ ;
+ ReadRes = error(_, Error),
+ io.write(Error, !IO),
+ io.nl(!IO)
+ )
+ ;
+ OpenRes = error(Error),
+ io.write(Error, !IO),
+ io.nl(!IO)
+ ).
+
+:- pred write_token_list(token_list::in, io::di, io::uo) is det.
+
+write_token_list(token_nil, !IO).
+write_token_list(token_cons(Token, _Context, List), !IO) :-
+ io.write(Token, !IO),
+ io.nl(!IO),
+ write_token_list(List, !IO).
+
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sts=4 sw=4 et
diff --git a/tests/hard_coded/read_min_int.exp b/tests/hard_coded/read_min_int.exp
new file mode 100644
index 0000000..86226d3
--- /dev/null
+++ b/tests/hard_coded/read_min_int.exp
@@ -0,0 +1,13 @@
+foo(-2147483648)
+foo(2147483647)
+Syntax error at big integer `2147483648': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775807': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+
+foo(-2147483648)
+foo(2147483647)
+Syntax error at big integer `2147483648': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775807': unexpected token at start of (sub)term
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
diff --git a/tests/hard_coded/read_min_int.exp2 b/tests/hard_coded/read_min_int.exp2
new file mode 100644
index 0000000..0f63cc5
--- /dev/null
+++ b/tests/hard_coded/read_min_int.exp2
@@ -0,0 +1,13 @@
+foo(-2147483648)
+foo(2147483647)
+foo(2147483648)
+foo(-9223372036854775808)
+foo(9223372036854775807)
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
+
+foo(-2147483648)
+foo(2147483647)
+foo(2147483648)
+foo(-9223372036854775808)
+foo(9223372036854775807)
+Syntax error at big integer `9223372036854775808': unexpected token at start of (sub)term
diff --git a/tests/hard_coded/read_min_int.inp b/tests/hard_coded/read_min_int.inp
new file mode 100644
index 0000000..3685321
--- /dev/null
+++ b/tests/hard_coded/read_min_int.inp
@@ -0,0 +1,11 @@
+% 32-bit
+
+foo(-2147483648). % min_int (ok)
+foo(2147483647). % max_int (ok)
+foo(2147483648). % max_int+1 (unsupported)
+
+% 64-bit
+
+foo(-9223372036854775808). % min_int (ok)
+foo(9223372036854775807). % max_int (ok)
+foo(9223372036854775808). % max_int+1 (unsupported)
diff --git a/tests/hard_coded/read_min_int.m b/tests/hard_coded/read_min_int.m
new file mode 100644
index 0000000..5096401
--- /dev/null
+++ b/tests/hard_coded/read_min_int.m
@@ -0,0 +1,88 @@
+% Test reading terms with min_int arguments.
+
+:- module read_min_int.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module list.
+:- import_module string.
+
+:- type foo
+ ---> foo(int).
+
+main(!IO) :-
+ % Test io.read.
+ test_stdin(!IO),
+ io.nl(!IO),
+
+ % Test io.read_from_string.
+ io.open_input("read_min_int.inp", OpenRes, !IO),
+ (
+ OpenRes = ok(Stream),
+ test_read_from_string(Stream, !IO),
+ io.close_input(Stream, !IO)
+ ;
+ OpenRes = error(Error),
+ io.write(Error, !IO),
+ io.nl(!IO)
+ ).
+
+:- pred test_stdin(io::di, io::uo) is det.
+
+test_stdin(!IO) :-
+ io.read(Res, !IO),
+ (
+ Res = ok(X : foo),
+ io.write(X, !IO),
+ io.nl(!IO),
+ test_stdin(!IO)
+ ;
+ Res = error(Error, _),
+ io.write_string(Error, !IO),
+ io.nl(!IO),
+ test_stdin(!IO)
+ ;
+ Res = eof
+ ).
+
+:- pred test_read_from_string(io.input_stream::in, io::di, io::uo) is det.
+
+test_read_from_string(Stream, !IO) :-
+ io.read_line_as_string(Stream, IORes, !IO),
+ (
+ IORes = ok(String),
+ FileName = "",
+ Posn0 = posn(1, 0, 0),
+ io.read_from_string(FileName, String, length(String), Res,
+ Posn0, _Posn),
+ (
+ Res = ok(X : foo),
+ io.write(X, !IO),
+ io.nl(!IO)
+ ;
+ Res = eof
+ ;
+ Res = error(Error, _),
+ io.write_string(Error, !IO),
+ io.nl(!IO)
+ ),
+ test_read_from_string(Stream, !IO)
+ ;
+ IORes = eof
+ ;
+ IORes = error(IOError),
+ io.write_string(io.error_message(IOError), !IO),
+ io.nl(!IO)
+ ).
+
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sts=4 sw=4 et
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list