[m-rev.] diff: fix parsing_utils.src_to_line_numbers for non-ASCII input
Peter Wang
novalazy at gmail.com
Mon Sep 5 16:16:17 AEST 2011
Branches: main, 11.07
parsing_utils.src_to_line_numbers crashed when given an input string
containing characters encoded as more than one code unit, i.e. non-ASCII
characters.
library/parsing_utils.m:
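Rewrite src_to_line_numbers to step through the string with string.unsafe_index_next instead of calling string.unsafe_index at every offset.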
tests/general/test_parsing_utils.exp:
tests/general/test_parsing_utils.m:
Add test case.
tests/general/test_parsing_utils.exp2:
Add expected output for grades using UTF-16 string encoding.
diff --git a/library/parsing_utils.m b/library/parsing_utils.m
index 454dd05..595da30 100644
--- a/library/parsing_utils.m
+++ b/library/parsing_utils.m
@@ -437,13 +437,23 @@ skip_whitespace(Src, PS0, PS) :-
src_to_line_numbers(Src) = LineNos :-
Str = Src ^ input_string,
- Lo = 0,
- Hi = Src ^ input_length - 1,
- F = ( func(I, Ns) =
- ( if string.unsafe_index(Str, I) = ('\n') then [I | Ns] else Ns )
- ),
- LineNosList = int.fold_down(F, Lo, Hi, [Src ^ input_length]),
- LineNos = array(LineNosList).
+ src_to_line_numbers_2(Str, 0, [], RevLineNosList),
+ LineNos = array.from_reverse_list(RevLineNosList).
+
+:- pred src_to_line_numbers_2(string::in, int::in,
+ list(int)::in, list(int)::out) is det.
+
+src_to_line_numbers_2(Str, Pos0, !RevLineNosList) :-
+ ( string.unsafe_index_next(Str, Pos0, Pos, Char) ->
+ ( Char = '\n' ->
+ !:RevLineNosList = [Pos0 | !.RevLineNosList]
+ ;
+ true
+ ),
+ src_to_line_numbers_2(Str, Pos, !RevLineNosList)
+ ;
+ !:RevLineNosList = [Pos0 | !.RevLineNosList]
+ ).
%-----------------------------------------------------------------------------%
diff --git a/tests/general/test_parsing_utils.exp b/tests/general/test_parsing_utils.exp
index 977e8be..fa22c89 100644
--- a/tests/general/test_parsing_utils.exp
+++ b/tests/general/test_parsing_utils.exp
@@ -269,6 +269,7 @@ pass: one_or_more(int_with_state) on "abc"
pass: one_or_more(int_with_state) on "1 2 3"
returned [3, 2, 1] as expected
[5 chars consumed]
+--
Line = 2, Pos = 5
Line = 2, Pos = 3
Line = 7, Pos = 1
@@ -279,6 +280,18 @@ Line = 1, Pos = 1
Line = 2, Pos = 10
Line = 3, Pos = 1
Line = 1, Pos = 1
+--
+Line = 1, Pos = 1
+Line = 1, Pos = 2
+Line = 1, Pos = 3
+Line = 1, Pos = 4
+Line = 1, Pos = 5
+Line = 2, Pos = 1
+Line = 2, Pos = 2
+Line = 2, Pos = 3
+Line = 2, Pos = 4
+Line = 3, Pos = 1
+--
expecting an operator
12 + x-pow(x + 3; y)
^
diff --git a/tests/general/test_parsing_utils.exp2 b/tests/general/test_parsing_utils.exp2
new file mode 100644
index 0000000..6f05593
--- /dev/null
+++ b/tests/general/test_parsing_utils.exp2
@@ -0,0 +1,315 @@
+pass: next_char on ""
+ failed as expected
+pass: next_char on "123"
+ returned '1' as expected
+ [1 chars consumed]
+pass: char_in_class("123") on ""
+ failed as expected
+pass: char_in_class("123") on "abc"
+ failed as expected
+pass: char_in_class("123") on "123"
+ returned '1' as expected
+ [1 chars consumed]
+pass: punct("!") on ""
+ failed as expected
+pass: punct("!") on "abc"
+ failed as expected
+pass: punct("!") on "*"
+ failed as expected
+pass: punct("!") on "!"
+ returned unit as expected
+ [1 chars consumed]
+pass: keyword("ABC", "ABC") on ""
+ failed as expected
+pass: keyword("ABC", "ABC") on "123"
+ failed as expected
+pass: keyword("ABC", "ABC") on "ABCA"
+ failed as expected
+pass: keyword("ABC", "ABC") on "ABC 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: keyword("αβγ", "αβγ") on ""
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "123"
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "αβγα"
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "αβγ 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: keyword("ABC", "ABC") on "abc 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: ikeyword("αβγ", "αβγ") on "αβγ 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: identifier("ABC", "ABCabc_") on ""
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "abc"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "_"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "A"
+ returned "A" as expected
+ [1 chars consumed]
+pass: identifier("ABC", "ABCabc_") on "Ab_c"
+ returned "Ab_c" as expected
+ [4 chars consumed]
+pass: identifier("ABC", "ABCabc_") on "*"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "Abc !"
+ returned "Abc" as expected
+ [4 chars consumed]
+pass: identifier("αβγ", "αβγ_") on ""
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "abc"
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "_"
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "α"
+ returned "α" as expected
+ [1 chars consumed]
+pass: identifier("αβγ", "αβγ_") on "αβ_γ"
+ returned "αβ_γ" as expected
+ [4 chars consumed]
+pass: whitespace on ""
+ returned unit as expected
+ [0 chars consumed]
+pass: whitespace on "123"
+ returned unit as expected
+ [0 chars consumed]
+pass: whitespace on " "
+ returned unit as expected
+ [3 chars consumed]
+pass: whitespace on " 123"
+ returned unit as expected
+ [3 chars consumed]
+pass: skip_to_eol on ""
+ failed as expected
+pass: skip_to_eol on "blah blah
+"
+ returned unit as expected
+ [10 chars consumed]
+pass: skip_to_eol on "blah blah
+123"
+ returned unit as expected
+ [10 chars consumed]
+pass: eof on "123"
+ failed as expected
+pass: eof on ""
+ returned unit as expected
+ [0 chars consumed]
+pass: float_literal_as_string on ""
+ failed as expected
+pass: float_literal_as_string on "abc"
+ failed as expected
+pass: float_literal_as_string on "123"
+ failed as expected
+pass: float_literal_as_string on "123.0 abc"
+ returned "123.0" as expected
+ [8 chars consumed]
+pass: float_literal_as_string on "123.0e1 abc"
+ returned "123.0e1" as expected
+ [10 chars consumed]
+pass: float_literal_as_string on "-123.0 abc"
+ returned "-123.0" as expected
+ [9 chars consumed]
+pass: float_literal_as_string on "-123.0e1 abc"
+ returned "-123.0e1" as expected
+ [11 chars consumed]
+pass: float_literal_as_string on "-123.0E-1 abc"
+ returned "-123.0E-1" as expected
+ [12 chars consumed]
+pass: float_literal on ""
+ failed as expected
+pass: float_literal on "abc"
+ failed as expected
+pass: float_literal on "123"
+ failed as expected
+pass: float_literal on "123.0 abc"
+ returned 123.0 as expected
+ [8 chars consumed]
+pass: float_literal on "123.0e1 abc"
+ returned 1230.0 as expected
+ [10 chars consumed]
+pass: float_literal on "-123.0 abc"
+ returned -123.0 as expected
+ [9 chars consumed]
+pass: float_literal on "-123.0e1 abc"
+ returned -1230.0 as expected
+ [11 chars consumed]
+pass: float_literal on "-123.0E-1 abc"
+ returned -12.3 as expected
+ [12 chars consumed]
+pass: int_literal_as_string on ""
+ failed as expected
+pass: int_literal_as_string on "abc"
+ failed as expected
+pass: int_literal_as_string on "123.0"
+ failed as expected
+pass: int_literal_as_string on "123 abc"
+ returned "123" as expected
+ [6 chars consumed]
+pass: int_literal_as_string on "-123 abc"
+ returned "-123" as expected
+ [7 chars consumed]
+pass: int_literal_as_string on "999999999999999999999 abc"
+ returned "999999999999999999999" as expected
+ [24 chars consumed]
+pass: int_literal on ""
+ failed as expected
+pass: int_literal on "abc"
+ failed as expected
+pass: int_literal on "123.0"
+ failed as expected
+pass: int_literal on "123 abc"
+ returned 123 as expected
+ [6 chars consumed]
+pass: int_literal on "-123 abc"
+ returned -123 as expected
+ [7 chars consumed]
+pass: int_literal on "999999999999999999999 abc"
+ failed as expected
+pass: string_literal('\"') on ""
+ failed as expected
+pass: string_literal('\"') on ""123" abc"
+ returned "123" as expected
+ [8 chars consumed]
+pass: string_literal('\"') on ""1\"2\"3" abc"
+ returned "1\\\"2\\\"3" as expected
+ [12 chars consumed]
+pass: string_literal('\'') on ""
+ failed as expected
+pass: string_literal('\'') on "'123' abc"
+ returned "123" as expected
+ [8 chars consumed]
+pass: string_literal('\'') on "'1\'2\'3' abc"
+ returned "1\\\'2\\\'3" as expected
+ [12 chars consumed]
+pass: string_literal('‖') on ""
+ failed as expected
+pass: string_literal('‖') on "‖123‖ abc"
+ returned "123" as expected
+ [8 chars consumed]
+pass: string_literal('‖') on "‖αβγ‖ abc"
+ returned "αβγ" as expected
+ [8 chars consumed]
+pass: optional(punct("!")) on ""
+ returned no as expected
+ [0 chars consumed]
+pass: optional(punct("!")) on "abc"
+ returned no as expected
+ [0 chars consumed]
+pass: optional(punct("!")) on "! "
+ returned yes(unit) as expected
+ [4 chars consumed]
+pass: zero_or_more(punct("!")) on ""
+ returned [] as expected
+ [0 chars consumed]
+pass: zero_or_more(punct("!")) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: zero_or_more(punct("!")) on "!!! abc"
+ returned [unit, unit, unit] as expected
+ [6 chars consumed]
+pass: one_or_more(punct("!")) on ""
+ failed as expected
+pass: one_or_more(punct("!")) on "abc"
+ failed as expected
+pass: one_or_more(punct("!")) on "!!! abc"
+ returned [unit, unit, unit] as expected
+ [6 chars consumed]
+pass: brackets("(", ")", punct("!")) on ""
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "abc"
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "(abc)"
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "(!) abc"
+ returned unit as expected
+ [6 chars consumed]
+pass: separated_list("+", punct("!")) on ""
+ returned [] as expected
+ [0 chars consumed]
+pass: separated_list("+", punct("!")) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: separated_list("+", punct("!")) on "! abc"
+ returned [unit] as expected
+ [4 chars consumed]
+pass: separated_list("+", punct("!")) on "!+ ! + ! abc"
+ returned [unit, unit, unit] as expected
+ [11 chars consumed]
+pass: comma_separated_list(punct("!")) on ""
+ returned [] as expected
+ [0 chars consumed]
+pass: comma_separated_list(punct("!")) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: comma_separated_list(punct("!")) on "! abc"
+ returned [unit] as expected
+ [4 chars consumed]
+pass: comma_separated_list(punct("!")) on "!, ! , ! abc"
+ returned [unit, unit, unit] as expected
+ [11 chars consumed]
+pass: optional(int_with_state) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: optional(int_with_state) on "1"
+ returned [1] as expected
+ [1 chars consumed]
+pass: zero_or_more(int_with_state) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: zero_or_more(int_with_state) on "1 2 3"
+ returned [3, 2, 1] as expected
+ [5 chars consumed]
+pass: one_or_more(int_with_state) on "abc"
+ failed as expected
+pass: one_or_more(int_with_state) on "1 2 3"
+ returned [3, 2, 1] as expected
+ [5 chars consumed]
+--
+Line = 2, Pos = 5
+Line = 2, Pos = 3
+Line = 7, Pos = 1
+Line = 4, Pos = 2
+Line = 3, Pos = 1
+Line = 1, Pos = 3
+Line = 1, Pos = 1
+Line = 2, Pos = 10
+Line = 3, Pos = 1
+Line = 1, Pos = 1
+--
+Line = 1, Pos = 1
+Line = 1, Pos = 2
+Line = 1, Pos = 3
+Line = 1, Pos = 4
+Line = 2, Pos = 1
+Line = 2, Pos = 2
+Line = 3, Pos = 1
+Line = 4, Pos = 1
+Line = 4, Pos = 2
+Line = 4, Pos = 3
+--
+expecting an operator
+12 + x-pow(x + 3; y)
+ ^
+syntax error
+abs(x ++ 3)
+ ^
+expecting an operator
+abs (x))
+ ^
+unknown function: f
+1 + 3 MoD 2 + f(3 + x)
+ ^
+expecting an operator
+1 + /* comment */ 3 mody 2 + f(3 + x)
+ ^
+expecting an operator
+1 + 1x
+ ^
+unterminated comment
+1 + 2 /* blah blah ...
+ ^
diff --git a/tests/general/test_parsing_utils.m b/tests/general/test_parsing_utils.m
index 2ca65f5..e9f0460 100644
--- a/tests/general/test_parsing_utils.m
+++ b/tests/general/test_parsing_utils.m
@@ -33,6 +33,7 @@
main(!IO) :-
unsorted_aggregate(run_test, io.write_string, !IO),
+ io.write_string("--\n", !IO),
test_pos("123456789\n123456789\n", 14, !IO),
test_pos("\n123456789\n123456789\n\n\n\n\n\n", 3, !IO),
test_pos("\n1234\n12\n\n\nfewefwef\nwwfwe\n\n", 20, !IO),
@@ -43,6 +44,18 @@ main(!IO) :-
test_pos("123456789\n123456789\n\n", 19, !IO),
test_pos("123456789\n123456789\n\n", 20, !IO),
test_pos("", 0, !IO),
+ io.write_string("--\n", !IO),
+ test_pos("ábc\n☿\n\n", 0, !IO),
+ test_pos("ábc\n☿\n\n", 1, !IO),
+ test_pos("ábc\n☿\n\n", 2, !IO),
+ test_pos("ábc\n☿\n\n", 3, !IO),
+ test_pos("ábc\n☿\n\n", 4, !IO),
+ test_pos("ábc\n☿\n\n", 5, !IO),
+ test_pos("ábc\n☿\n\n", 6, !IO),
+ test_pos("ábc\n☿\n\n", 7, !IO),
+ test_pos("ábc\n☿\n\n", 8, !IO),
+ test_pos("ábc\n☿\n\n", 9, !IO),
+ io.write_string("--\n", !IO),
test_err("12 + x-pow(x + 3; y)", expr_top, !IO),
test_err("abs(x ++ 3)", expr_top, !IO),
test_err("abs (x))", expr_top, !IO),
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews mailing list