[m-rev.] for review: fix some problems with non-ASCII strings
Peter Wang
novalazy at gmail.com
Tue May 17 11:03:02 AEST 2011
Branches: main
Fix some problems with non-ASCII strings.
compiler/error_util.m:
Make string formatting routines count code points instead of code
units.
Iterate over non-ASCII strings correctly.
compiler/llds_out_data.m:
compiler/stack_layout.m:
Explicitly count UTF-8 code units when counting the length of strings
that will be written to C source files, in case the compiler is built
in a grade which uses some other encoding. (The length argument to
the `MR_string_const' macro isn't actually used, so that change won't
have any practical effect.)
compiler/inst_check.m:
compiler/mercury_to_mercury.m:
compiler/structure_reuse.direct.choose_reuse.m:
Fix some code unit counts which should be code point counts.
compiler/make.m:
Iterate over non-ASCII characters correctly.
compiler/passes_aux.m:
Simplify the implementation of `stage_num_str'.
compiler/timestamp.m:
Make `string_to_timestamp' handle non-ASCII strings cleanly,
although they should never occur.
compiler/x86_64_out.m:
Split long comments at code point boundaries.
Use direct string comparisons to check that strings are non-empty.
compiler/elds_to_erlang.m:
compiler/erl_code_gen.m:
compiler/mlds_to_cs.m:
compiler/pickle.m:
compiler/switch_util.m:
Add some comments relating to non-ASCII characters.
library/parsing_utils.m:
Fix the string and keyword parsers to work on non-ASCII characters.
library/pprint.m:
library/pretty_printer.m:
Fix code unit counts which should be code point counts.
library/string.m:
Add `count_utf8_code_units'.
Fix the portable implementation of `string.to_char_list' (not
actually used) to work on non-ASCII strings.
Make string formatting routines count code points instead of code
units.
library/term_io.m:
Use a direct string comparison to check that a string is non-empty.
tests/general/test_parsing_utils.exp:
tests/general/test_parsing_utils.m:
tests/hard_coded/test_pretty_printer.exp:
tests/hard_coded/test_pretty_printer.m:
Test non-ASCII strings.
diff --git a/compiler/elds_to_erlang.m b/compiler/elds_to_erlang.m
index f1e8ea9..76ec6ba 100644
--- a/compiler/elds_to_erlang.m
+++ b/compiler/elds_to_erlang.m
@@ -957,9 +957,10 @@ output_rtti_id(ModuleInfo, RttiId, !IO) :-
:- func shorten_long_atom_name(string) = string.
shorten_long_atom_name(Name0) = Name :-
- % Erlang atom names can be up to 255 characters long, but the Erlang
- % compiler may mangle it (e.g. to derive the names of anonymous functions)
- % which would then exceed the limit.
+ % Erlang atom names can be up to 255 characters (bytes) long, but the
+ % Erlang compiler may mangle it (e.g. to derive the names of anonymous
+ % functions) which would then exceed the limit.
+ % This assumes the atom name consists of only ASCII characters.
(if string.length(Name0) =< 200 then
Name = Name0
else
diff --git a/compiler/erl_code_gen.m b/compiler/erl_code_gen.m
index 1fbf700..5ae5286 100644
--- a/compiler/erl_code_gen.m
+++ b/compiler/erl_code_gen.m
@@ -799,9 +799,9 @@ erl_gen_switch(Var, CanFail, CasesList, CodeModel, InstMap0, _Context,
list.length(CasesList) > switch_strings_as_atoms_limit,
- % The Erlang implementation limits atoms to be 255 characters long or
- % less, so we don't use the workaround if any cases are longer than
- % that.
+ % The Erlang implementation limits atoms to be 255 characters (bytes)
+ % long or less, so we don't use the workaround if any cases are longer
+ % than that.
all [String] (
(
list.member(case(MainConsId, OtherConsIds, _), CasesList),
diff --git a/compiler/error_util.m b/compiler/error_util.m
index 6861c22..43fe26c 100644
--- a/compiler/error_util.m
+++ b/compiler/error_util.m
@@ -950,9 +950,9 @@ do_write_error_pieces(TreatAsFirst, MaybeContext, FixedIndent, MaxWidth,
MaybeContext = yes(Context),
term.context_file(Context, FileName),
term.context_line(Context, LineNumber),
- string.length(FileName, FileNameLength),
+ string.count_codepoints(FileName, FileNameLength),
string.int_to_string(LineNumber, LineNumberStr),
- string.length(LineNumberStr, LineNumberStrLength0),
+ string.count_codepoints(LineNumberStr, LineNumberStrLength0),
( LineNumberStrLength0 < 3 ->
LineNumberStrLength = 3
;
@@ -1342,10 +1342,8 @@ break_into_words(String, Words0, Words) :-
break_into_words_from(String, Cur, Words0, Words) :-
( find_word_start(String, Cur, Start) ->
find_word_end(String, Start, End),
- Length = End - Start + 1,
- string.substring(String, Start, Length, WordStr),
- Next = End + 1,
- break_into_words_from(String, Next, [plain_word(WordStr) | Words0],
+ string.substring(String, Start, End - Start, WordStr),
+ break_into_words_from(String, End, [plain_word(WordStr) | Words0],
Words)
;
Words = Words0
@@ -1354,9 +1352,8 @@ break_into_words_from(String, Cur, Words0, Words) :-
:- pred find_word_start(string::in, int::in, int::out) is semidet.
find_word_start(String, Cur, WordStart) :-
- string.index(String, Cur, Char),
+ string.unsafe_index_next(String, Cur, Next, Char),
( char.is_whitespace(Char) ->
- Next = Cur + 1,
find_word_start(String, Next, WordStart)
;
WordStart = Cur
@@ -1365,8 +1362,7 @@ find_word_start(String, Cur, WordStart) :-
:- pred find_word_end(string::in, int::in, int::out) is det.
find_word_end(String, Cur, WordEnd) :-
- Next = Cur + 1,
- ( string.index(String, Next, Char) ->
+ ( string.unsafe_index_next(String, Cur, Next, Char) ->
( char.is_whitespace(Char) ->
WordEnd = Cur
;
@@ -1451,7 +1447,7 @@ group_nonfirst_line_words(Words, Indent, Max, Lines) :-
list(string)::out, list(string)::out) is det.
get_line_of_words(FirstWord, LaterWords, Indent, Max, Line, RestWords) :-
- string.length(FirstWord, FirstWordLen),
+ string.count_codepoints(FirstWord, FirstWordLen),
Avail = Max - Indent * indent_increment,
get_later_words(LaterWords, FirstWordLen, Avail, [FirstWord],
Line, RestWords).
@@ -1461,7 +1457,7 @@ get_line_of_words(FirstWord, LaterWords, Indent, Max, Line, RestWords) :-
get_later_words([], _, _, Line, Line, []).
get_later_words([Word | Words], OldLen, Avail, Line0, Line, RestWords) :-
- string.length(Word, WordLen),
+ string.count_codepoints(Word, WordLen),
NewLen = OldLen + 1 + WordLen,
( NewLen =< Avail ->
list.append(Line0, [Word], Line1),
diff --git a/compiler/inst_check.m b/compiler/inst_check.m
index 64f5175..1492da3 100644
--- a/compiler/inst_check.m
+++ b/compiler/inst_check.m
@@ -247,7 +247,7 @@ find_types_for_functor(FunctorsToTypes, Functor, Types) :-
% Zero arity functors with length 1 could match the builtin
% character type.
Name = unqualified(NameStr),
- string.length(NameStr) = 1
+ string.count_codepoints(NameStr) = 1
->
TypesExceptTuple = [type_builtin(builtin_type_char)
| TypesExceptChar]
diff --git a/compiler/llds_out_data.m b/compiler/llds_out_data.m
index 6ab3124..171a2b1 100644
--- a/compiler/llds_out_data.m
+++ b/compiler/llds_out_data.m
@@ -1025,18 +1025,18 @@ output_rval_const(Info, Const, !IO) :-
Const = llconst_string(String),
io.write_string("MR_string_const(""", !IO),
c_util.output_quoted_string(String, !IO),
- string.length(String, StringLength),
io.write_string(""", ", !IO),
- io.write_int(StringLength, !IO),
+ io.write_int(string.count_utf8_code_units(String), !IO),
io.write_string(")", !IO)
;
- Const = llconst_multi_string(String),
+ Const = llconst_multi_string(Strings),
io.write_string("MR_string_const(""", !IO),
- c_util.output_quoted_multi_string(String, !IO),
+ c_util.output_quoted_multi_string(Strings, !IO),
io.write_string(""", ", !IO),
% The "+1" is for the NULL character.
- Length = list.foldl((func(S, L0) = L0 + length(S) + 1), String, 0),
+ SumLengths = (func(S, L0) = L0 + string.count_utf8_code_units(S) + 1),
+ Length = list.foldl(SumLengths, Strings, 0),
io.write_int(Length, !IO),
io.write_string(")", !IO)
;
diff --git a/compiler/make.m b/compiler/make.m
index f722e78..f377619 100644
--- a/compiler/make.m
+++ b/compiler/make.m
@@ -428,7 +428,7 @@ make_target(Globals, Target, Success, !Info, !IO) :-
classify_target(Globals, FileName, ModuleName - TargetType) :-
(
string.length(FileName, NameLength),
- search_backwards_for_dot(FileName, NameLength - 1, DotLocn),
+ search_backwards_for_dot(FileName, NameLength, DotLocn),
string.split(FileName, DotLocn, ModuleNameStr0, Suffix),
solutions(classify_target_2(Globals, ModuleNameStr0, Suffix),
TargetFiles),
@@ -549,11 +549,11 @@ classify_target_2(Globals, ModuleNameStr0, Suffix, ModuleName - TargetType) :-
:- pred search_backwards_for_dot(string::in, int::in, int::out) is semidet.
search_backwards_for_dot(String, Index, DotIndex) :-
- Index >= 0,
- ( string.index_det(String, Index, '.') ->
- DotIndex = Index
+ string.unsafe_prev_index(String, Index, CharIndex, Char),
+ ( Char = ('.') ->
+ DotIndex = CharIndex
;
- search_backwards_for_dot(String, Index - 1, DotIndex)
+ search_backwards_for_dot(String, CharIndex, DotIndex)
).
:- func get_executable_type(globals) = linked_target_type.
diff --git a/compiler/mercury_to_mercury.m b/compiler/mercury_to_mercury.m
index bdaada4..aeda650 100644
--- a/compiler/mercury_to_mercury.m
+++ b/compiler/mercury_to_mercury.m
@@ -4359,7 +4359,7 @@ mercury_limited_term_nq_to_string(VarSet, AppendVarnums, NextToGraphicToken,
Limit, Term) = String :-
mercury_format_term_nq(VarSet, AppendVarnums, NextToGraphicToken, Term,
"", FullString),
- FullLen = string.length(FullString),
+ FullLen = string.count_codepoints(FullString),
( FullLen =< Limit ->
String = FullString
;
diff --git a/compiler/mlds_to_cs.m b/compiler/mlds_to_cs.m
index 1585856..45f3b4c 100644
--- a/compiler/mlds_to_cs.m
+++ b/compiler/mlds_to_cs.m
@@ -2011,6 +2011,7 @@ write_identifier_string(String, !IO) :-
% Although the C# spec does not limit identifier lengths, the Microsoft
% compiler restricts identifiers to 511 characters and Mono restricts
% identifiers to 512 characters.
+ % This assumes the identifier contains only ASCII characters.
Length = string.length(String),
( Length > 511 ->
Left = string.left(String, 251),
diff --git a/compiler/passes_aux.m b/compiler/passes_aux.m
index 9ce8a64..8f5b86c 100644
--- a/compiler/passes_aux.m
+++ b/compiler/passes_aux.m
@@ -546,15 +546,7 @@ should_dump_stage(StageNum, StageNumStr, StageName, DumpStages) :-
)
).
-stage_num_str(StageNum) = StageNumStr :-
- int_to_string(StageNum, StageNumStr0),
- ( string.length(StageNumStr0, 1) ->
- StageNumStr = "00" ++ StageNumStr0
- ; string.length(StageNumStr0, 2) ->
- StageNumStr = "0" ++ StageNumStr0
- ;
- StageNumStr = StageNumStr0
- ).
+stage_num_str(StageNum) = string.format("%03d", [i(StageNum)]).
maybe_dump_hlds(HLDS, StageNum, StageName, !DumpInfo, !IO) :-
module_info_get_globals(HLDS, Globals),
diff --git a/compiler/pickle.m b/compiler/pickle.m
index 616289f..23a8ca0 100644
--- a/compiler/pickle.m
+++ b/compiler/pickle.m
@@ -393,6 +393,7 @@ sign_extend_32(X) = R :-
:- pred pickle_char(char::in, io::di, io::uo) is det.
pickle_char(Char, !IO) :-
+ % XXX handle non-ASCII characters
char.to_int(Char, Int),
io.write_byte(Int, !IO).
diff --git a/compiler/stack_layout.m b/compiler/stack_layout.m
index 7cda580..55a0e3c 100644
--- a/compiler/stack_layout.m
+++ b/compiler/stack_layout.m
@@ -117,6 +117,7 @@
:- import_module parse_tree.prog_event.
:- import_module bool.
+:- import_module char.
:- import_module cord.
:- import_module counter.
:- import_module int.
@@ -2533,7 +2534,7 @@ lookup_string_in_table(String, Offset, !StringTable) :-
( map.search(TableMap0, String, OldOffset) ->
Offset = OldOffset
;
- string.length(String, Length),
+ Length = string.count_utf8_code_units(String),
TableOffset = TableOffset0 + Length + 1,
% We use a 32 bit unsigned integer to represent the offset.
% Computing that limit exactly without getting an overflow
diff --git a/compiler/structure_reuse.direct.choose_reuse.m b/compiler/structure_reuse.direct.choose_reuse.m
index 0f58aa9..fd92de8 100644
--- a/compiler/structure_reuse.direct.choose_reuse.m
+++ b/compiler/structure_reuse.direct.choose_reuse.m
@@ -1294,7 +1294,7 @@ line_length = 79.
dump_line(Msg, !IO) :-
Prefix = "%---",
Start = string.append(Prefix, Msg),
- Remainder = line_length - string.length(Start) - 1,
+ Remainder = line_length - string.count_codepoints(Start) - 1,
Line = Start ++ string.duplicate_char('-', Remainder),
io.write_string(Line, !IO),
io.write_string("%\n", !IO).
diff --git a/compiler/switch_util.m b/compiler/switch_util.m
index 1f14313..ab01d49 100644
--- a/compiler/switch_util.m
+++ b/compiler/switch_util.m
@@ -543,6 +543,7 @@ estimate_switch_tag_test_cost(Tag) = Cost :-
% the end of the string. The multiplication is an attempt to factor in
% the fact that each character comparison is in a loop, and thus takes
% more than one instruction.
+ % On non-ASCII strings, this cost depends on the compiler back-end.
Cost = 1 + 2 * string.length(String)
;
Tag = shared_with_reserved_addresses_tag(RAs, SubTag),
diff --git a/compiler/timestamp.m b/compiler/timestamp.m
index 81be210..e28f41f 100644
--- a/compiler/timestamp.m
+++ b/compiler/timestamp.m
@@ -53,6 +53,7 @@
:- implementation.
+:- import_module char.
:- import_module int.
:- import_module maybe.
:- import_module string.
@@ -153,9 +154,11 @@ timestamp_to_string(timestamp(Timestamp)) = Timestamp.
string_to_timestamp(Timestamp) = timestamp(Timestamp) :-
% The if-then-else here is to force order of evaluation --
- % we need to ensure that the length check occurs before the
- % calls to unsafe_undex to avoid dereferencing invalid pointers.
+ % we need to ensure that the sanity checks occur before the
+ % calls to unsafe_index. The offsets are only valid if the string
+ % contains only ASCII characters, as expected.
(
+ string.all_match(plausible_timestamp_char, Timestamp),
string.length(Timestamp) : int = string.length("yyyy-mm-dd hh:mm:ss")
->
string.to_int(string.unsafe_substring(Timestamp, 0, 4), _),
@@ -192,3 +195,10 @@ string_to_timestamp(Timestamp) = timestamp(Timestamp) :-
;
fail
).
+
+:- pred plausible_timestamp_char(char::in) is semidet.
+
+plausible_timestamp_char(Char) :-
+ char.to_int(Char, CharInt),
+ char.to_int(':', HighestInt),
+ CharInt =< HighestInt.
diff --git a/compiler/x86_64_out.m b/compiler/x86_64_out.m
index 0dbc518..2145434 100644
--- a/compiler/x86_64_out.m
+++ b/compiler/x86_64_out.m
@@ -509,10 +509,11 @@ output_x86_64_instr_list(Stream, Instrs, !IO) :-
io::di, io::uo) is det <= stream.writer(Stream, string, io).
output_x86_64_instr(Stream, x86_64_comment(Comment), !IO) :-
- ( string.length(Comment) > 0 ->
+ ( Comment \= "" ->
put(Stream, "\t# ", !IO),
- ( string.length(Comment) > comment_length ->
- string.split(Comment, comment_length, Comment1, Comment2),
+ ( string.count_codepoints(Comment) > comment_length ->
+ string.split_by_codepoint(Comment, comment_length,
+ Comment1, Comment2),
put(Stream, string.word_wrap(Comment1, comment_length), !IO),
put(Stream, "\n", !IO),
output_x86_64_instr(Stream, x86_64_comment(Comment2), !IO)
@@ -523,7 +524,7 @@ output_x86_64_instr(Stream, x86_64_comment(Comment), !IO) :-
true
).
output_x86_64_instr(Stream, x86_64_label(LabelName), !IO) :-
- ( string.length(LabelName) > 0 ->
+ ( LabelName \= "" ->
put(Stream, "\n" ++ LabelName ++ ":", !IO)
;
true
@@ -1057,7 +1058,7 @@ output_x86_64_inst(Stream, xor(Src, Dest), !IO) :-
is det <= stream.writer(Stream, string, io).
output_x86_64_comment(Stream, Comment, !IO) :-
- ( string.length(Comment) > 0 ->
+ ( Comment \= "" ->
put(Stream, "\t# ", !IO),
put(Stream, Comment, !IO)
;
diff --git a/library/parsing_utils.m b/library/parsing_utils.m
index 0114bb5..a29138a 100644
--- a/library/parsing_utils.m
+++ b/library/parsing_utils.m
@@ -159,6 +159,7 @@
% ikeyword(IdChars, Keyword, Src, _, !PS)
% Case-insensitive version of keyword/6.
+ % Only upper and lowercase unaccented Latin letters are treated specially.
%
:- pred ikeyword(string::in, string::in, src::in, unit::out,
ps::in, ps::out) is semidet.
@@ -498,11 +499,11 @@ eof(Src, unit, !PS) :-
next_char(Src, Char, !PS) :-
promise_pure (
- current_offset(Src, Offset, !PS),
+ current_offset(Src, Offset, !.PS, _),
Offset < Src ^ input_length,
- Char = Src ^ input_string ^ unsafe_elem(Offset),
+ string.unsafe_index_next(Src ^ input_string, Offset, NextOffset, Char),
impure record_progress(Src, Offset),
- !:PS = !.PS + 1
+ !:PS = NextOffset
).
%-----------------------------------------------------------------------------%
@@ -538,7 +539,8 @@ match_string(MatchStr, Src, PS, PS + N) :-
match_string_2(N, I, MatchStr, Offset, Str) :-
( if I < N then
- MatchStr ^ unsafe_elem(I) = Str ^ unsafe_elem(Offset + I),
+ string.unsafe_index_code_unit(MatchStr, I, CodeUnit),
+ string.unsafe_index_code_unit(Str, Offset + I, CodeUnit),
match_string_2(N, I + 1, MatchStr, Offset, Str)
else
true
@@ -559,9 +561,14 @@ imatch_string(MatchStr, Src, PS, PS + N) :-
imatch_string_2(N, I, MatchStr, Offset, Str) :-
( if I < N then
- char.to_upper(MatchStr ^ unsafe_elem(I), Chr1),
- char.to_upper(Str ^ unsafe_elem(Offset + I), Chr2),
- Chr1 = Chr2,
+ % We can compare by code units because char.to_upper only converts
+ % letters in the ASCII range, and ASCII characters are always encoded
+ % in a single code unit.
+ string.unsafe_index_code_unit(MatchStr, I, CodeUnit1),
+ string.unsafe_index_code_unit(Str, Offset + I, CodeUnit2),
+ char.det_from_int(CodeUnit1, Chr1),
+ char.det_from_int(CodeUnit2, Chr2),
+ char.to_upper(Chr1) = char.to_upper(Chr2) : char,
imatch_string_2(N, I + 1, MatchStr, Offset, Str)
else
true
@@ -824,12 +831,13 @@ digits_2(Base, Src, unit, !PS) :-
%-----------------------------------------------------------------------------%
string_literal(QuoteChar, Src, String, !PS) :-
- current_offset(Src, Start, !PS),
next_char(Src, QuoteChar, !PS),
+ current_offset(Src, Start, !PS),
string_literal_2(Src, QuoteChar, _, !PS),
current_offset(Src, EndPlusOne, !PS),
+ string.unsafe_prev_index(Src ^ input_string, EndPlusOne, End, QuoteChar),
skip_whitespace(Src, !PS),
- input_substring(Src, Start + 1, EndPlusOne - 1, String).
+ input_substring(Src, Start, End, String).
%-----------------------------------------------------------------------------%
diff --git a/library/pprint.m b/library/pprint.m
index ba19416..8b6cda9 100644
--- a/library/pprint.m
+++ b/library/pprint.m
@@ -496,7 +496,7 @@ lb(P, W, K0, K, I, 'LABEL'(L, X), S0, S) :-
lb(P, W, K0, K, I ++ L, X, S0, S).
lb(P, _, _, K, I, 'LINE', S0, S) :-
- K = string.length(I),
+ K = string.count_codepoints(I),
P("\n", S0, S1),
P(I, S1, S ).
@@ -509,7 +509,7 @@ lb(P, W, K0, K, I, 'DOC'(D, U), S0, S) :-
lb(P, W, K0, K, I, to_doc(D, univ_value(U)), S0, S).
lb(P, _, K0, K, _, 'TEXT'(T), S0, S) :-
- K = K0 + string.length(T),
+ K = K0 + string.count_codepoints(T),
P(T, S0, S).
%-----------------------------------------------------------------------------%
@@ -531,7 +531,7 @@ ff('LINE', R) = R.
ff('GROUP'(X), R) = ff(X, R).
ff('DOC'(D, U), R) = ff(to_doc(D, univ_value(U)), R).
ff('TEXT'(S), R) = R - L :-
- L = string.length(S),
+ L = string.count_codepoints(S),
R > L.
%-----------------------------------------------------------------------------%
@@ -563,7 +563,7 @@ layout_flat(P, K0, K, 'DOC'(D, U), S0, S) :-
layout_flat(P, K0, K, to_doc(D, univ_value(U)), S0, S).
layout_flat(P, K0, K, 'TEXT'(T), S0, S) :-
- K = K0 + string.length(T),
+ K = K0 + string.count_codepoints(T),
P(T, S0, S).
%-----------------------------------------------------------------------------%
diff --git a/library/pretty_printer.m b/library/pretty_printer.m
index 8433d0a..f959638 100644
--- a/library/pretty_printer.m
+++ b/library/pretty_printer.m
@@ -381,12 +381,13 @@ write_doc_to_stream(Stream, Canonicalize, FMap, LineWidth, [Doc | Docs0],
%
Doc = str(String),
stream.put(Stream, String, !IO),
- !:RemainingWidth = !.RemainingWidth - string.length(String),
+ !:RemainingWidth = !.RemainingWidth -
+ string.count_codepoints(String),
Docs = Docs0
;
Doc = nl,
( if
- F = ( func(S, W) = string.length(S) + W ),
+ F = ( func(S, W) = string.count_codepoints(S) + W ),
IndentWidth = list.foldl(F, !.Indents, 0),
!.RemainingWidth < LineWidth - IndentWidth
then
@@ -479,7 +480,7 @@ output_current_group(Stream, LineWidth, Indents, OpenGroups,
[Doc | Docs0], Docs, !RemainingWidth, !RemainingLines, !IO) :-
( if Doc = str(String) then
stream.put(Stream, String, !IO),
- !:RemainingWidth = !.RemainingWidth - string.length(String),
+ !:RemainingWidth = !.RemainingWidth - string.count_codepoints(String),
output_current_group(Stream, LineWidth, Indents, OpenGroups,
Docs0, Docs, !RemainingWidth, !RemainingLines, !IO)
else if Doc = hard_nl then
@@ -545,7 +546,8 @@ expand_docs(Canonicalize, FMap, [Doc | Docs0], Docs, OpenGroups,
else
(
Doc = str(String),
- !:RemainingWidth = !.RemainingWidth - string.length(String),
+ !:RemainingWidth = !.RemainingWidth -
+ string.count_codepoints(String),
Docs = [Doc | Docs1],
expand_docs(Canonicalize, FMap, Docs0, Docs1, OpenGroups,
!Limit, !Pri, !RemainingWidth)
@@ -638,7 +640,7 @@ output_indentation(_Stream, [], !RemainingWidth, !IO).
output_indentation(Stream, [Indent | Indents], !RemainingWidth, !IO) :-
output_indentation(Stream, Indents, !RemainingWidth, !IO),
stream.put(Stream, Indent, !IO),
- !:RemainingWidth = !.RemainingWidth - string.length(Indent).
+ !:RemainingWidth = !.RemainingWidth - string.count_codepoints(Indent).
%-----------------------------------------------------------------------------%
diff --git a/library/string.m b/library/string.m
index 0806723..770a450 100644
--- a/library/string.m
+++ b/library/string.m
@@ -94,6 +94,11 @@
:- func string.count_codepoints(string) = int.
:- pred string.count_codepoints(string::in, int::out) is det.
+ % Determine the number of code units required to represent a string
+ % in UTF-8 encoding.
+ %
+:- func string.count_utf8_code_units(string) = int.
+
% string.codepoint_offset(String, CodePointCount, CodePointOffset):
% Equivalent to `string.codepoint_offset(String, 0, CodePointCount,
% CodePointOffset)'.
@@ -605,7 +610,7 @@
% The length (in code units) of the maximal suffix of `String' consisting
% entirely of characters (code points) satisfying Pred.
%
-:- func suffix_length(pred(char)::in(pred(in) is semidet), string::in)
+:- func string.suffix_length(pred(char)::in(pred(in) is semidet), string::in)
= (int::out) is det.
% string.set_char(Char, Index, String0, String):
@@ -1101,7 +1106,7 @@ string.to_int(String, Int) :-
string.base_string_to_int(Base, String, Int) :-
string.index(String, 0, Char),
- Len = string.length(String),
+ Len = string.count_codepoints(String),
( Char = ('-') ->
Len > 1,
foldl_substring(accumulate_negative_int(Base), String, 1,
@@ -1614,19 +1619,7 @@ string.to_char_list(Str::uo, CharList::in) :-
").
string.to_char_list_2(Str, CharList) :-
- string.to_char_list_3(Str, string.length(Str) - 1, [], CharList).
-
-:- pred string.to_char_list_3(string::in, int::in,
- list(char)::di, list(char)::uo) is det.
-
-string.to_char_list_3(Str, Index, CharList0, CharList) :-
- ( Index >= 0 ->
- string.unsafe_index(Str, Index, Char),
- CharList1 = [Char | CharList0],
- string.to_char_list_3(Str, Index - 1, CharList1, CharList)
- ;
- CharList = CharList0
- ).
+ string.foldr(list.cons, Str, [], CharList).
%-----------------------------------------------------------------------------%
@@ -3258,7 +3251,7 @@ format_int(Flags, Width, Prec, Int) = String :-
AbsInteger = integer.abs(Integer),
AbsIntStr = integer.to_string(AbsInteger)
),
- AbsIntStrLength = string.length(AbsIntStr),
+ AbsIntStrLength = string.count_codepoints(AbsIntStr),
% Do we need to increase precision?
(
@@ -3273,7 +3266,7 @@ format_int(Flags, Width, Prec, Int) = String :-
% Do we need to pad to the field width?
(
Width = yes(FieldWidth),
- FieldWidth > string.length(PrecStr),
+ FieldWidth > string.count_codepoints(PrecStr),
member('0', Flags),
\+ member('-', Flags),
Prec = no
@@ -3324,7 +3317,7 @@ format_unsigned_int(Flags, Width, Prec, Base, Int, IsTypeP, Prefix) = String :-
AbsIntStr = AbsIntStr0
)
),
- AbsIntStrLength = string.length(AbsIntStr),
+ AbsIntStrLength = string.count_codepoints(AbsIntStr),
% Do we need to increase precision?
(
@@ -3350,7 +3343,7 @@ format_unsigned_int(Flags, Width, Prec, Base, Int, IsTypeP, Prefix) = String :-
% Do we need to pad to the field width?
(
Width = yes(FieldWidth),
- FieldWidth > string.length(PrecModStr),
+ FieldWidth > string.count_codepoints(PrecModStr),
member('0', Flags),
\+ member('-', Flags),
Prec = no
@@ -3412,7 +3405,7 @@ format_float(Flags, Width, Prec, Float) = NewFloat :-
\+ member('#', Flags),
Prec = yes(0)
->
- PrecStrLen = string.length(PrecStr),
+ PrecStrLen = string.count_codepoints(PrecStr),
PrecModStr = string.substring(PrecStr, 0, PrecStrLen - 1)
;
PrecModStr = PrecStr
@@ -3422,7 +3415,7 @@ format_float(Flags, Width, Prec, Float) = NewFloat :-
% Do we need to change field width?
(
Width = yes(FieldWidth),
- FieldWidth > string.length(PrecModStr),
+ FieldWidth > string.count_codepoints(PrecModStr),
member('0', Flags),
\+ member('-', Flags)
->
@@ -3469,7 +3462,7 @@ format_scientific_number_g(Flags, Width, Prec, Float, E) = NewFloat :-
%
(
Width = yes(FieldWidth),
- FieldWidth > string.length(PrecStr),
+ FieldWidth > string.count_codepoints(PrecStr),
member('0', Flags),
\+ member('-', Flags)
->
@@ -3521,7 +3514,7 @@ format_scientific_number(Flags, Width, Prec, Float, E) = NewFloat :-
% Do we need to change field width?
(
Width = yes(FieldWidth),
- FieldWidth > string.length(PrecModStr),
+ FieldWidth > string.count_codepoints(PrecModStr),
member('0', Flags),
\+ member('-', Flags)
->
@@ -3802,7 +3795,7 @@ change_to_e_notation(Float, Prec, E) = ScientificFloat :-
% Is mantissa greater than one digit long?
split_at_decimal_point(UnsafeBase, MantissaStr, _FractionStr),
- ( string.length(MantissaStr) > 1 ->
+ ( string.count_codepoints(MantissaStr) > 1 ->
% Need to append 0, to fix the problem of having no numbers
% after the decimal point.
SafeBase = calculate_base_unsafe(string.append(UnsafeBase, "0"),
@@ -3842,7 +3835,7 @@ size_of_required_exponent(Float, Prec) = Exponent :-
% Is mantissa one digit long?
split_at_decimal_point(UnsafeBase, MantissaStr, _FractionStr),
- ( string.length(MantissaStr) > 1 ->
+ ( string.count_codepoints(MantissaStr) > 1 ->
% We will need to move decimal pt one place to the left:
% therefore, increment exponent.
Exponent = UnsafeExponent + 1
@@ -3883,7 +3876,7 @@ remove_zeros(CharNum) = TrimmedNum :-
decimal_pos(Float) = Pos :-
split_at_decimal_point(Float, MantissaStr, _FractionStr),
- NumZeros = string.length(MantissaStr) - 1,
+ NumZeros = string.count_codepoints(MantissaStr) - 1,
Pos = find_non_zero_pos(string.to_char_list(Float), NumZeros).
% Given a list of chars representing a floating point number, this
@@ -3951,7 +3944,7 @@ calculate_base_unsafe(Float, Prec) = Exp :-
change_precision(Prec, OldFloat) = NewFloat :-
split_at_decimal_point(OldFloat, MantissaStr, FractionStr),
- FracStrLen = string.length(FractionStr),
+ FracStrLen = string.count_codepoints(FractionStr),
( Prec > FracStrLen ->
PrecFracStr = string.pad_right(FractionStr, '0', Prec),
PrecMantissaStr = MantissaStr
@@ -3965,7 +3958,10 @@ change_precision(Prec, OldFloat) = NewFloat :-
NewPrecFrac = string.det_to_int(UnroundedFrac) + 1,
NewPrecFracStrNotOK = string.int_to_string( NewPrecFrac),
NewPrecFracStr = string.pad_left(NewPrecFracStrNotOK, '0', Prec),
- ( string.length(NewPrecFracStr) > string.length(UnroundedFrac) ->
+ (
+ string.count_codepoints(NewPrecFracStr) >
+ string.count_codepoints(UnroundedFrac)
+ ->
PrecFracStr = substring(NewPrecFracStr, 1, Prec),
PrecMantissaInt = det_to_int(MantissaStr) + 1,
PrecMantissaStr = int_to_string(PrecMantissaInt)
@@ -4995,6 +4991,38 @@ count_codepoints_2(String, I, Count0, Count) :-
Count = String.codePointCount(0, String.length());
").
+/*-----------------------------------------------------------------------*/
+
+:- pragma foreign_proc("C",
+ string.count_utf8_code_units(Str::in) = (Length::out),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Length = strlen(Str);
+").
+:- pragma foreign_proc("Erlang",
+ string.count_utf8_code_units(Str::in) = (Length::out),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Length = size(Str)
+").
+
+string.count_utf8_code_units(String) = Length :-
+ string.foldl(count_utf8_code_units_2, String, 0, Length).
+
+:- pred count_utf8_code_units_2(char::in, int::in, int::out) is det.
+
+count_utf8_code_units_2(Char, !Length) :-
+ char.to_int(Char, CharInt),
+ ( CharInt =< 0x7f ->
+ !:Length = !.Length + 1
+ ; char.to_utf8(Char, UTF8) ->
+ !:Length = !.Length + list.length(UTF8)
+ ;
+ error("string.count_utf8_code_units: char.to_utf8 failed")
+ ).
+
+/*-----------------------------------------------------------------------*/
+
% Note: we do not define what happens with unpaired surrogates.
%
string.codepoint_offset(String, N, Index) :-
diff --git a/library/term_io.m b/library/term_io.m
index 5407d56..cbf64ef 100644
--- a/library/term_io.m
+++ b/library/term_io.m
@@ -652,7 +652,7 @@ should_atom_be_quoted(S, NextToGraphicToken) = ShouldQuote :-
;
% Graphic token (6.4.2)
string.all_match(lexer.graphic_token_char, S),
- string.length(S) > 0,
+ S \= "",
% We need to quote tokens starting with '#', because Mercury uses
% '#' to start source line number indicators.
diff --git a/tests/general/test_parsing_utils.exp b/tests/general/test_parsing_utils.exp
index 6f78c66..977e8be 100644
--- a/tests/general/test_parsing_utils.exp
+++ b/tests/general/test_parsing_utils.exp
@@ -28,6 +28,21 @@ pass: keyword("ABC", "ABC") on "ABCA"
pass: keyword("ABC", "ABC") on "ABC 123"
returned unit as expected
[4 chars consumed]
+pass: keyword("αβγ", "αβγ") on ""
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "123"
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "αβγα"
+ failed as expected
+pass: keyword("αβγ", "αβγ") on "αβγ 123"
+ returned unit as expected
+ [4 code points consumed]
+pass: keyword("ABC", "ABC") on "abc 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: ikeyword("αβγ", "αβγ") on "αβγ 123"
+ returned unit as expected
+ [4 code points consumed]
pass: identifier("ABC", "ABCabc_") on ""
failed as expected
pass: identifier("ABC", "ABCabc_") on "abc"
@@ -45,6 +60,18 @@ pass: identifier("ABC", "ABCabc_") on "*"
pass: identifier("ABC", "ABCabc_") on "Abc !"
returned "Abc" as expected
[4 chars consumed]
+pass: identifier("αβγ", "αβγ_") on ""
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "abc"
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "_"
+ failed as expected
+pass: identifier("αβγ", "αβγ_") on "α"
+ returned "α" as expected
+ [1 code points consumed]
+pass: identifier("αβγ", "αβγ_") on "αβ_γ"
+ returned "αβ_γ" as expected
+ [4 code points consumed]
pass: whitespace on ""
returned unit as expected
[0 chars consumed]
@@ -159,6 +186,14 @@ pass: string_literal('\'') on "'123' abc"
pass: string_literal('\'') on "'1\'2\'3' abc"
returned "1\\\'2\\\'3" as expected
[12 chars consumed]
+pass: string_literal('‖') on ""
+ failed as expected
+pass: string_literal('‖') on "‖123‖ abc"
+ returned "123" as expected
+ [8 code points consumed]
+pass: string_literal('‖') on "‖αβγ‖ abc"
+ returned "αβγ" as expected
+ [8 code points consumed]
pass: optional(punct("!")) on ""
returned no as expected
[0 chars consumed]
diff --git a/tests/general/test_parsing_utils.m b/tests/general/test_parsing_utils.m
index 24e53af..2ca65f5 100644
--- a/tests/general/test_parsing_utils.m
+++ b/tests/general/test_parsing_utils.m
@@ -97,15 +97,25 @@ run_test(Result) :-
PassFail = "fail"
)
),
+ (
+ CurrentOffset = yes(CO),
+ input_substring(Src, 0, CO, Substring)
+ ->
+ NumCodePoints = string.count_codepoints(Substring),
+ NumCodeUnits = string.count_code_units(Substring),
+ ( NumCodeUnits = NumCodePoints ->
+ What = "chars"
+ ;
+ What = "code points"
+ ),
+ Consumed = string.format("\n\t[%d %s consumed]",
+ [i(NumCodePoints), s(What)])
+ ;
+ Consumed = ""
+ ),
Result = PassFail ++ ": " ++
ParserName ++ " on \"" ++ TestString ++ "\"\n\t" ++
- Outcome ++
- ( if CurrentOffset = yes(CO) then
- string.format("\n\t[%d chars consumed]", [i(CO)])
- else
- ""
- ) ++
- "\n".
+ Outcome ++ Consumed ++ "\n".
%-----------------------------------------------------------------------------%
@@ -146,6 +156,20 @@ test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
"ABC 123", yes("unit")).
+test_case("keyword(\"αβγ\", \"αβγ\")", stringify(keyword("αβγ", "αβγ")),
+ "", no).
+test_case("keyword(\"αβγ\", \"αβγ\")", stringify(keyword("αβγ", "αβγ")),
+ "123", no).
+test_case("keyword(\"αβγ\", \"αβγ\")", stringify(keyword("αβγ", "αβγ")),
+ "αβγα", no).
+test_case("keyword(\"αβγ\", \"αβγ\")", stringify(keyword("αβγ", "αβγ")),
+ "αβγ 123", yes("unit")).
+
+test_case("keyword(\"ABC\", \"ABC\")", stringify(ikeyword("ABC", "ABC")),
+ "abc 123", yes("unit")).
+test_case("ikeyword(\"αβγ\", \"αβγ\")", stringify(ikeyword("αβγ", "αβγ")),
+ "αβγ 123", yes("unit")).
+
test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
"", no).
test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
@@ -161,6 +185,17 @@ test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCab
test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
"Abc !", yes("\"Abc\"")).
+test_case("identifier(\"αβγ\", \"αβγ_\")", stringify(identifier("αβγ", "αβγ_")),
+ "", no).
+test_case("identifier(\"αβγ\", \"αβγ_\")", stringify(identifier("αβγ", "αβγ_")),
+ "abc", no).
+test_case("identifier(\"αβγ\", \"αβγ_\")", stringify(identifier("αβγ", "αβγ_")),
+ "_", no).
+test_case("identifier(\"αβγ\", \"αβγ_\")", stringify(identifier("αβγ", "αβγ_")),
+ "α", yes("\"α\"")).
+test_case("identifier(\"αβγ\", \"αβγ_\")", stringify(identifier("αβγ", "αβγ_")),
+ "αβ_γ", yes("\"αβ_γ\"")).
+
test_case("whitespace", stringify(whitespace),
"", yes("unit")).
test_case("whitespace", stringify(whitespace),
@@ -255,6 +290,13 @@ test_case("string_literal('\\\'')", stringify(string_literal('\'')),
test_case("string_literal('\\\'')", stringify(string_literal('\'')),
"\'1\\\'2\\\'3\' abc", yes("\"1\\\\\\\'2\\\\\\\'3\"")).
+test_case("string_literal('‖')", stringify(string_literal('‖')),
+ "", no).
+test_case("string_literal('‖')", stringify(string_literal('‖')),
+ "‖123‖ abc", yes("\"123\"")).
+test_case("string_literal('‖')", stringify(string_literal('‖')),
+ "‖αβγ‖ abc", yes("\"αβγ\"")).
+
test_case("optional(punct(\"!\"))", stringify(optional(punct("!"))),
"", yes("no")).
test_case("optional(punct(\"!\"))", stringify(optional(punct("!"))),
diff --git a/tests/hard_coded/test_pretty_printer.exp b/tests/hard_coded/test_pretty_printer.exp
index 1dd2230..5a8d764 100644
--- a/tests/hard_coded/test_pretty_printer.exp
+++ b/tests/hard_coded/test_pretty_printer.exp
@@ -525,7 +525,7 @@ limit = triangular(10), max lines = 3, line width = 38
limit = triangular(10), max lines = 3, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = triangular(10), max lines = 3, line width = 38
@@ -600,7 +600,7 @@ limit = triangular(10), max lines = 3, line width = 78
limit = triangular(10), max lines = 3, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = triangular(10), max lines = 3, line width = 78
@@ -674,7 +674,7 @@ limit = triangular(10), max lines = 10, line width = 38
limit = triangular(10), max lines = 10, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = triangular(10), max lines = 10, line width = 38
@@ -765,7 +765,7 @@ limit = triangular(10), max lines = 10, line width = 78
limit = triangular(10), max lines = 10, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = triangular(10), max lines = 10, line width = 78
@@ -834,7 +834,7 @@ limit = triangular(100), max lines = 3, line width = 38
limit = triangular(100), max lines = 3, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = triangular(100), max lines = 3, line width = 38
@@ -919,7 +919,7 @@ limit = triangular(100), max lines = 3, line width = 78
limit = triangular(100), max lines = 3, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = triangular(100), max lines = 3, line width = 78
@@ -1009,7 +1009,7 @@ limit = triangular(100), max lines = 10, line width = 38
limit = triangular(100), max lines = 10, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = triangular(100), max lines = 10, line width = 38
@@ -1138,7 +1138,7 @@ limit = triangular(100), max lines = 10, line width = 78
limit = triangular(100), max lines = 10, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = triangular(100), max lines = 10, line width = 78
@@ -1235,7 +1235,7 @@ limit = linear(10), max lines = 3, line width = 38
limit = linear(10), max lines = 3, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = linear(10), max lines = 3, line width = 38
@@ -1304,7 +1304,7 @@ limit = linear(10), max lines = 3, line width = 78
limit = linear(10), max lines = 3, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = linear(10), max lines = 3, line width = 78
@@ -1368,7 +1368,7 @@ limit = linear(10), max lines = 10, line width = 38
limit = linear(10), max lines = 10, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = linear(10), max lines = 10, line width = 38
@@ -1446,7 +1446,7 @@ limit = linear(10), max lines = 10, line width = 78
limit = linear(10), max lines = 10, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = linear(10), max lines = 10, line width = 78
@@ -1510,7 +1510,7 @@ limit = linear(100), max lines = 3, line width = 38
limit = linear(100), max lines = 3, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = linear(100), max lines = 3, line width = 38
@@ -1595,7 +1595,7 @@ limit = linear(100), max lines = 3, line width = 78
limit = linear(100), max lines = 3, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = linear(100), max lines = 3, line width = 78
@@ -1685,7 +1685,7 @@ limit = linear(100), max lines = 10, line width = 38
limit = linear(100), max lines = 10, line width = 38
|------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|------------------------------------|
limit = linear(100), max lines = 10, line width = 38
@@ -1814,7 +1814,7 @@ limit = linear(100), max lines = 10, line width = 78
limit = linear(100), max lines = 10, line width = 78
|----------------------------------------------------------------------------|
-{1, 2.0, "three", '4', {5}}
+{1, 2.0, "three", '4', {5}, "«ąąąąą»"}
|----------------------------------------------------------------------------|
limit = linear(100), max lines = 10, line width = 78
diff --git a/tests/hard_coded/test_pretty_printer.m b/tests/hard_coded/test_pretty_printer.m
index bd08378..2867ae3 100644
--- a/tests/hard_coded/test_pretty_printer.m
+++ b/tests/hard_coded/test_pretty_printer.m
@@ -214,7 +214,7 @@ test_case(test_case(LineWidth, MaxLines, Limit, Doc)) :-
List, map.init : map(int, float)),
OpTree = mk_op_tree(200),
Church = list.foldl(func(_, X) = succ(X), 1..10, zero),
- Tuple = {1, 2.0, "three", '4', {5}},
+ Tuple = {1, 2.0, "three", '4', {5}, "«ąąąąą»"},
Square = list.duplicate(10, 1..10) : list(list(int)),
IndentTest = docs([
str("indentation test:"),
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews mailing list