[m-rev.] for review: structured terms in error messages

Fri Oct 14 17:48:45 AEDT 2022

On Fri, 14 Oct 2022, Zoltan Somogyi wrote:

> Print structured terms on one line if they fit.
> 
> The compiler's diagnostic outputs often contain terms, representing
> either terms in the program, or other entities such as types or modes.
> These can be printed either as
>
>     f(a, b)
> 
> or as
>
>     f(
>         a,
>         b
>     )

...

> diff --git a/compiler/error_spec.m b/compiler/error_spec.m
> index aabb8612a..5bbf18db3 100644
> --- a/compiler/error_spec.m
> +++ b/compiler/error_spec.m
> @@ -424,6 +424,55 @@
>      ;       pragma_decl(string)
>              % As above, but prefix the string with ":- pragma ".
> 
> +    ;       left_paren_maybe_nl_inc(string, lp_piece_kind)
> +    ;       maybe_nl_dec_right_paren(string, rp_piece_kind)
> +            % These two pieces are intended to help implement messages
> +            % that should be formatted to look like either
> +            %
> +            %   aaa(bbb, ccc)
> +            %
> +            % if there is space for this on the current line, or to look like

Delete "for this".

> +            %
> +            %   aaa(
> +            %       bbb,
> +            %       ccc
> +            %   )
> +            %
> +            % if there isn't.
> +            %
> +            % The piece sequence that would yield the above would be
> +            %
> +            %   fixed("aaa")
> +            %   left_paren_maybe_nl_inc("(", lp_suffix)
> +            %   fixed("bbb"),
> +            %   suffix(","),
> +            %   nl,
> +            %   fixed("ccc"),
> +            %   maybe_nl_dec_right_paren(")", rp_plain)
> +            %

...

> diff --git a/compiler/write_error_spec.m b/compiler/write_error_spec.m
> index 79de042dc..1e55ff878 100644
> --- a/compiler/write_error_spec.m
> +++ b/compiler/write_error_spec.m
> @@ -525,7 +525,17 @@ do_write_error_pieces(Stream, Globals, MaybeContext, TreatAsFirst, FixedIndent,
>              ),
>              FirstIndent = (if TreatAsFirst = treat_as_first then 0 else 1),
>              divide_paragraphs_into_lines(MaybeAvailLen, TreatAsFirst,
> -                FirstIndent, Paragraphs, Lines),
> +                FirstIndent, Paragraphs, Lines0),
> +            try_to_join_lp_to_rp_lines(Lines0, Lines),
> +            trace [compile_time(flag("debug_try_join_lp_to_rp")), io(!TIO)] (
> +                io.stderr_stream(StdErr, !TIO),
> +                io.write_string(StdErr, "START\n", !TIO),
> +                list.foldl(io.write_line(StdErr), Paragraphs, !TIO),
> +                list.foldl(io.write_line(StdErr), Lines0, !TIO),
> +                io.write_string(StdErr, "JOINED\n", !TIO),
> +                list.foldl(io.write_line(StdErr), Lines, !TIO),
> +                io.write_string(StdErr, "END\n", !TIO)
> +            ),
>              write_msg_lines(Stream, PrefixStr, Lines, !IO)
>          )
>      ).
> @@ -568,15 +578,13 @@ write_msg_lines(Stream, PrefixStr, [Line | Lines], !IO) :-
>      io::di, io::uo) is det.
>
>  write_msg_line(Stream, PrefixStr, Line, !IO) :-
> -    Line = error_line(_MaybeAvail, LineIndent, LineWords, _LineWordsLen),
> -    (
> -        LineWords = [],
> +    Line = error_line(_MaybeAvail, LineIndent, LineWordsStr, _LineWordsLen,
> +        _LineParen),
> +    ( if LineWordsStr = "" then
>          % Don't bother to print out out indents that are followed by nothing.

Double "out".

...

> @@ -946,14 +1005,36 @@ find_word_end(String, Cur, WordEnd) :-
>                  % to get the number of spaces this turns into.
>                  line_indent_level   :: int,
> 
> -                % The words on the line.
> -                line_words          :: list(string),
> +                % The words on the line as a single string, with one space
> +                % between each pair of words.
> +                line_words_str      :: string,
> 
> -                % Total number of characters in the words, including
> -                % the spaces between words.
> +                % The length of the line_words_str field.
>                  %
>                  % This field is meaningful only if maybe_avail_len is yes(...).
> -                line_words_len      :: int
> +                line_words_len      :: int,
> +
> +                % If this field is paren_none, this a normal line.
> +                % If this field is paren_lp_end, this line ends a left
> +                % parenthesis.
> +                % If this field is paren_end_rp, the *next* line *starts*
> +                % with a right parenthesis.
> +                %
> +                % We use these fields to try to put everything
> +                %
> +                % - in a paren_lp_end line,
> +                % - in zero or more paren_none lines,
> +                % - in a paren_end_rp line, and
> +                % - the very next line (which starts with the rp)
> +                %
> +                % into a single line, if there is room. (Note that the code
> +                % that creates a paren_end_rp line will also ensure that
> +                % there *will be* a nexy line.)

s/nexy/next/

> +                %
> +                % It is ok for some of the lines to be squashed together
> +                % to result from earlier squash operations, on inner
> +                % parentheses.
> +                line_paren          :: paren_status
>              ).
>
>      % Groups the words in the given paragraphs into lines. The first line

...

> @@ -1062,6 +1159,198 @@ get_later_words(Avail, [Word | Words], CurLen, FinalLen,
>          RestWords = [Word | Words]s

...

> +:- pred find_matching_rp_and_maybe_join(error_line::in,
> +    list(error_line)::in, list(error_line)::out, list(error_line)::out) is det.
> +
> +find_matching_rp_and_maybe_join(LPLine, TailLines0, ReplacementLines,
> +        LeftOverLines) :-
> +    LPLine = error_line(MaybeAvailLen, LPIndent, LPLineWordsStr,
> +        LPLineWordsLen, LPParen),
> +    expect(unify(LPParen, paren_lp_end), $pred, "LPParen != paren_lp_end"),
> +    ( if
> +        find_matching_rp(TailLines0, cord.init, MidLinesCord, 0, MidLinesLen,
> +            RPLine, LeftOverLinesPrime)
> +    then
> +        RPLine = error_line(_, _RPIndent, RPLineWordsStr, RPLineWordsLen,
> +            RPParen),
> +        MidLines = cord.list(MidLinesCord),
> +        list.length(MidLines, NumMidLines),
> +        MidLineSpaces = (if NumMidLines = 0 then 0 else NumMidLines - 1),
> +        TotalLpRpLen =
> +            LPLineWordsLen + MidLinesLen + MidLineSpaces + RPLineWordsLen,
> +        ChunkLines = [LPLine | MidLines] ++ [RPLine],
> +        ( if
> +            (
> +                MaybeAvailLen = no
> +            ;
> +                MaybeAvailLen = yes(AvailLen),
> +                LPIndent * indent_increment + TotalLpRpLen =< AvailLen
> +            )
> +        then
> +            % We insert spaces
> +            % - between the middle lines, but
> +            % - not after the ( in LPLine,
> +            % - nor before the ) in RPLine.
> +            MidLineStrs = list.map((func(L) = L ^ line_words_str), MidLines),
> +            MidSpaceLinesStr = string.join_list(" ", MidLineStrs),
> +            ReplacementLineStr =
> +                LPLineWordsStr ++ MidSpaceLinesStr ++ RPLineWordsStr,
> +            string.count_codepoints(ReplacementLineStr, ReplacementLineStrLen),
> +            expect(unify(TotalLpRpLen, ReplacementLineStrLen), $pred,
> +                "TotalLpRpLen != ReplacementLineStrLen"),
> +            ReplacementLine = error_line(MaybeAvailLen, LPIndent,
> +                ReplacementLineStr, TotalLpRpLen, RPParen),
> +            ReplacementLines = [ReplacementLine]
> +        else
> +            ReplacementLines = ChunkLines
> +        ),
> +        LeftOverLines = LeftOverLinesPrime
> +    else
> +        % We can't find the rest of the patterm so we can't optimize anything.

s/patterm/pattern.

...

> +:- pred find_matching_rp(list(error_line)::in,
> +    cord(error_line)::in, cord(error_line)::out,
> +    int::in, int::out, error_line::out, list(error_line)::out) is semidet.
> +
> +find_matching_rp([], !MidLinesCord, !MidLinesLen, _, _) :-
> +    fail.
> +find_matching_rp([HeadLine0 | TailLines0], !MidLinesCord, !MidLinesLen,
> +        RPLine, LeftOverLines) :-
> +    HeadLine0 = error_line(_HeadMaybeAvailLen, _HeadIndent, _HeadLineWordsStr,
> +        HeadLineWordsLen, HeadParen),
> +    (
> +        HeadParen = paren_none,
> +        cord.snoc(HeadLine0, !MidLinesCord),
> +        !:MidLinesLen = !.MidLinesLen + HeadLineWordsLen,
> +        find_matching_rp(TailLines0, !MidLinesCord, !MidLinesLen, RPLine,
> +            LeftOverLines)
> +    ;
> +        HeadParen = paren_end_rp,
> +        % The right parenthesis is at the start of the *next* line.
> +        (
> +            TailLines0 = [],
> +            % There is no right paren; the original left paren is unbalanced.
> +            fail
> +        ;
> +            TailLines0 = [RPLine | TailTailLines0],
> +            cord.snoc(HeadLine0, !MidLinesCord),
> +            !:MidLinesLen = !.MidLinesLen + HeadLineWordsLen,
> +            LeftOverLines = TailTailLines0
> +        )
> +    ;
> +        HeadParen = paren_lp_end,
> +        find_matching_rp_and_maybe_join(HeadLine0, TailLines0,
> +            ReplacementLines, AfterRpLines),
> +        (
> +            ReplacementLines = [],
> +            % Getting here means that the text has _HeadLineWordsStr
> +            % has disappeared, which is a bug.
> +            unexpected($pred, "ReplacementLines = []")
> +        ;
> +            ReplacementLines = [HeadReplacementLine | TailReplacementLines],
> +            (
> +                TailReplacementLines = [],
> +                % We replaced the inner pattern with a single line.
> +                % Try to fit this single line into the larger pattern.
> +                find_matching_rp([HeadReplacementLine | AfterRpLines],
> +                    !MidLinesCord, !MidLinesLen, RPLine, LeftOverLines)
> +            ;
> +                TailReplacementLines = [_ | _],
> +                % We could't optimize the pattern starting at HeadLine0,

s/could't/couldn't/

> +                % which is nested inside the larger pattern our ancestor
> +                % find_matching_rp_and_maybe_join call was trying to
> +                % optimize. But if even just the lines in this inner pattern
> +                % are too long to fit on our caller's available space,
> +                % then the large pattern that includes those lines *and *more*
> +                % will also be too large to fit into that same space.
> +                %
> +                % NOTE: This assumes that the inner pattern is at least as
> +                % indented as our caller's paren_lp_end line. As far as I (zs)
> +                % know, this is true for all our error messages.
> +                fail
> +            )
> +        )
> +    ).

That looks fine otherwise.

Julien.