[mercury-users] Stack overflow, because I don't use unique modes?

Ondrej Bojar oboj7042 at ss1000.ms.mff.cuni.cz
Mon Feb 11 04:34:15 AEDT 2002


Hi.

I want Mercury to walk over very big files and gather some statistics
about them. I walk line by line, perform a calculation (not too simple,
but deterministic from the outside) and accumulate the statistics.
The problem is that the stack overflows after parsing more than about
50000 lines of input (and the program succeeds if the input is shorter).

For simplicity, I've split the job into a feeder and the calculation
itself. So I write only:

main -->
  ...
  feeder__fold_lines(nodots, parse_line, stat__blank, OutputStatistics)
  ...

where

:- pred feeder__fold_lines(progress_dots, pred(string, Aku, Aku), Aku,
                Aku, io, io).
:- mode feeder__fold_lines(in, pred(in, in, out) is det, in, out, di, uo)
                is det.

(type feeder__progress_dots ---> dots(dot_every_N_lines_parsed) ; nodots.
is just for debugging)

:- pred parse_line(string::in, stat::in, stat::out) is det.

:- type stat == bag(list(string)).

The predicate parse_line parses the input line and creates lots of temporary
structures, but its only outside effect is storing another stats item in
the bag. Although there is a huge number of input lines, there is only a
very limited set of stats items, so the bag itself cannot be what fills up
the memory.

I suspect the problem is that I do not use di and uo modes for the stats
accumulator. Unfortunately, I do not understand unique modes well enough,
and when I try to replace 'in' and 'out' with 'di' and 'uo', I get strange
errors that I cannot solve, such as:

For :- mode parse_line(in, di, uo) instead of (in, in, out):
countvjv.m:047: In clause for `parse_line(in, di, uo)':
countvjv.m:047:   mode error: argument 3 did not get sufficiently instantiated.
countvjv.m:047:   Final instantiatedness of `HeadVar__3' was `ground',
countvjv.m:047:   expected final instantiatedness was `unique'.

Thanks for help,

  Andrew.

P.S.: feeder, stat and the main module, countvjv, are attached. The other
modules used are not attached; there are too many of them.
-------------- next part --------------
:- module feeder.
% Contains routines for reading strings from stdin.
% Simply use this main:
% main -->
%   feeder(line, MyPred).
%
% where MyPred is pred(string::in, string::out) is nondet.
% You can also call
%   feeder(block, MyPred).
% Then the predicate is called for each group of lines up to an empty one.
% The feeder terminates by itself when an empty line is entered.

:- interface.

:- import_module io.
:- import_module string.

% Selects how feeder/4 chunks its input:
%   line  - the predicate is called once per input line;
%   block - the predicate is called once per group of lines, where a group
%           ends at an empty line (or when reading fails).
:- type feeder_input ---> line; block.

% feeder(InputKind, Pred, IO0, IO):
% Interactive driver: repeatedly reads a chunk of input, calls Pred on the
% chunk (as a string), and writes every solution of Pred, separated by
% "\n----------\n". It stops and prints "Feeder done.\n" when an empty
% line is entered or when no more input can be read.
% Pred may have any determinism from det to nondet; feeder itself is det.
:- pred feeder(feeder_input, pred(string, string),io__state, io__state).
:- mode feeder(in, pred(in, out) is nondet, di, uo) is det.
:- mode feeder(in, pred(in, out) is multi, di, uo) is det.
:- mode feeder(in, pred(in, out) is semidet, di, uo) is det.
:- mode feeder(in, pred(in, out) is det, di, uo) is det.

:- import_module int.

% Progress reporting for fold_lines:
%   dots(DotEvery, NumberEvery) - a "." is emitted (via debugstr) whenever
%       the line counter is a multiple of DotEvery, and "(Count)" whenever
%       it is a multiple of NumberEvery;
%   nodots - no progress output.
:- type progress_dots ---> dots(int, int); nodots.

% fold_lines(ProgressDots, Pred, Acc0, Acc, IO0, IO):
% Reads lines from the current input stream and threads the accumulator
% through Pred(Line, AccIn, AccOut) for each line read. Reading stops at
% the first result that is not ok(_). The second mode lets the accumulator
% be passed with unique (di/uo) modes.
:- pred fold_lines(progress_dots, pred(string, Aku, Aku), Aku, Aku, io, io).
:- mode fold_lines(in, pred(in, in, out) is det, in, out, di, uo) is det.
:- mode fold_lines(in, pred(in, di, uo) is det, di, uo, di, uo) is det.

% fold_lines(Stream, ProgressDots, Pred, Acc0, Acc, IO0, IO):
% As above, but reads from the given input stream.
:- pred fold_lines(input_stream, progress_dots, pred(string, Aku, Aku), Aku, Aku, io, io).
:- mode fold_lines(in, in, pred(in, in, out) is det, in, out, di, uo) is det.
:- mode fold_lines(in, in, pred(in, di, uo) is det, di, uo, di, uo) is det.


:- implementation.

:- import_module bool.
:- import_module char.
:- import_module list.
:- import_module std_util.
:- import_module debugstr.

% Line mode: read one line, run Pred on it, print all solutions, repeat.
% Terminates when the read fails or the line consists of a newline only.
feeder(line, Pred) -->
  radka(LineChars, GotLine),
  (
    { GotLine = no ; LineChars = ['\n'] }
  ->
    io__write_string("Feeder done.\n")
  ;
    { string__from_char_list(LineChars, Input) },
    { solutions(
        (pred(Answer::out) is nondet :- call(Pred, Input, Answer)),
        Answers) },
    io__write_list(Answers, "\n----------\n", io__write_string),
    io__nl,
    feeder(line, Pred)
  ).

% Block mode: read lines up to an empty one, run Pred on the whole block,
% print all solutions, repeat. Terminates when the block read is empty.
feeder(block, Pred) -->
  radky(BlockChars),
  (
    { BlockChars = [] }
  ->
    io__write_string("Feeder done.\n")
  ;
    { string__from_char_list(BlockChars, Input) },
    { solutions(
        (pred(Answer::out) is nondet :- call(Pred, Input, Answer)),
        Answers) },
    io__write_list(Answers, "\n----------\n", io__write_string),
    io__nl,
    feeder(block, Pred)
  ).


:- pred radky(list(char)::out, io__state::di, io__state::uo) is det.

% Reads consecutive lines (via radka) and concatenates their characters.
% Collection stops, without including the terminating line, as soon as a
% read fails or a line consisting only of a newline is seen.
radky(Radky) -->
  radka(Chars, GotLine),
  (
    { GotLine = yes, not (Chars = ['\n']) }
  ->
    radky(Rest),
    { list__append(Chars, Rest, Radky) }
  ;
    { Radky = [] }
  ).

:- pred radka(list(char)::out, bool::out, io__state::di, io__state::uo) is det.

% Writes the prompt "f> ", flushes output, and reads one line from the
% current input stream. Ok is yes and Line holds the characters when the
% read returned ok(_); otherwise Ok is no and Line is [].
radka(Line, Ok, IO0, IO) :-
  io__write_string("f> ", IO0, IO1),
  io__flush_output(IO1, IO2),
  io__input_stream(Stdin, IO2, IO3),
  io__read_line(Stdin, Result, IO3, IO),
  (
    if Result = ok(Chars)
    then
      Line = Chars,
      Ok = yes
    else
      Line = [],
      Ok = no
  ).


% Fold over the lines of the current input stream.
fold_lines(ProgressDots, Pred, InAku, OutAku, IO0, IO) :-
  io__input_stream(CurrentInput, IO0, IO1),
  fold_lines(CurrentInput, ProgressDots, Pred, InAku, OutAku, IO1, IO).

% Fold over the lines of InStream; the line counter starts at 0.
fold_lines(InStream, ProgressDots, Pred, InAku, OutAku, IO0, IO) :-
  fold_lines2(InStream, ProgressDots, Pred, InAku, OutAku, 0, IO0, IO).

:- pred fold_lines2(input_stream, progress_dots, pred(string, Aku, Aku), Aku, Aku, int, io, io).
:- mode fold_lines2(in, in, pred(in, in, out) is det, in, out, in, di, uo) is det.
:- mode fold_lines2(in, in, pred(in, di, uo) is det, di, uo, in, di, uo) is det.

% fold_lines2(InStream, ProgressDots, Pred, InAku, OutAku, Cnt, IO0, IO):
% Worker loop for fold_lines. Cnt is the number of lines processed so far
% and is only used for progress reporting.
%
% Each iteration reads one line, optionally emits progress output, and then
% either applies Pred to the line and recurses (the recursive call is the
% last goal of the branch), or, on any result other than ok(_), returns the
% accumulator unchanged. Note that eof and error(_) are not distinguished:
% a read error ends the fold silently, exactly like end of file.
fold_lines2(InStream, ProgressDots, Pred, InAku, OutAku, Cnt) -->
  % Read the line directly as a string instead of reading a list of
  % characters and converting it afterwards; this avoids allocating one
  % list cell per input character on every line.
  io__read_line_as_string(InStream, Result),
  {
  if ProgressDots = dots(DotEvery, NumberEvery)
  then
    (if Cnt rem DotEvery = 0 then debugstr(".") else true),
    (if Cnt rem NumberEvery = 0 then debugstr("("++string__int_to_string(Cnt)++")") else true)
  else
    true
  },
  (
  if { Result = ok(LineStr) }
  then
    { Pred(LineStr, InAku, TAku) },
    fold_lines2(InStream, ProgressDots, Pred, TAku, OutAku, Cnt+1)
  else
    { OutAku = InAku }
  ).
-------------- next part --------------
:- module stat.

:- interface.

:- import_module string, list, int.

% Abstract collection of counted statistics items.
:- type stat.
% A statistics item is identified by a list of strings.
:- type statkey == list(string).

% init and blank both return the empty statistics collection
% (blank is defined as init).
:- func init = (stat::out) is det.
:- func blank = (stat::out) is det.

% count(Key, Stat0, Stat): record one more occurrence of Key.
:- pred count(statkey, stat, stat).
:- mode count(in,      in,   out) is det.

% count(N, Key, Stat0, Stat): record N more occurrences of Key.
:- pred count(int, statkey, stat, stat).
:- mode count(in,  in,      in,   out) is det.

:- import_module pprint.

% to_doc(Stat): pprint document with a "% Base Counts:" section followed by
% a "% Subtotals:" section.
:- func to_doc(stat::in) = (doc::out) is det.
% to_string(Stat): to_doc(Stat) rendered with pprint__to_string, passing 150
% as its first argument.
:- func to_string(stat::in) = (string::out) is det.

:- implementation.

:- import_module bag.
:- import_module std_util.
:- import_module stringutils.

% Statistics are kept as a bag (multiset) of keys; the number of
% occurrences of a key in the bag is its count.
:- type stat == bag(list(string)).

% The empty statistics collection.
init = Empty :-
  bag__init(Empty).

% Synonym for init.
blank = Empty :-
  Empty = stat__init.

% count/3: add a single occurrence of Key to the bag.
count(Key, Stat0, Stat) :-
  Stat = bag__insert(Stat0, Key).

% count/4: add N occurrences of Key by inserting a list of N copies.
count(N, Key, Stat0, Stat) :-
  list__duplicate(N, Key, Copies),
  bag__insert_list(Stat0, Copies, Stat).

% Render the statistics document as a string; 150 is the first argument
% handed to pprint__to_string.
to_string(Stat) = pprint__to_string(150, stat__to_doc(Stat)).

% Build the full report: the base counts under a "% Base Counts:" heading,
% then the subtotals under a "% Subtotals:" heading.
to_doc(Stat) = Report :-
  BaseHeading = text("% Base Counts:\n"),
  SubtotalHeading = text("% Subtotals:\n"),
  BaseSection = BaseHeading `<>` plain_to_doc(Stat),
  Report = BaseSection `</>` SubtotalHeading `<>` subtotals_to_doc(Stat).

:- func plain_to_doc(stat::in) = (doc::out) is det.
:- func subtotals_to_doc(stat::in) = (doc::out) is det.

% One entry per distinct key in the bag, formatted as
% "<count><TAB><key parts joined with '-'>", entries separated by `line`
% and followed by a final `line`.
plain_to_doc(Stat) = Report :-
  FormatEntry =
    (func((Key - Occurrences)::in) = (EntryDoc::out) is det :-
      EntryDoc = to_doc(Occurrences) `<>` text("\t")
        `<>` text(join("-", Key))
    ),
  Entries = bag__to_assoc_list(Stat),
  % (disabled heading) text("% Statistiky") `</>`
  Report = separated(FormatEntry, line, Entries) `<>` line.

% For every (Key - Count) entry of Stat, add Count to each key produced by
% substarts(Key). The accumulation starts from Stat itself, so the result
% contains the base counts as well as the added prefix counts. The result
% is formatted with plain_to_doc.
subtotals_to_doc(Stat) = Report :-
  AddPrefixCounts =
    (pred((Key - Occurrences)::in, Acc0::in, Acc::out) is det :-
      aggregate(substarts(Key), count(Occurrences), Acc0, Acc)
    ),
  Entries = bag__to_assoc_list(Stat),
  list__foldl(AddPrefixCounts, Entries, Stat, WithSubtotals),
  Report = plain_to_doc(WithSubtotals).

% substarts(List, Prefix): Prefix is a proper prefix of List (the empty
% list included). For the empty list the only solution is [].
% Example: substarts([a, b, c], P) has the solutions [], [a] and [a, b].
:- pred substarts(list(T)::in, list(T)::out) is multi.

substarts([], []).
% The single element is never used, so it is anonymous (the named variable
% in the original produced a singleton-variable warning).
substarts([_], []).
substarts([Elem1, Elem2|Rest], Out) :-
  Out = []
  ;
  substarts([Elem2|Rest], TOut),
  Out = [Elem1|TOut].

-------------- next part --------------
:- module countvjv.
% counts very simple sentences

:- interface.

:- import_module io.

:- pred main(io__state::di, io__state::uo) is det.

:- implementation.

:- import_module soup.
:- import_module sentence.

:- import_module fs.
:- import_module fsre.
:- import_module czre.
:- import_module feeder.
:- import_module fstype.
:- import_module morfcat.
:- import_module dumbsgml.
:- import_module csts2uttr.
:- import_module debugstr.
:- import_module stringutils.
:- import_module sentence.

:- import_module string.
:- import_module int.
:- import_module std_util.
:- import_module list.
:- import_module char.

:- import_module exception.
:- import_module require.

:- import_module stat.


% Fold parse_line over every line of the current input stream, starting
% from the empty statistics, then dump the raw result and its formatted
% form through debugstr.
main -->
  % The feeder interface only declares fold_lines with a leading
  % progress_dots argument (optionally preceded by an input stream), so the
  % progress setting must be passed explicitly; nodots disables it.
  fold_lines(nodots, parse_line, stat__blank, OutMem),
  { debugstr("Vysledna pamet:", OutMem) },
  { debugstr("Stats: ", stat__to_string(OutMem)) }.
  
  
:- pred parse_line(string::in, stat::in, stat::out) is det.

% Processes one input line and records exactly one "Input" outcome in the
% statistics (plus whatever do_sentence adds for a successfully read
% sentence):
%   - parse fails                      -> "failed to read sgml"
%   - soup2sentence throws             -> "... excetion while reading fss",
%                                         with the error text as extra key
%   - soup2sentence succeeds           -> "got smgl, got sentence"
%   - soup2sentence fails              -> "... failed to read fss"
% NOTE: the key strings (including their misspellings) are emitted in the
% program's output and are therefore reproduced unchanged.
parse_line(RawLine, Stat0, Stat) :-
  Trimmed = chomp(RawLine),
  (
    parse(Trimmed, Soup)
  ->
    % try/2 is cc_multi; promise_only_solution turns it back into a
    % deterministic computation.
    Outcome = promise_only_solution(
      (pred(TryResult::out) is cc_multi :-
        try(soup2sentence(list__reverse(Soup)), TryResult)
      )),
    (
      Outcome = exception(Thrown)
    ->
      % Use the message of a software_error if that is what was thrown,
      % otherwise fall back to a dump of the exception value.
      (
        univ_to_type(Thrown, Payload),
        Payload = software_error(Message0)
      ->
        Message = Message0
      ;
        Message = dump(Thrown)
      ),
      count(["Input", "failed", "got sgml, excetion while reading fss", Message], Stat0, Stat)
    ;
      Outcome = succeeded(Sentence)
    ->
      count(["Input", "got smgl, got sentence"], Stat0, Stat1),
      do_sentence(Sentence, Stat1, Stat)
    ;
      % Remaining case: Outcome = failed.
      count(["Input", "failed", "got sgml, failed to read fss"], Stat0, Stat)
    )
  ;
    count(["Input", "failed", "failed to read sgml"], Stat0, Stat)
  ).

:- pred do_sentence(sentence::in, stat::in, stat::out) is det.

% Classifies a sentence with velmi_jednoducha_veta ("very simple sentence")
% and records the outcome under the "Stats" key.
% The sentence identifier is not used, so it is bound to an underscore
% variable (the named variable in the original produced a
% singleton-variable warning).
do_sentence(s(_ID, Words), InStat, OutStat) :-
  (
  if velmi_jednoducha_veta(Words)
  then
    Label = "velmi jednoducha veta"
  else
    Label = "non velmi jednoducha veta"
  ),
  count(["Stats", Label], InStat, OutStat).

/*
  (
  if
    fsre__match(cat([dotstar, fs(clausedelimfs), plus(dot)]), Words)
  then
    MoreClauses = "M "
  else
    MoreClauses = "  "
  ),
    % dump("More clauses possible\t", dump(ID)++fss_dump(Words))
  % else
    solutions(
      (pred(Out::out) is nondet :-
        fsre__match(
          cat([dotstar, group("verb", fs(fs([cat-da(verb)]))), dotstar]),
          Words, Groups),
          % cat([dotstar, group("verb", fs(finverbfs)), dotstar]),
          % Words, Groups),
        extract_group(Groups, "verb", Words, VerbFSs),
        Out = fss_dump(VerbFSs)
      ), Verbs),
    dump(MoreClauses++string__int_to_string(length(Verbs))++" verbs\t", join(" ", Verbs)++"\t"++fss_dump(Words))
  .

:- pred testfsre(sentence::in, string::out) is nondet.


testfsre(S, "") :-
  member(FSRE, fsres),
  % debugstr("Trying FSRE: ", fsre__dump(FSRE)),
  % debugstr("On Sentence: ", fss_dump(S^words)),

  (
  % if
  fsre__unify(FSRE, Grps, S^words, _MatchedWords),
  % then
    % debugstr("Trying FSRE: ", fsre__dump(FSRE)),
    % debugnl,
    % debugstr("On Sentence: ", fss_dump(S^words)),
    % debugstr("Matched groups:\n", dump_groups(Grps, S^words)),
    dump(nicedumpgroups(Grps, S^words), "\n")
    % debugstr("Matched words:  ", fss_dump(MatchedWords))
  % else
    % debugstr("No match on sentence: ", fss_dump(S^words)),
    % true
  ).


:- func fsres = (list(fsre(string))::out) is det.

fsres = [
  % cat([dotstar, group("mid", cat([dot, dot])), dotstar])

  % dotstar,

  % cat([dotstar, fs(fs([lemma-da(string(":"))])), group("za dvojtečkou",dotstar)])

  % cat([dotstar, fs(fs([lemma-da(string("("))])), group("v závorce", dotstar), fs(fs([lemma-da(string(")"))])), dotstar]),

  % cat([dotstar, group("jmenná skupina", cat([star(any([cat([questionmark(adv) , group("adj",adj)]), pron])), group("hlava", noun)])), dotstar]),

  cat([dotstar, cat([
    clausedelim,
      group("innerklauze", cat([star(neitherfs([finverbfs, clausedelimfs])), group("sloveso",fs(finverbfs)), star(neitherfs([finverbfs, clausedelimfs]))])),
    clausedelim 
  ]), dotstar])

  % group("bez : -", star(neitherfs([fs([lemma-da(string(":"))]), fs([lemma-da(string("-"))])])))

].

:- func adv = (fsre(GLT)::out) is det.
:- func adj = (fsre(GLT)::out) is det.
:- func pron = (fsre(GLT)::out) is det.
:- func noun = (fsre(GLT)::out) is det.
:- func finverb(string::in) = (fsre(GLT)::out) is det.

adv = fs(fs([cat-da(adv)])).
adj = fs(fs([cat-da(adj)])).
pron = fs(fs([cat-da(pron)])).
noun = fs(fs([cat-da(noun)])).
finverb(Lemma) = fs(sort(fs([cat-da(verb),
       morfcat-or([da(morfcat(verb(presfut))), da(morfcat(verb(imper))), da(morfcat(verb(presfut_with_t)))]),
       lemma-da(string(Lemma))
   ]))).

:- func finverbfs = (fs::out) is det.
:- func clausedelimfs = (fs::out) is det.

finverbfs = sort(
  fs([
    cat-da(verb),
    morfcat-or([
      da(morfcat(verb(presfut))), da(morfcat(verb(imper))), da(morfcat(verb(presfut_with_t))),
      da(morfcat(verb(particactive))), da(morfcat(verb(particactive_with_t))), da(morfcat(verb(particpassive))),
      da(morfcat(verb(transgrpres)))
    ])
  ])
  ).

clausedelimfs = sort(
    fs([
      lemma-or([
             da(string("a")),
             da(string("i")),
             da(string("ani")),
             da(string("nebo")),
             da(string("(")),
             da(string(")")),
             da(string(".")),
             da(string(",")),
             da(string("?")),
             da(string("!")),
             da(string(":")),
             da(string(";"))
           ])
    ])
  ).

:- func clausedelim = (fsre(GLT)::out) is det.

clausedelim = any([fs(clausedelimfs), beg, end]).


*/


More information about the users mailing list