[m-rev.] for review: separate Mercury-specific code into evaluate.conf and improve networking code

Samrith UONG samuong at gmail.com
Fri Feb 10 17:00:49 AEDT 2006


For review by Ralph or Julien.

I'm almost ready to commit this to CVS (does it belong in a root
module or under benchmarks or extras?).  Here are the changes I made
since the last review.

Estimated hours taken: 10.

*:
	Made all the changes suggested in the previous reviews.

evaluate, evaluate.conf:
	All the calls to mmc are now in evaluate.conf, meaning that
	the program should work with any set of benchmark programs
	and any programming language, as long as everything is set
	up properly in evaluate.conf.

gator (was mcflags), evaluate:
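	Fixed a limitation in the way the script used to distribute
	jobs to the other hosts.  Each host was given a fixed range of
	genotypes up front, so if one of the hosts was busy (e.g., if
	it was running its nightly build), it would hold up the
	process, leaving all the other hosts sitting idle.  This was
	wasting hours of CPU time every day.  Genotypes are now handed
	out one at a time, using a FIFO queue of the hosts that are
	not busy evaluating a genotype.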

$ cvs diff -uD 'Feb 3, 2006 1:44 PM'
? Mercury
? evolve
? evolve.err
? evolve.mh
? gator
? gator.conf
? gator.out.1.bz2
? generations
? genotype.err
? genotype.mh
? phenotype.err
? phenotype.mh
? tausworthe3.err
? tausworthe3.mh
Index: evaluate
===================================================================
RCS file: /home/sam/src/master/mcflags/evaluate,v
retrieving revision 1.10
retrieving revision 1.12
diff -u -r1.10 -r1.12
--- evaluate	3 Feb 2006 01:33:20 -0000	1.10
+++ evaluate	4 Feb 2006 13:26:50 -0000	1.12
@@ -10,115 +10,96 @@
 # See evaluate.conf for the list of benchmark programs, and phenotype.m
 # for details on the phenotype data structure.
 #
-
-set -x
+# If for some reason, an executable is not produced by the compiler, we
+# print a large number for the executable size and run time, so that the
+# genes are unlikely to be passed on to the next generation (and cause
+# further compilation errors).
+#

 prog=`basename "$0"`
-usage="usage: $prog [ -a first ] -z last"
+usage="usage: $prog -b path_to_benchmarks -p path [-v] -w path_to_workspace"

-while getopts a:b:p:w:z: f
+while getopts b:p:vw: f
 do
 	case $f in
-	a)	first="$OPTARG";;
 	b)	benchmarks="$OPTARG";;
 	p)	PATH="$OPTARG":$PATH;;
 	w)	workspace="$OPTARG";;
-	z)	last="$OPTARG";;
+	v)	set -x;;
 	\?)	echo "$usage" >&2; exit 1;;
 	esac
 done
 shift `expr $OPTIND - 1`

-first=${first:-1}
-# XXX: check if $last is set
-
+read flags || exit 1
 [ -r "$workspace"/evaluate.conf ] && . "$workspace"/evaluate.conf

-genotype=$first
-while [ $genotype -le $last ]
+echo 'phenotype(['
+
+#
+# Print the list of compile times.
+#
+i=1
+while [ $i -le $num_progs ]
 do
-	read flags || exit 1
+	eval "prog=\${prog$i}"
+	eval "clean=\${clean$i}"
+	eval "compile=\${compile$i}"
+	
+	cd `dirname "$prog"`
+	eval "$clean"
+	"$workspace"/dotime eval "$compile" | tail -n 1 | awk '{ print $1 }' |
+	    sed 's/u$//'
+	
+	[ $i -lt $num_progs ] && echo ','
+	i=`expr $i + 1`
+done
+	
+echo '], ['
+	
+#
+# Print the list of executable sizes.
+#
+i=1
+while [ $i -le $num_progs ]
+do
+	eval "prog=\${prog$i}"

-	[ $genotype -gt $first ] && echo ','
-	echo 'phenotype(['
+	if [ -x "$prog" ]
+	then
+		ls -l "$prog" | awk '{ print $5 }'
+	else
+		echo '99999999999999999999999999999999999999999999999999999999'
+	fi
+	
+	[ $i -lt $num_progs ] && echo ','
+	i=`expr $i + 1`
+done
+	
+echo '], ['

-	#
-	# Print the list of compile times.  If for some reason, an
-	# executable was not produced, print a large number, so that the
-	# genes do not get passed on to the next generation (and cause
-	# further compilation errors).
-	#
-	i=1
-	while [ $i -le $num_dirs ]
-	do
-		eval "dir=\${dir$i}"
-		eval "prog=\${prog$i}"
-	
-		cd "$dir"
-		mmc --make "$prog".realclean
-		rm -rf Mercury
-		compile_time=`"$workspace"/dotime mmc --make -O0 $flags \
-		    "$prog" | tail -n 1 | awk '{ print $1 }' | sed 's/u$//'`
-		[ -x "$prog" ] || compile_time='9999999999999999999999999999.9'
-		echo "$compile_time"
-	
-		[ $i -lt $num_dirs ] && echo ','
-		i=`expr $i + 1`
-	done
-	
-	echo '], ['
-	
-	#
-	# Print the list of executable sizes.
-	#
-	i=1
-	while [ $i -le $num_dirs ]
-	do
-		eval "dir=\${dir$i}"
-		eval "prog=\${prog$i}"
-
-		cd "$dir"
-		if [ -x "$prog" ]
-		then
-			ls -l "$prog" | awk '{ print $5 }'
-		else
-			echo '999999999999999999999999999999999999999999999999'
-		fi
-	
-		[ $i -lt $num_dirs ] && echo ','
-		i=`expr $i + 1`
-	done
-	
-	echo '], ['
-	
-	#
-	# Print the list of run times.
-	#
-	i=1
-	while [ $i -le $num_dirs ]
-	do
-		eval "dir=\${dir$i}"
-		eval "prog=\${prog$i}"
-		eval "args=\${args$i}"
-		eval "input=\${input$i}"
-	
-		cd "$dir"
-		if [ -x "$prog" ]
-		then
-			"$workspace"/dotime ./"$prog" $args \
-			    <"${input:-/dev/stdin}" | tail -n 1 |
-			    awk '{ print $1 }' | sed 's/u$//'
-		else
-			echo '9999999999999999999999999999999999999999999999.9'
-		fi
-	
-		[ $i -lt $num_dirs ] && echo ','
-		i=`expr $i + 1`
-	done
+#
+# Print the list of run times.
+#
+i=1
+while [ $i -le $num_progs ]
+do
+	eval "prog=\${prog$i}"
+	eval "run=\${run$i}"

-	echo '])'
+	if [ -x "$prog" ]
+	then
+		cd `dirname "$prog"`
+		"$workspace"/dotime eval "$run" | tail -n 1 |
+		    awk '{ print $1 }' | sed 's/u$//'
+	else
+		echo '999999999999999999999999999999999999999999999999999999.9'
+	fi

-	genotype=`expr $genotype + 1`
+	[ $i -lt $num_progs ] && echo ','
+	i=`expr $i + 1`
 done

+echo '])'
+
 exit 0
Index: evaluate.conf
===================================================================
RCS file: /home/sam/src/master/mcflags/evaluate.conf,v
retrieving revision 1.3
diff -u -r1.3 evaluate.conf
--- evaluate.conf	3 Feb 2006 01:33:20 -0000	1.3
+++ evaluate.conf	10 Feb 2006 05:48:32 -0000
@@ -1,46 +1,48 @@
 #
-# num_dirs: the number of programs used for benchmarking.
+# num_progs: the number of programs used for benchmarking.
 #
-num_dirs=5
+num_progs=5

 #
 # For each program, the following variables need to be defined.
 #
-# dir$i: the path to the directory containing the source code for the
-#	program.  This may or may not be contained under "$benchmarks"
-#	(see mcflags.conf).
-#
-# prog$i: the name of the main executable for the program.
-#
-# args$i: any command-line arguments to be passed to the program.  By
-#	default this evaluates to "".
-#
-# input$i: the file to be used as the standard input stream.  By default
-#	this evaluates to "/dev/stdin".
-#
-dir1="$benchmarks"/progs/icfp2000
-prog1="main"
-input1="dice.cpp"
-
-dir2="$benchmarks"/progs/icfp2001
-prog2="smlngopt"
-args2="OUTPUT"
-input2="103-the-random-returns.txt"
-
-dir3="$benchmarks"/progs/nuc
-prog3="nuc5"
-args3=""
-input3="/dev/null"
-
-dir4="$benchmarks"/progs/ray
-prog4="proj"
-args4="-f 100 -S -s 0.4 2 -a 0.1 dh.scene 140 140 0 0 0 > dh.ppm"
-input4="/dev/null"
-
-dir5="$benchmarks"/progs/tree234
-prog5="treetest"
-args5=""
-input5="/dev/null"
+# prog$i: the full path to the executable for the program (which may not
+#	yet be built).  This may or may not be contained under
+#	"$benchmarks" (see gator.conf).
+#
+# clean$i: the command used to completely clean up the source directory.
+#
+# compile$i: the command used to compile the program.  Note that you may
+#	assume there is a $flags shell variable which gives the
+#	optimization flags passed to the compiler.
+#
+# run$i: the command used to run the program.
+#
+
+prog1="$benchmarks"/progs/icfp2000/main
+clean1="mmc --make main.realclean; rm -rf Mercury"
+compile1="mmc --make -O0 $flags main"
+run1="./main <dice.cpp"
+
+prog2="$benchmarks"/progs/icfp2001/smlngopt
+clean2="mmc --make smlngopt.realclean; rm -rf Mercury"
+compile2="mmc --make -O0 $flags smlngopt"
+run2="./smlngopt OUTPUT <103-the-random-returns.txt"
+
+prog3="$benchmarks"/progs/nuc/nuc5
+clean3="mmc --make nuc5.realclean; rm -rf Mercury"
+compile3="mmc --make -O0 $flags nuc5"
+run3="./nuc5"
+
+prog4="$benchmarks"/progs/ray/proj
+clean4="mmc --make proj.realclean; rm -rf Mercury"
+compile4="mmc --make -O0 $flags proj"
+run4="./proj -f 100 -S -s 0.4 2 -a 0.1 dh.scene 140 140 0 0 0 >dh.ppm"
+
+prog5="$benchmarks"/progs/tree234/treetest
+clean5="mmc --make treetest.realclean; rm -rf Mercury"
+compile5="mmc --make -O0 $flags treetest"
+run5="./treetest"

 #
 # Some of the benchmarks can overflow the detstack.  Set it high enough
Index: evolve.m
===================================================================
RCS file: /home/sam/src/master/mcflags/evolve.m,v
retrieving revision 1.5
diff -u -r1.5 evolve.m
--- evolve.m	3 Feb 2006 01:33:20 -0000	1.5
+++ evolve.m	10 Feb 2006 05:48:32 -0000
@@ -17,16 +17,16 @@
 % list of genotypes, where each genotype is a set of strings representing
 % optimisation flags.  The second is a list of phenotypes, where each
 % phenotype is a list of benchmarks.  These are read from the files
-% MCFLAGS/$n/genotypes and MCFLAGS/$n/phenotypes, respectively.
+% generations/$n/genotypes and generations/$n/phenotypes, respectively.
 %
 % The program will then determine the next set of genotypes, which it
-% will write in the file MCFLAGS/$n+1/genotypes.  It will also create
-% the file MCFLAGS/$n+1/flags, which contains the flags passed to mmc
-% to compile a benchmark program.
+% will write in the file generations/$n+1/genotypes.  It will also create
+% the file generations/$n/ladder, which contains a table with all of the
+% genotypes and their fitness values.
 %
 % Note that this program does not perform the actual benchmarking tests,
 % nor does it control the evolution over multiple generations.  These
-% tasks are handled by the mcflags shell script.
+% tasks are handled by the evaluate and gator scripts, respectively.
 %
 %-----------------------------------------------------------------------------%

@@ -35,8 +35,7 @@

 :- import_module io.

-:- pred main(io, io).
-:- mode main(di, uo) is det.
+:- pred main(io::di, io::uo) is det.

 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
@@ -54,7 +53,6 @@
 :- import_module int.
 :- import_module list.
 :- import_module require.
-:- import_module set.
 :- import_module std_util.
 :- import_module string.

@@ -112,7 +110,7 @@
 			% elements if the two lists (Mothers and Fathers) are of
 			% unequal lengths.
 			%
-		list.map(phenotype.fitness(Weightings), Phenotypes, Fitness),
+		Fitness = list.map(phenotype.fitness(Weightings), Phenotypes),
 		list.map_foldl(phenotype.selection(Genotypes, Fitness),
 				Genotypes, Parents, !RNG),
 		list.det_split_list(length(Parents) / 2, Parents, Mothers, Fathers),
@@ -120,9 +118,7 @@
 				Sons, Daughters, !RNG),
 		list.append(Sons, Daughters, Children),
 		list.map_foldl(genotype.mutation(Flags), Children, NextGenotypes,
-                !RNG),
-
-		_ = !.RNG
+                !.RNG, _)
 	),

 		% Print the output files.
@@ -150,8 +146,7 @@
 	;		second_seed
 	;		third_seed.

-:- pred short_option(char, option).
-:- mode short_option(in, out) is semidet.
+:- pred short_option(char::in, option::out) is semidet.

 short_option('c', config_file).
 short_option('g', genotypes).
@@ -162,8 +157,7 @@
 short_option('t', second_seed).
 short_option('u', third_seed).

-:- pred long_option(string, option).
-:- mode long_option(in, out) is semidet.
+:- pred long_option(string::in, option::out) is semidet.

 long_option("config-file", config_file).
 long_option("genotypes", genotypes).
@@ -174,25 +168,24 @@
 long_option("second-seed", second_seed).
 long_option("third-seed", third_seed).

-:- pred option_default(option, option_data).
-:- mode option_default(out, out) is multi.
+:- pred option_default(option::out, option_data::out) is multi.

 option_default(config_file, string("evolve.conf")).
-option_default(genotypes, string("MCFLAGS/1/genotypes")).
-option_default(next_genotypes, string("MCFLAGS/2/genotypes")).
-option_default(ladder, string("MCFLAGS/1/ladder")).
-option_default(phenotypes, string("MCFLAGS/1/phenotypes")).
+option_default(genotypes, string("generations/1/genotypes")).
+option_default(next_genotypes, string("generations/2/genotypes")).
+option_default(ladder, string("generations/1/ladder")).
+option_default(phenotypes, string("generations/1/phenotypes")).
 option_default(first_seed, int(0)).
 option_default(second_seed, int(0)).
 option_default(third_seed, int(0)).

 %-----------------------------------------------------------------------------%
 %
-% Input/Ouput predicates.
+% Code for reading configuration files.
 %

-:- pred read_config_file(string, list(float), list(string), io, io).
-:- mode read_config_file(in, out, out, di, uo) is det.
+:- pred read_config_file(string::in, list(weighting)::out, list(flag)::out,
+        io::di, io::uo) is det.

 read_config_file(Path, Weightings, Flags, !IO) :-
     io.open_input(Path, OpenResult, !IO),
@@ -227,8 +220,8 @@
         require.error(ErrorMessage)
     ).

-:- pred print_ladder(string, list(fitness), list(genotype), io, io).
-:- mode print_ladder(in, in, in, di, uo) is det.
+:- pred print_ladder(string::in, list(fitness)::in, list(genotype)::in,
+        io::di, io::uo) is det.

 print_ladder(Path, Fitness, Genotypes, !IO) :-
     io.open_output(Path, OpenResult, !IO),
@@ -236,8 +229,8 @@
         OpenResult = ok(Stream),

         list.map(string.int_to_string, 1..list.length(Fitness), C1),
-        list.map(phenotype.fitness_to_string, Fitness, C2),
-        list.map(genotype.genotype_to_string, Genotypes, C3),
+        C2 = list.map(phenotype.fitness_to_string, Fitness),
+        C3 = list.map(genotype.genotype_to_string, Genotypes),

         Table = string.format_table([right(C1), right(C2), left(C3)], " * "),
         io.write_string(Stream, Table, !IO),
@@ -259,10 +252,10 @@
     % This predicate is the same as list.map_foldl, except that it takes
     % two input lists and two output lists.
     %
-:- pred map_2in_2out_foldl(pred(L, M, N, O, A, A),
-		list(L), list(M), list(N), list(O), A, A).
-:- mode map_2in_2out_foldl(pred(in, in, out, out, in, out) is det,
-		in, in, out, out, in, out) is det.
+:- pred map_2in_2out_foldl(
+		pred(L, M, N, O, A, A)::(pred(in, in, out, out, in, out) is det),
+		list(L)::in, list(M)::in, list(N)::out, list(O)::out, A::in, A::out)
+		is det.

 map_2in_2out_foldl(_, [],        [],        [],        [],        !A).
 map_2in_2out_foldl(_, [],        [_H | _T], [],        [],        !A).
Index: genotype.m
===================================================================
RCS file: /home/sam/src/master/mcflags/genotype.m,v
retrieving revision 1.6
retrieving revision 1.8
diff -u -r1.6 -r1.8
--- genotype.m	2 Feb 2006 02:44:00 -0000	1.6
+++ genotype.m	7 Feb 2006 04:48:59 -0000	1.8
@@ -21,6 +21,8 @@

 :- type genotype.

+:- type flag.
+
 	% read_genotypes(Path, Genotypes, !IO):
 	%
 	% Reads in a list of genotypes from the given file.  A genotype is
@@ -28,8 +30,7 @@
 	% separated by a newline character, and each flag within a genotype
 	% is separated by one or more spaces.
 	%
-:- pred read_genotypes(string, list(genotype), io, io).
-:- mode read_genotypes(in, out, di, uo) is det.
+:- pred read_genotypes(string::in, list(genotype)::out, io::di, io::uo) is det.

 	% crossover(Mother, Father, Son, Daughter, !RNG).
 	%
@@ -44,9 +45,8 @@
 	% used.  Many common crossover methods assume the genotype is a
 	% fixed-length bit-array.
 	%
-:- pred crossover(genotype, genotype, genotype, genotype, RNG, RNG)
-        <= random(RNG, Seed).
-:- mode crossover(in, in, out, out, in, out) is det.
+:- pred crossover(genotype::in, genotype::in, genotype::out,
+        genotype::out, RNG::in, RNG::out) is det <= random(RNG, Seed).

 	% mutation(Flags, Child, Mutant, !RNG).
 	%
@@ -55,27 +55,25 @@
 	% This predicate is implemented by choosing a compiler flag at
 	% random and toggling that flag in the child genotype.
 	%
-:- pred mutation(list(string), genotype, genotype, RNG, RNG)
-        <= random(RNG, Seed).
-:- mode mutation(in, in, out, in, out) is det.
+:- pred mutation(list(flag)::in, genotype::in, genotype::out, RNG::in,
+        RNG::out) is det <= random(RNG, Seed).

 	% print_genotypes(Path, Genotypes, !IO):
 	%
 	% Prints out a list of genotypes to the given file, in the same
 	% format as expected by read_genotypes/4.
 	%
-:- pred print_genotypes(string, list(genotype), io, io).
-:- mode print_genotypes(in, in, di, uo) is det.
+:- pred print_genotypes(string::in, list(genotype)::in, io::di, io::uo) is det.

-    % genotype_to_string(Genotype, String).
+    % genotype_to_string(Genotype) = String.
     %
     % Returns a string representation of the genotype.
     %
-:- pred genotype_to_string(genotype, string).
-:- mode genotype_to_string(in, out) is det.
+:- func genotype_to_string(genotype) = string is det.

 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
+
 :- implementation.

 :- import_module char.
@@ -84,6 +82,7 @@
 :- import_module set.
 :- import_module std_util.
 :- import_module string.
+:- import_module svset.

 %-----------------------------------------------------------------------------%

@@ -133,12 +132,11 @@
 		require.error(ErrorMessage)
 	).

-:- pred many(pred(T, list(char), list(char)), list(T), list(char), list(char)).
-:- mode many(pred(out, in, out) is semidet, out, in, out) is semidet.
+:- pred many(pred(T, list(char), list(char))::(pred(out, in, out) is semidet),
+		list(T)::out, list(char)::in, list(char)::out) is semidet.

 many(P, Ps) -->
-	( if
-		P(X)
+	( if P(X)
 	then
 		many(P, Xs),
 		{ Ps = [X | Xs] }
@@ -146,8 +144,7 @@
 		{ Ps = [] }
 	).

-:- pred genotype(genotype, list(char), list(char)).
-:- mode genotype(out, in, out) is semidet.
+:- pred genotype(genotype::out, list(char)::in, list(char)::out) is semidet.

 genotype(Genotype) -->
 	many(pred(' '::out, in, out) is semidet --> [' '], _DiscardLeadingSpaces),
@@ -155,23 +152,21 @@
 	['\n'],
 	{ set.list_to_set(list.map(string.strip, Flags), Genotype) }.

-:- pred flag(flag, list(char), list(char)).
-:- mode flag(out, in, out) is semidet.
+:- pred flag(flag::out, list(char)::in, list(char)::out) is semidet.

 flag(Flag) -->
 	double_dash(DoubleDash),
 	many(other, Others),
 	{ Flag = string.from_char_list(DoubleDash ++ Others) }.

-:- pred double_dash(list(char), list(char), list(char)).
-:- mode double_dash(out, in, out) is semidet.
+:- pred double_dash(list(char)::out, list(char)::in, list(char)::out) is
+        semidet.

 double_dash(DoubleDash) -->
 	['-', '-'],
 	{ DoubleDash = ['-', '-'] }.

-:- pred other(char, list(char), list(char)).
-:- mode other(out, in, out) is semidet.
+:- pred other(char::out, list(char)::in, list(char)::out) is semidet.

 other(Other) -->
 	\+ ['-', '-'],
@@ -194,8 +189,8 @@
 	Son = set.union_list(PartsOfSon),
 	Daughter = set.union_list(PartsOfDaughter).

-:- pred cut(genotype, genotype, genotype, RNG, RNG) <= random(RNG, Seed).
-:- mode cut(in, out, out, in, out) is det.
+:- pred cut(genotype::in, genotype::out, genotype::out, RNG::in,
+        RNG::out) is det <= random(RNG, Seed).

 cut(Parent, PartOfSon, PartOfDaughter, !RNG) :-
 	( if
@@ -222,9 +217,9 @@
 	( if
 		set.member(Flag, !.Genotype)
 	then
-		set.delete(!.Genotype, Flag, !:Genotype)
+		svset.delete(Flag, !Genotype)
 	else
-		set.insert(!.Genotype, Flag, !:Genotype)
+		svset.insert(Flag, !Genotype)
 	).

 %-----------------------------------------------------------------------------%
@@ -242,7 +237,7 @@
 	io.open_output(Path, Result, !IO),
 	(
 		Result = ok(Stream),
-		map(genotype_to_string, Genotypes, Strings),
+		Strings = map(genotype_to_string, Genotypes),
 		io.write_list(Stream, Strings, "\n", io.write_string(Stream), !IO),
 		io.nl(Stream, !IO),
 		io.close_output(Stream, !IO)
@@ -252,7 +247,7 @@
 		require.error(ErrorMessage)
 	).

-genotype_to_string(Genotype, String) :-
+genotype_to_string(Genotype) = String :-
 	set.to_sorted_list(Genotype, List),
 	String = string.join_list(" ", List).

Index: mcflags
===================================================================
RCS file: /home/sam/src/master/mcflags/mcflags,v
retrieving revision 1.13
diff -u -r1.13 mcflags
--- mcflags	3 Feb 2006 01:33:20 -0000	1.13
+++ mcflags	10 Feb 2006 05:48:32 -0000
@@ -16,8 +16,8 @@
 # Mercury modules is shown below.  The subprograms above call the ones
 # directly below them, but not the other way around.
 #
-#	mcflags
-#		mcflags.conf
+#	gator
+#		gator.conf
 #		evaluate
 #			evaluate.conf
 #		evolve.m
@@ -27,23 +27,31 @@
 #			tausworthe3.m
 #

-set -x
+prog=`basename "$0"`
+usage="usage: $prog [-g generation] [-k] [-v]"

-WORKSPACE=/home/mercury/samrith/mcflags
-export WORKSPACE
-
-[ -r "$WORKSPACE"/mcflags.conf ] && . "$WORKSPACE"/mcflags.conf
-
-while getopts g:k f
+while getopts g:kv f
 do
 	case $f in
 	g)	generation="$OPTARG";;
 	k)	kill=true;;
+	v)	set -x;;
 	\?)	echo "$usage" >&2; exit 1;;
 esac
 done
 shift `expr $OPTIND - 1`

+. gator.conf || exit 1
+
+#
+# We need to have ssh-agent(1) running so that the user doesn't have to
+# type a password/passphrase every time we connect to another server.
+#
+while ! ssh-add
+do
+	eval `ssh-agent`
+done
+
 kill=${kill:-false}
 if $kill
 then
@@ -55,70 +63,94 @@
 		i=`expr $i + 1`
 	done

-	exec pkill mcflags
+	exec pkill gator
 fi

+mmc --make evolve || exit 1
+
 generation=${generation:-1}
 while true
 do
-	genotypes="$WORKSPACE"/MCFLAGS/$generation/genotypes
-	phenotypes="$WORKSPACE"/MCFLAGS/$generation/phenotypes
-
 	#
-	# Calculate the number of genotypes that are evaluated per host.
-	# Make sure that the last host in the list doesn't get left with
-	# a much larger number of genotypes to evaluate than the others.
+	# We maintain a FIFO queue which contains (the index of) all the
+	# hosts available for us to use, and which are not busy
+	# evaluating a genotype.
 	#
-	num_genotypes=`wc -l <"$genotypes"`
-	genotypes_per_host=`expr $num_genotypes / $num_hosts`
-	remainder=`expr $num_genotypes % $num_hosts`
-	if [ $remainder -ne 0 ]
-	then
-		genotypes_per_host=`expr $genotypes_per_host + 1`
-	fi
+
+	fifo="${TMPDIR:-/tmp}"/"$prog"$$
+	rm -rf "$fifo" || exit 1
+	mkfifo "$fifo"
+	trap 'cd /; /bin/rm -f "$fifo"; exit' 0 1 2 3 15

 	#
-	# Log into each host in turn and execute the evaluate script for
-	# each set of genotypes.
+	# Note that throughout the program, the shell variable $i is
+	# used as an index to the host, and $j is used as an index to
+	# the genotype.
 	#
+
 	i=1
 	while [ $i -le $num_hosts ]
 	do
+		echo "$i" >>"$fifo" &
+		i=`expr $i + 1`
+	done
+
+	#
+	# For each genotype, find a host to evaluate it on.  Once we're
+	# done evaluating the genotype, put the host back into the queue.
+	#
+
+	genotypes=generations/$generation/genotypes
+	num_genotypes=`wc -l <"$genotypes"` || exit 1
+
+	j=1
+	while [ $j -le $num_genotypes ]
+	do
+		out=generations/$generation/evaluate.out.$j
+		err=generations/$generation/evaluate.err.$j
+
+		read i
+
 		eval "host=\${host$i}"
 		eval "workspace=\${workspace$i}"
 		eval "benchmarks=\${benchmarks$i}"
 		eval "path=\${path$i}"

-		first=`expr \( $i - 1 \) \* $genotypes_per_host + 1`
-		last=`expr $i \* $genotypes_per_host`
+		(
+			sed -n ${j}p <"$genotypes" | ssh "$host" nice -n 19 \
+			    "$workspace"/evaluate -b "$benchmarks" -p "$path" \
+			    -v -w "$workspace" >"$out" 2>"$err"

-		out="$WORKSPACE"/MCFLAGS/$generation/evaluate.out.$i
-		err="$WORKSPACE"/MCFLAGS/$generation/evaluate.err.$i
+			echo "$i"
+		) &

-		sed -n ${first},${last}p <"$genotypes" |
-		    ssh "$host" nice -n 19 "$workspace"/evaluate -a $first \
-		    -b "$benchmarks" -p "$path" -w "$workspace" -z $last \
-		    >"$out" 2>"$err" &
-
-		i=`expr $i + 1`
-	done
+		j=`expr $j + 1`
+	done <"$fifo" >>"$fifo"

 	#
 	# After all the genotypes have been evaluated, combine the
 	# fragments that make up the "$phenotypes" file.
 	#

-	wait
+	phenotypes=generations/$generation/phenotypes

 	echo '[' >"$phenotypes"

-	i=1
-	while [ $i -le $num_hosts ]
+	wait
+
+	j=1
+	while [ $j -le $num_genotypes ]
 	do
-		out="$WORKSPACE"/MCFLAGS/$generation/evaluate.out.$i
+		out=generations/$generation/evaluate.out.$j
+		err=generations/$generation/evaluate.err.$j
+
 		cat "$out" >>"$phenotypes"
-		[ $i -lt $num_hosts ] && echo ',' >>"$phenotypes"
-		i=`expr $i + 1`
+		[ $j -lt $num_genotypes ] && echo ',' >>"$phenotypes"
+
+		# These files can be around 6 MB per generation (uncompressed).
+		bzip2 "$err"
+
+		j=`expr $j + 1`
 	done

 	echo '].' >>"$phenotypes"
@@ -129,15 +161,15 @@

 	next_generation=`expr $generation + 1`

-	next_genotypes="$WORKSPACE"/MCFLAGS/$next_generation/genotypes
-	ladder="$WORKSPACE"/MCFLAGS/$generation/ladder
+	next_genotypes=generations/$next_generation/genotypes
+	ladder=generations/$generation/ladder

-	mkdir "$WORKSPACE"/MCFLAGS/$next_generation
-	"$WORKSPACE"/evolve -g "$genotypes" -h "$next_genotypes" \
-	    -l "$ladder" -p "$phenotypes" || exit 1
+	mkdir generations/$next_generation
+	./evolve -g "$genotypes" -h "$next_genotypes" -l "$ladder" \
+	    -p "$phenotypes" || exit 1

 	generation="$next_generation"
 done

-# XXX: should catch ^C and print the best genotype (or something similar)
+# NOTREACHED
 exit 1
Index: mcflags.conf
===================================================================
RCS file: /home/sam/src/master/mcflags/mcflags.conf,v
retrieving revision 1.8
diff -u -r1.8 mcflags.conf
--- mcflags.conf	3 Feb 2006 01:33:20 -0000	1.8
+++ mcflags.conf	10 Feb 2006 05:48:32 -0000
@@ -1,6 +1,6 @@
 #
 # num_hosts: the number of hosts available for benchmarking.  This may
-#	include the host from which mcflags is being run, if it is also
+#	include the host from which gator is being run, if it is also
 #	being used to run the benchmarks.
 #

@@ -11,9 +11,9 @@
 #
 # host$i: the name of the host we are connecting to.  This is passed as a
 #	command-line argument to ssh(1).  Make sure you have a copy of
-#	the host's SSH public key in your cache before you run mcflags.
+#	the host's SSH public key in your cache before you run gator.
 #
-# workspace$i: the path to a directory containing mcflags.  This is used
+# workspace$i: the path to a directory containing gator.  This is used
 #	to access the dotime and evaluate.conf files.  Note that this is
 #	a read-only workspace, and the only files that are read are
 #	dotime and evaluate.conf.
@@ -27,31 +27,31 @@
 #

 host1=sophie
-workspace1=/home/mercury/samrith/mcflags
+workspace1=/home/mercury/samrith/gator
 benchmarks1=/home/sophie/samrith/benchmarks
 path1=/home/sophie/public/mercury-latest/i686-pc-linux-gnu/bin

 host2=boadicea
-workspace2=/home/mercury/samrith/mcflags
+workspace2=/home/mercury/samrith/gator
 benchmarks2=/home/boadicea/samrith/benchmarks
 path2=/home/boadicea/public/mercury-latest/i686-pc-linux-gnu/bin

 host3=surprise
-workspace3=/home/mercury/samrith/mcflags
+workspace3=/home/mercury/samrith/gator
 benchmarks3=/home/surprise/samrith/benchmarks
 path3=/home/surprise/public/mercury-latest/i686-pc-linux-gnu/bin

 host4=lively
-workspace4=/home/mercury/samrith/mcflags
+workspace4=/home/mercury/samrith/gator
 benchmarks4=/home/lively/samrith/benchmarks
 path4=/home/lively/public/mercury-latest/i686-pc-linux-gnu/bin

 host5=polychrest
-workspace5=/home/mercury/samrith/mcflags
+workspace5=/home/mercury/samrith/gator
 benchmarks5=/home/polychrest/samrith/benchmarks
 path5=/home/polychrest/public/mercury-latest/i686-pc-linux-gnu/bin

 host6=leopard
-workspace6=/home/mercury/samrith/mcflags
+workspace6=/home/mercury/samrith/gator
 benchmarks6=/home/leopard/samrith/benchmarks
 path6=/home/leopard/public/mercury-latest/i686-pc-linux-gnu/bin
Index: phenotype.m
===================================================================
RCS file: /home/sam/src/master/mcflags/phenotype.m,v
retrieving revision 1.4
retrieving revision 1.6
diff -u -r1.4 -r1.6
--- phenotype.m	2 Feb 2006 02:44:00 -0000	1.4
+++ phenotype.m	7 Feb 2006 04:48:59 -0000	1.6
@@ -22,6 +22,8 @@

 :- type phenotype.

+:- type weighting.
+
 :- type fitness.

 	% read_phenotypes(Path, Phenotypes, !IO).
@@ -29,20 +31,19 @@
 	% Reads a list of phenotypes from the given file, and unifies
 	% the list with Phenotypes.
 	%
-:- pred read_phenotypes(string, list(phenotype), io, io).
-:- mode read_phenotypes(in, out, di, uo) is det.
+:- pred read_phenotypes(string::in, list(phenotype)::out, io::di,
+        io::uo) is det.

-	% fitness(Weightings, Phenotype, Fitness).
+	% fitness(Weightings, Phenotype) = Fitness.
 	%
-	% This predicate evaluates the fitness of a genotype given its
+	% This function evaluates the fitness of a genotype given its
 	% phenotype.
 	%
-	% Fitness is defined here as a weighted sum of each element in the
+	% Fitness is defined here as a weighted sum of each number in the
 	% phenotype.  The weightings can be modified by changing the
 	% "Weightings" variable.
 	%
-:- pred fitness(list(float), phenotype, fitness).
-:- mode fitness(in, in, out) is det.
+:- func fitness(list(weighting), phenotype) = fitness is det.

 	% selection(Genotypes, Fitness, _, Parent, !RNG).
 	%
@@ -57,16 +58,14 @@
 	% The third argument is ignored.  It is just there to make it easy
 	% to control the number of parents to be selected.
 	%
-:- pred selection(list(genotype), list(fitness), T, genotype, RNG, RNG)
-		<= random(RNG, Seed).
-:- mode selection(in, in, in, out, in, out) is det.
+:- pred selection(list(genotype)::in, list(fitness)::in, T::in,
+		genotype::out, RNG::in, RNG::out) is det <= random(RNG, Seed).

-    % fitness_to_string(Fitness, String).
+    % fitness_to_string(Fitness) = String.
     %
     % Gives a string representation of the fitness value.
     %
-:- pred fitness_to_string(fitness, string).
-:- mode fitness_to_string(in, out) is det.
+:- func fitness_to_string(fitness) = string is det.

 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
@@ -93,6 +92,8 @@

 :- type run_time == float.

+:- type weighting == float.
+
 :- type fitness == float.

 read_phenotypes(Path, Phenotypes, !IO) :-
@@ -117,7 +118,7 @@
 		require.error(ErrorMessage)
 	).

-fitness(Weightings, Phenotype, Fitness) :-
+fitness(Weightings, Phenotype) = Fitness :-

 	CompileTimes = Phenotype ^ compile_times,
 	ExecutableSizes = list.map(float.float, Phenotype ^ executable_sizes),
@@ -167,8 +168,7 @@
 	Index = list.det_index0_of_first_occurrence(CumulativeNormalFitness, Head),
 	list.index0_det(Genotypes, Index, Parent).

-fitness_to_string(Fitness, String) :-
-    string.float_to_string(Fitness, String).
+fitness_to_string(Fitness) = string.float_to_string(Fitness).

 %-----------------------------------------------------------------------------%
 %

--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list