#!/usr/bin/perl -w
# copyright 2008
#  Andreas Romeyke (romeyke@cbs.mpg.de)
#  MPI for Human Cognitive and Brain Sciences
#  Department of Psychology
#  Stephanstrasse 1a, D-04103 Leipzig, Germany
#
use strict;
use Time::HiRes;
sub count_lines ($) {
	my $fh = $_[0];
	my $lines = 0;
	while (sysread $fh, my $buffer, 4096) {
		$lines += ($buffer =~ tr/\n//);
	}
	seek ($fh, 0, 0); # rewind
	return $lines
}

sub print_filtered ($$$$) {
	my $str1 = $_[0];
	my $str2 = $_[1];
	my $code = $_[2];
	my $OUT = $_[3];
	#	print "code='$code'\n";
	my $res0=join("",$str1,$str2);
	if (! &$code($res0)) {
		print $OUT $res0, "\n";
		return 1;
	}
	return 0;
}

sub fsize($) {
	my $fh = $_[0];
	my @tmp = stat($fh);
	return $tmp[7];
}

sub iterate ($$$$$) {
	my $in1=$_[0];
	my $in2=$_[1];
	my $out=$_[2];
	my $filter=$_[3];
	my $recdepth=$_[4];
	my $IN1; open($IN1, "<", $in1) || die "could not open '$in1' for reading, $!\n";
	my $IN2; open($IN2, "<", $in2) || die "could not open '$in2' for reading, $!\n";
	my $OUT; open($OUT, ">", $out) || die "could not open '$out' for writing, $!\n";
	my $clines1=count_lines($IN1);
	my $clines2=count_lines($IN2);
	my $setsize= $clines1*$clines2;
	my $size1 = fsize($in1);
	my $size2 = fsize($in2);
	printf "%s%.0f possible solutions (around %5.1f MByte)\n", " "x$recdepth, $setsize,($size1 * $size2 / (1024*1024));
	my $realsetsize=0;
	foreach my $line1 (grep {$_!~/^$/} <$IN1>) {
		chomp $line1;
		foreach my $line2 (grep {$_!~/^$/} <$IN2>) {
			chomp $line2;
			$realsetsize+=print_filtered($line1, $line2, $filter,$OUT);
			#if ($line1 ne $line2) {
			#	$realsetsize+=print_filtered($line2, $line1, $filter,$OUT); 
			#}
		}
		seek ($IN2, 0, 0);
	}
	printf "%s%.0f real solutions found (around %5.1f Mbyte)\n", " "x$recdepth, $realsetsize,(fsize($OUT) / (1024*1024));
	printf "%sscutoff: %3.1f %%\n",  " "x$recdepth, (100 / $setsize * $realsetsize);

	close $IN1;
	close $IN2;
	close $OUT;
	return ($setsize, $realsetsize);
}
sub permute ($$$$) {
	my $n = $_[0];
	my $alpha1=$_[1];
	my $code=$_[2];
	my $recdepth=$_[3];
	#print "recursion $n\n";
	my $startt=Time::HiRes::time();
	my $filen = "tmp_$n";
	if ($n < 1) {die "len should be greater then 0\n";}
	elsif ($n==1) { $filen= $alpha1 }
	else { 
		if (! -e $filen ) {
			print " "x$recdepth,"recursion $n: (\n";
			my $a=int($n/2);
			my $b=$n-$a;
			my $filea = permute($a, $alpha1, $code, $recdepth+1);
			my $fileb = permute($b, $alpha1, $code, $recdepth+1);
			my ($mset, $rset)=iterate($filea, $fileb, $filen, $code, $recdepth);
			my $endt=Time::HiRes::time();
			printf "%s%0.3fs for search needed\n", (" "x$recdepth),$endt-$startt;
			print " "x$recdepth, ")\n";
		}
	}
	return $filen;
}

### Main ###
print "Super Red Sonja\n";
print "(c) 2008 by Andreas Romeyke\n";
print "call:\n";
print "\t$0 <iterations> <infile1> <infile2> [<regex> ..]\n";
print "if you need sequences of length l then you should\n";
print "choose <iterations> = log2(l)+1\n";
print "The <regex>.. will be or-combined and these filters out in\n";
print "every iteration step.\n";
print "If you want to have results where the string is\n";
print "4 chars long and should start with the '1' you\n";
print "should call:\n";
print "\tperl super_redsonja.pl 3 1 2 \"^.{1,3}\$\" \"^1.{4}\$\"\n";
print "##################################################\n";
print "starting...\n";
#TODO: Calc explosion of filesize...
#TODO: filter duplicates... ... if possible...
my $len = shift @ARGV;
my $alpha1 = shift @ARGV;
my $alpha2 = $alpha1;
my $regex = join(" ||\n\t", @ARGV); # filter all constraints or combined
print "regex:\n\t$regex\n\n";
my $code=eval("
	sub {\$a=\$_[0];\n
	".join( "\n", map {'if ($a=~m/'.$_.'/) {return 1;}' }@ARGV). "return undef;\n};");

### TODO: make loop recursive to avoid unnecessary work
#foreach my $i (1 .. $len) {
#	print "iteration $i:\n\t($alpha1, $alpha2, tmp_$i);\n\t";
#	my $startt=Time::HiRes::time();
#	iterate($alpha1, $alpha2, "tmp_$i", $code);
#	my $endt=Time::HiRes::time();
#	printf "\n\tneeded time:%0.3fs\n\n",$endt-$startt;
#	my $prev1=int(($i)/2);
#	my $prev2=($i)-$prev1;
#	if ($prev1 > 0) {$alpha1="tmp_$prev1";}
#	$alpha2="tmp_$prev2";
#}
#print "\n\nThe result is in file named 'tmp_$len'\n\n";

permute ($len, $alpha1, $code, 1);
#print "\n\nThe result is in file named 'tmp_$len'\n\n";



