#!/usr/bin/perl

use 5.006;
use strict;
use warnings;
use Carp;

use POSIX 'setsid';
use Net::Server;
use vars qw(@ISA $VERSION);

@ISA = qw(Net::Server);
$VERSION = '0.0.1';

use Net::Telnet;
use IPC::Run qw(start finish run pump);

use AppConfig qw/:argcount :expand/;

use Digest::MD5 qw(md5);

use File::Temp qw/tempfile/;

use IO::All qw/io/;

my $config = AppConfig->new(
                            "host|h=s" => {DEFAULT => "localhost"},
                            "port|p=s" => {DEFAULT => 8999},
                            "parser=s" => {DEFAULT => "frmgtel"},
                            "rparser=s" => {DEFAULT => "frmgtelr"},
                            "test|t=s" => {DEFAULT => "sentences.txt"},
			    "display|d=s" => {DEFAULT => ''},
			    "verbose|v!" => {DEFAULT => 1},
			    "range|r=s@" => {DEFAULT => []},
			    "stats|s!" => {DEFAULT => 0},
			    "forest!" => {DEFAULT => 0},
			    "grammar!" => {DEFAULT => 0},
			    "tagger!" => {DEFAULT => 0},
			    "xmldep!" => {DEFAULT => 0},
			    "lpdep!" => {DEFAULT => 0},
			    "dotdep!" => {DEFAULT => 0},
			    "easy!" => {DEFAULT => 0},
			    "easyhtml!" => {DEFAULT => 0},
			    "sigfile=s" => {DEFAULT => ".signatures"},
			    "errfile=s" => {DEFAULT => "errors"},
			    "setsid!" => { DEFAULT => 0 },
			    "log_file=f" =>  {DEFAULT => '/tmp/testparser.log'},
			    "status!" => {DEFAULT => 0},
			    "time!" => {DEFAULT => 0},
			    "robust!" => {DEFAULT => 0},
			    "easyinput" => {DEFAULT => 0},
			    "colldir=f" => {DEFAULT => "./easy_results"},
			    "timeout=i" => {DEFAULT => 200},
			    "collsave!" => {DEFAULT => 0},
			    "allparse!" => {DEFAULT => 0},
			    "showdag!" => {DEFAULT => 0},
			    "collbase=f",
			    "onlyrobust!" => {DEFAULT => 0},
			    "run!" => {DEFAULT => 1},
			    "watch=f" => {DEFAULT => ''},
			    "short!" => {DEFAULT => 0},
			    "final!" => {DEFAULT => 1}
                           );

my %display = (dep => 'dependency', tree => 'tree' );

$config->args();

my $host = $config->host;
my $port = $config->port;

if ($host =~ /(\S+):(\S+)/) {
  $host = $1;
  $port = $2;
}

my $log = io->stdout;

my $parser = $config->parser;
my $rparser = $config->rparser;
my $test = $config->test;
my $display = get_display($config->display);
my $verbose = $config->verbose;
my @ranges = map(range_expand($_),@{$config->range});
my %range = (map {$_ => 1} @ranges);
my $errfile = $config->errfile();

my $section = "s0";

my %stats = 
  ( tried => 0,
    skipped => 0,
    failed => 0,
    time => 0,
    clusters => 0,
    nodes => 0,
    edges => 0
  );

my %exts = ('easy' => 'ph2.xml',
	    'xmldep' => 'dep.xml',
	    'xml' => 'forest.xml',
	    'tagger' => 'tag.txt',
	    'dotdep' => '.dot'
	   );

my %errors = ();

my %Kid_Status = ();

use POSIX ":sys_wait_h";
sub REAPER {
  my $child;
  # If a second child dies while in the signal handler caused by the
  # first death, we won't get another signal. So must loop here else
  # we will leave the unreaped child as a zombie. And the next time
  # two children die we get another zombie. And so on.
  while (($child = waitpid(-1,WNOHANG)) > 0) {
    $Kid_Status{$child} = $?;
  }
  $SIG{CHLD} = \&REAPER;  # still loathe sysV
}
# $SIG{CHLD} = \&REAPER;

$SIG{INT} = sub {
  exit;				# to go trough END block
};

STDOUT->autoflush;

## reading error files
if (-r $errfile) {
  open(ERRFILE,"<$errfile") || die "can't open $errfile: $!";
  while(<ERRFILE>) {
    $errors{$_} = 1;
  }
  close(ERRFILE);
}

my %handlers = 
  ( display => \&display_handler,
    tagger => canonical_display_handler('tagger','TAGGER'),
    forest => canonical_display_handler('xml','FOREST'),
    xmldep => canonical_display_handler('xmldep','XMLDEP'),
    lpdep => canonical_display_handler('lpdep','LPDEP'),
    dotdep => canonical_display_handler('dep','DOTDEP'),
    stats => \&stats_handler,
    easy => canonical_display_handler('easy','EASY'),
    grammar => \&grammar_handler,
    easyhtml => \&easyhtml_handler
  );

## setting output handlers
my @handlers = ();
my @exts = ();

foreach my $key (sort keys %handlers) {
  if ($config->get($key)) {
    push(@handlers, $handlers{$key});
    push(@exts, $exts{$key}) if (exists $exts{$key});
  }
}

## Starting connection
my $connect = new Net::Telnet;
$connect->max_buffer_length(3*$connect->max_buffer_length);

start_connection();

sub start_connection {
  $connect->open(Host => $host,
		 Port=>$port) || die "can't connect: $!";
  $connect->prompt('/\s*Command\?\s*/');
  $connect->cmd("");
  $connect->cmd("timeout ".$config->timeout());
}

## Opening signature files if needed
my %signatures = ();
my $sigfile = $config->sigfile();

if ($config->stats() && -r $sigfile) {
  append $log "** Reading signature file $sigfile\n" if $verbose;
  open(SIG,"<$sigfile") || die "can't read $sigfile: $!";
  $signatures{$1} = $2 while (<SIG> =~ /^(\S+)\s+(\S+)/);
  close SIG || die "can't close $sigfile: $!";
}


## Setting status bar

my $report;
my $statusBar;

if ($config->status) {
  use Term::Report;

  $report = Term::Report->new( fh => \*STDERR
			     );
#  $statusBar = $report->{statusBar};
  $report->savePoint('total', "Total: ", 0);
  $report->savePoint('success', "\nSuccess: ", 0);
  $report->savePoint('failed', "\nFailed: ", 0);
}

## Reading sentences

my $io;
my $client = 0;


if ($test eq '-') {
  $io = io->stdin;
} elsif ($test =~ /:\d+$/) {
  $client = 1;
} else {
  if ($config->easyinput || $test =~ /^\.xml$/) {
    $test  = "xsltproc easyinput2txt.xsl $test |";
  }
  $io = io("$test")
}

sub connect_as_client {
  return unless $client;
##  print STDERR "*** Trying connection\n";
  $io->close if ($io);
  my $fail = 0;
  $io=io($test);
  $io->print("next $host\n");
}

my @sentences = ();
my $label;
my @failed = ();
my $collection;
my $collbase;


if ($config->collbase) {
  $collbase = $config->collbase;
}

my $saggl='';			# to agglutinate more than one sentence

my $dagmode = 0;
my @dagedges = ();
my %dagwords = ();
my $daglabel;

my $watch_info='';
my $watch = $config->watch;

if ($watch && (-f $watch)) {
  open(WATCH,"<$watch") || die "can't access watch file $watch";
  $watch_info = <WATCH>;
  chomp $watch_info;
}

connect_as_client;

while (defined($io) && ($_ = $io->readline)){
  if ($watch && !(-f $watch)) {
    system("echo \"$watch_info $label\" > $watch.stopped");
    last;
  }
  if (/^##\s*(\d+)(\..*)/) {
    append $log "\n## Section $1$2\n\n" if ($verbose);
    $section = "s$1";
    next;
  }
  if (/^##\s*E(\d+)$/) {
    $label = $1 - 1 unless ($saggl);
    next;
  }
  if (/^##\s*AGGL$/) {
    my $next = $io->getline;
    chomp $next;
    $saggl .= " $next";
    next;
  }
  if (/^##\s+LOG=(\S+)/) {
    switch_log($1);
    next;
  }
  if (/^##\s*CORPUS=(\S+)\s+COLLDIR=(\S+)/) {
    corpus_start($1,$2);
    next;
  }
  if (/^##\s*(FICHIER|FILE)=(\S+)/) {
    collection_start($1);
    next;
  }
  if (/^##\s*DAG\s+begin/io) {
    $dagmode = 1;
    @dagedges = ();
    %dagwords = ();
##    append $log "STARTING DAG\n";
    next;
  }

  if ($dagmode && /^##\s*DAG\s+end/io) {
    $dagmode = 0;
    my $sentence = '';
    foreach (sort mysort keys %dagwords) {
      $sentence .= " $dagwords{$_}";
    }
    my $dag = join("\n","##DAG BEGIN $sentence",@dagedges,'##DAG END');
##    $label++;
    $label = $daglabel;
    handle_sentence($label,'YES',$sentence,$dag);
    connect_as_client;
    next;
  }
  if (/^##\s*MAF\s+BEGIN/oi) {
    my $maf = $_;
    @dagedges = ();
    my $sentence = '';
    my %tokens = ();
    while ($_ = $io->readline) {
      $maf .= $_;
      $tokens{$1} = $2 if (m{<token.*?id="(.+?)".*?>(.+)</token>});
      last if /^##\s*MAF\s+END/oi;
    }
    foreach (sort mysort2 keys %tokens) {
      $sentence .= " $tokens{$_}";
    }
    $label++;
##    print "MAF $label $sentence\n$maf\n";
    handle_sentence($label,'YES',$sentence,$maf);
    connect_as_client;
    next;
  }

  if (/^<\?xml/oi) {
      my $maf = "##MAF BEGIN\n$_";
      my $sentence = '';
      my %tokens = ();
      while ($_ = $io->readline) {
	$maf .= $_;
	$tokens{$1} = $2 if (m{<token.*?id="(.+?)".*?>(.+)</token>});
	last if m{^</maf>}oi;
      }
      $maf .= "##MAF END";
      foreach (sort mysort2 keys %tokens) {
	$sentence .= " $tokens{$_}";
      }
      $label++;
      ## print "MAF $label '$sentence'\n$maf\n";
      handle_sentence($label,'YES',$sentence,$maf);
      connect_as_client;
      next;
    }

  next if /^#/;
  next if /^\s*$/;
  if ($dagmode) {
    my $edge = $_;
    chomp $edge;
##    append $log "Read edge $edge\n";
    push(@dagedges,dag_decode($edge));
    next;
  }
  my $status = 'YES';
  my $sentence = $_;
  if ($saggl) {
    $sentence = "$saggl $sentence";
    $saggl = '';
  }
  $label++;
  chomp $sentence;
  unless ($config->allparse) {
    if ($sentence =~ s/^>\s*//) {
      $status = 'SKIP';
    } elsif ($sentence =~ s/^\*\s*//) {
      $status = 'NO';
    }
  }
  handle_sentence($label,$status,$sentence);
  connect_as_client;
}


sub mysort {
  my ($ea,$fa) = ($a =~ /E(\d+)F(\d+)/);
  my ($eb,$fb) = ($b =~ /E(\d+)F(\d+)/);
  return ($ea <=> $eb || $fa <=> $fb);
}

sub mysort2 {
  my ($ea) = ($a =~ /(\d+)$/);
  my ($eb) = ($b =~ /(\d+)$/);
  return ($ea <=> $eb);
}


sub dag_decode {
  my $edge = shift;
  my ($left,$comment,$token,$right) = ($edge =~ /^(\d+)\s+\{(.*?)\}\s+(\S+)\s+(\d+)/);
  my @comments = ();
  while ($comment =~ m{<F\s+id="(.+?)"\s*>\s*(.*?)\s*</F>}og) {
    my ($id,$w) = ($1,$2);
    $dagwords{$1} ||= $2;
    my $idw = "$id|$w";
    push(@comments,$idw) unless grep( $_ eq $idw,@comments);
  }
  $comment = join(' ',@comments);
  --$left;
  --$right;
  if ($left eq 0) {
    ($daglabel) = ($comment =~ /^E(\d+)/);
  }
  return "$left {$comment} $token $right";
}

$io->close;

$connect->close;

sub final_stats {
  return unless ($config->final);
  appendf $log "\nStats: %3u tried %3u failed %3u skipped %3.2f%% coverage %.2fs avtime\n", 
    $stats{tried}, 
      $stats{failed}, 
	$stats{skipped},
	  100 * (1 - $stats{failed} / ($stats{tried} || 0.01)),
	  $stats{time} / ($stats{tried} || 0.01);

  if ($config->stats) {
    my $clusters = $stats{clusters};
    $clusters ||= 1;
    appendf $log "Stats:  %1.1f ambiguity\n",
      ($stats{edges} - $clusters) / ($clusters || 0.01);
  }

  if (@failed) {
    my $failed = @failed;
    append $log <<EOF;

*** $failed tests failed !
EOF
} else {
  append $log <<EOF;

All tests successul !
EOF

}
}

END {
  closing();
  final_stats();
}

sub closing {

  collection_stop();

  if ($config->stats()) {
    ## Saving signatures
    append $log "** Saving signature file $sigfile\n" if $verbose;
    rename($sigfile,"$sigfile.bak") if (-r $sigfile);
    open(SIG,">$sigfile") || die "can't open $sigfile: $!";
    foreach my $sigs (keys %signatures) {
      print SIG "$sigs\t$signatures{$sigs}\n";
    }
    close SIG || die "can't close $sigfile: $!";
  }

  open(ERRORS,">$errfile") || die "can't open $errfile: $!";
  foreach my $sentence (sort keys %errors) {
    print ERRORS "$sentence\n";
  }
  close ERRORS;
}

sub handle_sentence {
  my ($label,$status,$sentence,$dag) = @_;
  return if (@ranges && !$range{$label} && !$range{"$section"});
  if ($collbase && $config->collsave) {
    my $flag = 0;
    $flag ||= !(-f outfile($label, $_)) foreach (@exts); 
    return unless ($flag);
  }
  if (!$config->run) {
    append $log "Simulate running on $label $sentence\n" if ($verbose);
    return;
  }
  my $xsentence = $sentence;
  $xsentence =~ s/\{\{(.+?)\}\}//og;
  my $short = $xsentence;
  $short = substr($short,0,60).' ...' if ($config->short && length($short) > 60);
  if ($status eq 'SKIP') {
    $stats{skipped}++;
    append $log <<EOF  if $verbose;
ok $label\t> $short
EOF
    return;
  }
  $stats{tried}++;
  my $mark = ($status eq 'YES') ? ' ' : '*';
  my $answer;

  if (!$config->onlyrobust) {
    $connect->cmd("set forest yesno");
    ##  append $log "GO $sentence\n";
    my $localparser = ($config->robust) ? "$rparser" : "$parser";
    my $cmd =  "$label $localparser $sentence";
    append $log "$dag\n" if ($config->showdag && defined $dag);
    $cmd = "dag $cmd\n$dag" if (defined $dag);
    my @lines =   $connect->cmd( String => "$cmd",
				 Timeout => undef,
				 Errmode => \&handle_parsing_error	      
			       );
    ##  append $log "HERE @lines\n";
    pop @lines;
    $answer = 'NO';
    if (@lines) {
      $answer = 'YES' if $lines[0] =~ /Success/;
      $answer = 'PARTIAL' if $lines[0] =~ /Partial/;
    }
    if ($answer eq 'PARTIAL') {
      append $log <<EOF if ($verbose) ;
robust $label\t$mark $short
EOF
}
    if ($answer eq $status) {
      append $log <<EOF if ($verbose) ;
ok $label\t$mark $short
EOF
    } else {
      $stats{failed}++;
      push(@failed,{label=>$label,status=>$status,sentence=>$sentence});
      append $log <<EOF ;
**********************************************
fail $label\t$mark $xsentence
**********************************************
EOF
      handle_error($xsentence,$status); 
    }
    ##  @lines = $connect->cmd("last time");
    ##  append $log @lines;
    ##  my ($time) = ($lines[0] =~ /^\s*last\s+time=(\S+)/);
    ##  pop @lines;
    ##  $stats{time} += $time;
    append $log " <host> $host\n";
    if ($config->time()) {
      my @lines = $connect->cmd( String => "last time",
				 Timeout => undef,
				 Errmode => \&handle_parsing_error
			       );
      pop @lines;
      my ($time) = ($lines[0] =~ /=(.+)/);
      my $to = "";
      if ($time == 0) {
	$time = $config->timeout;
	$to = " **timeout**"
      }
      append $log " <time> $time$to\n";
      $stats{time} += $time;
    }
  }
  if ($config->onlyrobust) {
    ## append $log "** try robust parsing\n";
    $connect->cmd("set forest yesno");
    ##  append $log "GO $sentence\n";
    my $cmd =  "$label $rparser $sentence";
    ##    append $log "$dag\n" if ($config->showdag && defined $dag);
    $cmd = "dag $cmd\n$dag" if (defined $dag);
    my @lines =   $connect->cmd( String => "$cmd",
				 Timeout => undef,
				 Errmode => \&handle_parsing_error	      
			       );
    ##  append $log "HERE @lines\n";
    pop @lines;
    $answer = (@lines && $lines[0] =~ /Success/) ? 'YES' :  'NO';
    
    if ($config->onlyrobust) {
      append $log <<EOF if ($verbose) ;
robust $label\t$mark $short
EOF
     };


    if ($config->time()) {
      my @lines = $connect->cmd( String => "last time",
				 Timeout => undef,
				 Errmode => \&handle_parsing_error
			       );
      pop @lines;
      my ($time) = ($lines[0] =~ /=(.+)/);
      my $to = "";
      if ($time == 0) {
	$time = $config->timeout;
	$to = " **timeout**"
      }
      append $log " <time2> $time$to\n";
      $stats{time} += $time;
    }
    
  }

  if ($answer ne 'NO') {
    my @processes = map { $_->($xsentence,$label) } @handlers;
    $_->finish foreach (@processes);
  }

  if ($collection && $answer ne 'NO') {
    collection_handler($xsentence,$label);
  }

  report_stats() if ($config->status);

}

sub get_display {
  my $display = shift;
  return $display{$display} || $display;
}

sub range_expand {
 my $r = shift;
 my @r = split(',',$r);
 my @rr = ();
 foreach my $x (@r) {
   if ($x =~ /(\d+)\.\.(\d+)/) {
     push(@rr,$1..$2);
   } elsif ($x =~ /(s\d+)\.\.(s\d+)/) {
     push(@rr,$1..$2);
   } else {
     push(@rr,$x)
   }
 }
## append $log "RANGE @rr\n";
 return @rr;
}

## Recording error sentence
sub handle_error {
  my $sentence = shift;
  my $status = shift;
  $sentence = "* $sentence" if ($status eq 'NO');
  $errors{$sentence} = 1;
}

sub handle_parsing_error {
  my $msg = shift;
  print STDERR "Parsing error somewhere: $msg\n";
##  $connect->cmd("quit");
##  $connect->close;
  start_connection();
}

sub report_stats {
  $report->finePrint('total',0,$stats{tried});
  $report->finePrint('success',10,$stats{tried} - $stats{failed});
  $report->finePrint('failed',20,$stats{failed});
}

sub last_forest {
  my $type = shift;
  $connect->cmd( String => "set forest $type",
		 Timeout => undef,
		 Errmode => \&handle_parsing_error);
  return $connect->cmd( String => "last forest",
			Timeout => undef,
			Errmode => \&handle_parsing_error	      
		      );
}

######################################################################
# all display handlers

sub canonical_display_handler {
  my ($type,$info) = @_;
  my $ext = $exts{$type} || $type;
  ## build and return a simple display handler
  sub {
    my ($sentence,$label) = @_;
    my @lines = last_forest($type);
    pop @lines;
    if ($collbase && $config->collsave) {
      my $file =  outfile($label, $ext);
      open(COLLS,">$file") || die "can't save in $file";
      print COLLS @lines;
      close COLLS;
      return;
    } else {
      append $log "------------  START $info ---------\n";
      append $log @lines;
      append $log "------------  END $info ---------\n";
      return;
    }
  }
}


sub old_canonical_display_handler {
  my ($type,$info) = @_;
  ## build and return a simple display handler
  sub {
    my ($sentence,$label) = @_;
    $connect->cmd( String => "set forest $type",
		   Timeout => undef,
		   Errmode => \&handle_parsing_error);
    my @lines = $connect->cmd( String => "last forest",
			       Timeout => undef,
			       Errmode => \&handle_parsing_error	      
			     );
    pop @lines;
    append $log "------------  START $info ---------\n";
    append $log @lines;
    append $log "------------  END $info ---------\n";
    return;
  }
}

## Specific handlers

sub stats_handler {
  my ($sentence) = @_;
  my @lines = last_forest('stats');
  my $sigs = md5($sentence);
  my $sigstats = md5($lines[0]);
  my $info = $lines[0];
  if ($info =~ s/^\s*Dependency stats:\s*//o) {
    my ($clusters,$nodes,$edges) = (split(/\s+/,$info))[2,4,6];
    $stats{clusters} += $clusters;
    $stats{nodes} += $nodes;
    $stats{edges} += $edges;
    append $log " <ambiguity> $info";
    if (exists $signatures{$sigs} && $signatures{$sigs} ne $sigstats) {
      append $log <<EOF;
     *** signature mismatch !
EOF
    }
    $signatures{$sigs} = $sigstats;
  } else {
    chomp $info;
    append $log " <ambiguity> ***pbm*** $info\n";
  }
  return;
}

sub display_handler {
  my @lines = last_forest($display);
  pop @lines;
  my $h = start ['recode','..u8'],
    '<pipe',\*DOT,,
      '|', ['dot','-Tgif',"-Glabel=test $label"],
	'|', ['display'],
	  '2>/dev/null'
	    or die "dot returned $?";
  print DOT @lines;
  close DOT;
  return $h;
}

sub grammar_handler {
  my @lines = last_forest('html');
  pop @lines;
  my ($fh,$filename) = tempfile( DIR => '/tmp/',
				 SUFFIX => '.html',
##				 UNLINK => 1,
			       );
  print $fh @lines;
  close $fh;
  my $h = start ['dillo',$filename],'>/dev/null','2>/dev/null';
##  run ['firefox','-remote',"openFile($filename,new-window)"],'>/dev/null','2>/dev/null';
  return $h;
}

sub easyhtml_handler {
  my @lines = last_forest('easy');
  pop @lines;
  my ($fh,$filename) = tempfile( DIR => '/tmp/',
				 SUFFIX => '.html',
##				 UNLINK => 1,
			       );
  close $fh;
  my $h = start 
    ['xsltproc','-o',$filename,'easy2html.xsl','-'],'<pipe',\*EASY,
      ,'&',
##	['konqueror',$filename],'>/dev/null','2>/dev/null';
	['dillo','-f','-l',$filename],'>/dev/null','2>/dev/null';
##	['firefox','-remote',"openFile($filename)"] ;
  print EASY @lines;
  close EASY;
  return $h;
}

sub collection_handler {
  my @lines = last_forest('easy');
  pop @lines;
  foreach (@lines) {
    next if (m{^<[/]?DOCUMENT});
    next if (m{<\?xml});
    print COLLECTION $_;
  }
}

sub collection_stop {
  if (defined $collection) {
    print COLLECTION "</DOCUMENT>\n";
    close COLLECTION || die "can't close $collection";
  }
}

sub switch_log {
  my $newlog = shift;
  $log->close;
  $log = io("$newlog")
    || die "can't open new log file $newlog";
  $log->mode('>>')->open;
  autoflush $log 1;
}

sub corpus_start {
  my $base = shift;
  my $colldir = shift;
  ## close old collection (if open) and start new one;
  $collbase = $base;
  $config->colldir($colldir);
}

sub collection_start {
  my $base = shift;
  my $colldir = shift || $config->colldir();
  ## close old collection (if open) and start new one;
  collection_stop;
  $collbase = $base;
  $config->colldir($colldir);
  $collection = "$colldir/$base.ph2.xml";
  open(COLLECTION,">$collection") || die "can't open $collection";
  autoflush COLLECTION;
  print COLLECTION <<EOF;
<?xml version="1.0"  encoding="latin1"?>
<DOCUMENT fichier="$base" xmlns:xlink="http://www.w3.org/1999/xlink">
EOF
}

sub outfile {
  my ($label, $ext) = @_;
##  print STDERR "TRY OUTFILE '$collbase' '$label' '$ext'\n";
  my $file = $config->colldir(). "/$collbase.E$label.$ext";
##  print STDERR "OUTFILE $file\n";
  return $file;
} 

sub all { $_ || return 0 for @_; 1 }

## The doc of IO::All mention this method, which is actually not defined !
sub IO::All::appendf {
  my $self = shift;
  $self->assert_open('>>');
  $self->printf(@_);
}
