#!/usr/bin/perl
# -*- perl -*-
eval 'exec @PERL@ -S $0 ${1+"$@"}'
    if 0;

use IPC::Open2;
use strict;

my @lexer = ("/usr/bin/lexer","/usr/share/frmg/small_header.tag",'-');

my @sentence = @ARGV ? @ARGV : (<STDIN>);

my $i=0;

$| = 1; # $| = 0; # Flush
open2(*LEXOUT,*LEXIN,@lexer) || die "couldn't run lexer";

segmenter(*LEXIN);
close LEXIN || die "can't close: $!";

print <LEXOUT>;
close LEXOUT || die "can't close: $!";

sub segmenter {
    my $out = shift;

    print $out <<EOF;
%% Token database generated by $0

EOF

    foreach my $line (@sentence) {
      $line =~ s/\'/\'\' /og;
      $line =~ s/-t-/ t-/og;
      $line =~ s/([.,!;?]+)(?!:)/ $1:punct/og;
##      print STDERR $line;
      foreach ($line =~ /\S+/g) {
       
            last if (/\#/);

            if (/^(\d+)$/) {
              print $out "\'C\'($i,$1,",++$i,").\n";
              next;
            }

##            s/\'/\'\'/og;

##            $_ = ",:punct" if $_ eq ',';

            my ($lex,$cat,$ht,$top) = split(':',$_);
	    $lex = "\'$lex\'";
	    $cat = (defined $cat && $cat) ? "\'$cat\'" : "_";
	    $ht = (defined $ht && $ht) ? fs_analyze('ht',$ht) : "_";
	    $top = (defined $top && defined $cat) ? top_analyze($cat,$top) : "_";

            print $out "\'C\'($i,$lex : $cat : $ht : $top,",++$i,").\n" ;
 
	}
    }

    print $out "'N'(",$i,").\n";

}

sub fs_analyze {
  my $type = shift;
  my $fs = shift;
  my @fs = map {f_analyze($_)} split(/\&/,$fs);
  $fs = join(", ",@fs);
  return "$type"."{$fs}";
}

sub top_analyze {
  my $type = shift;
  my $top = shift;
  $top = fs_analyze('lfg',$top);
  return "$type"."{lfg=>$top}";
}

sub f_analyze {
  my $fv = shift;
  my $f;
  my $v;
  if ($fv =~ /^(\S+)=(\S+)$/) {
    $f = $1;
    $v = $2;
  } elsif ($fv =~ /^([+-])(\S+)$/) {
    $f = $2;
    $v = $1;
  } else {
    $f = quote($fv);
    return "$f => $f"."[~ -]";
  }
  $f = quote($f);
  $v = quote($v);
  return "$f => $v";
}

sub quote {
  my $string = shift;
  return $string if ($string =~ /^[a-z_]\w*$/o);
  return $string if ($string =~ /^\d+$/);
  #    $string =~ s/\'/\'\'/og;
  $string =~ s/\'(?!\')/\'\'/og;
  return "\'$string\'";
}

sub segmenter_word {
    my $out = shift;
    shift;
}

__END__

=head1 NAME

lex2db - group several lexers for dyalog

=head1 SYNOPSIS

B<lex2db> B<-<lexer>> F<filenames>

=head1 DESCRIPTION

B<lex2db> is a wrapper to call several small useful lexers in order to
build a database of connecting facts B<'C'(<pos>,<token>,<pos+1>)> to
be parsed by a B<dyalog> executable.

=head1 OPTIONS

=over 5

=item B<-letter> lex a stream of characters

=item B<-word> lex a stream of words

=item B<-token> lex a stream of lines (one token per line)

=back

=head1 SEE ALSO

dyalog as man pages or as info entries.

=head1 AUTHORS

Eric de la Clergerie <Eric.De_La_Clergerie@inria.fr>

=cut

### Local Variables: 
### comment-column:0 
### comment-start: "### "  
### comment-end:"" 
### mode: perl
### End: 
