#! /opt/gnu/bin/perl ############################################################################## #RUN: cat | parser.pl #AUTHORS: Bridget McInnes and William Schuler ############################################################################## print stderr ("Reading CFG rules...\n"); open(RULES, $ARGV[$args++]); # read transition matrix... $i=0; while ( ) { chop; if ( $_ =~ /^\#/ ) { } # binary branching (non-preterminal in CNF)... elsif ( $_ =~ /([^ ]+) -> ([^ ]+) ([^ \#]+)/ ) { $BinaryRules{"$2 $3"}{$1} = 1 ; } # unary branching (preterminal in CNF)... elsif ( $_ =~ /([^ ]+) -> ([^ ]+)/ ) { $UnaryRules{$2}{$1} = 1 ; } } close(RULES); ################################################################################ print stderr ("Reading input...\n"); while() { # Clear chart... for ($di=1; $di<$n; $di++) { for ($i=0; $i<($n-$di); $i++) { $j = $i+$di; %{$Chart{"$i $j"}} = () ; } } my %B = (); # Consume sentence... chop; @Words=split(/[ ]+/,$_); for ($i=0; $i<@Words; $i++) { $j = $i+1; $Chart{"$i $j"}{$Words[$i]} = 1 ; foreach $nont (keys %{$UnaryRules{$Words[$i]}}) { $Chart{"$i $j"}{$nont} = 1 ; #print "$i $j $nont -> $Words[$i]\n"; push @{$B{$i}{$j}{$nont}}, "/$Words[$i]/"; foreach $nont2 (keys %{$UnaryRules{$nont}}) { $Chart{"$i $j"}{$nont2} = 1 ; #print "$i $j $nont2 -> $nont\n"; if($nont ne $nont2) { push @{$B{$i}{$j}{$nont2}}, "[$nont /$Words[$i]/]"; } } } } $n=@Words+1; # Fill in chart... for ($di=2; $di<$n; $di++) { for ($i=0; $i<($n-$di); $i++) { $j = $i+$di; for ($k=$i+1; $k<$i+$di; $k++) { foreach $lc (keys %{$Chart{"$i $k"}}) { foreach $rc (keys %{$Chart{"$k $j"}}) { foreach $nont (keys %{$BinaryRules{"$lc $rc"}}) { $Chart{"$i $j"}{$nont} = 1 ; push@{$B{$i}{$j}{$nont}}, "$k $lc $rc"; #print "$nont -> $lc $rc\n"; foreach $nont2 (keys %{$UnaryRules{$nont}}) { $Chart{"$i $j"}{$nont2} = 1 ; #print "$nont2 -> $lc $rc\n"; push@{$B{$i}{$j}{$nont2}}, "$k $lc $rc"; } } } } } } } # for ($i=0; $i<$n-1; ) { # $any=0; # for ($j=$n-1; $j>$i && $any==0; $j-- ) { # foreach $nont (keys %{$Chart{"$i $j"}}) { # if ($any==0) { # print " [$nont"; # $any=1; # } # else { # print "|$nont"; # } # } # if ($any==1) { # for ($k=$i; $k<$j && $i<$j-1; $k++) { # print " $Words[$k]"; # } # print "]"; # } # } # if ($any==1) { # $i=$j+1; # } # } # # print "\n"; my @parses = extract(\%B, 0, $n-1, "S"); #print "$#parses\n"; my %hash = (); foreach $p (@parses) { print "$p\n"; } } sub extract { my ($B, $i, $j, $nt) = @_; my @rx = (); for my $back (@{$$B{$i}{$j}{$nt}}) { if ($back =~ m/\/.*\//) { @rx = ("[$nt $back]"); } else { my ($k, $nt1, $nt2) = split ' ', $back; my @r1 = extract($B, $i, $k, $nt1); my @r2 = extract($B, $k, $j, $nt2); for my $r1 (@r1) { for my $r2 (@r2) { push @rx, "[$nt $r1 $r2]"; } } } } return @rx; }