#!/usr/local/bin/perl # RUN: perl earley.pl #Note the backtracking is flawed. my $debug = 0; my $TRUE = 1; my $FALSE = 0; my $w_file = shift; my $r_file = shift; my $l_file = shift; # read in grammar rules my %rules = (); open(RULES, $r_file) || die "Could not open rule file\n"; while () { chomp; # store the rule $rules{$_}++; } close RULES; my %pos = (); my %lexicon = (); open(LEX, $l_file) || die "Could not open lexicon\n"; while() { chomp; $lexicon{$_}++; my ($ls, $rs) = split/\s+->\s+/, $_; $pos{$ls}++; } # read in sentence my @words = (); open(SENT, $w_file) || die "Could not open sentence file\n"; while($line = lc()) { chomp; @array = split/\s/, $line; for my $i(0..$#array) { push @words, ("/" . $array[$i] . "/"); } } close SENT; my @chart = (); my $start = 0; my $dot = 0; my $loc = 0; my $index = 0; my $rule = "x -> S"; my $parse = ""; my $p_counter = 0; %B = (); # earley parser enqueue($rule, $start, $dot, $loc, $index); for my $i (0..$#words+1) { if($debug) { print "CHART[$i]\n"; } foreach (@{$chart[$i]}) { if(incomplete($_)) { if(!(next_cat($_))) { if($debug) { print "PREDICTOR: $_\n"; } predictor($_); } else { if($debug) { print "SCANNER: $_\n"; } scanner($_); } } else { if($debug) { print "COMPLETER: $_\n"; } completer($_); } if($debug) { print "\n\n"; } } } foreach $state (@{$chart[$#words+1]}) { my($rule, $start, $dot, $loc, $index) = split/\,/, $state; my ($ls, $rs) = split/\s+->\s+/, $rule; if($ls eq "S" && $start == 0 && $dot == $#words+1) { print "GRAMMATICAL :\n"; #my $parse = "[$ls [ $rs ] ]"; #my $ms = $rule; #print extract($ms, $parse), "\n"; exit; } } print "UNGRAMMATICAL\n"; exit; sub extract { my $state = shift; my $parse = shift; my($rule, $index) = split/\,/, $state; my ($left, $right) = split/\s+->\s+/, $rule; my $temp = "[$left [ $right ]] "; $parse=~s/\s$left\s/$temp/; foreach $ns (@{$B{$state}}) { my ($r, $i) = split/\,/, $ns; if($r=~m/\/.*\//) { my ($lc, $rc) = split/\s+->\s+/, $r; my $temp = "[$lc [ $rc ]] "; $parse=~s/\s$lc\s/$temp/; } else { $parse = extract($ns, $parse); } } return $parse; } ######################################################################## # PREDICTOR ######################################################################## sub predictor { my $state = shift; my($rule, $start, $dot, $loc, $index) = split/\,/, $state; my ($ls, $rs) = split/\s+->\s+/, $rule; my @right = split/\s+/, $rs; my $unit = $right[$loc]; foreach (sort keys %rules) { my ($l, $r) = split/\s+->\s+/, $_; if($unit eq $l) { enqueue($_, $dot, $dot, 0, $index); } } } ######################################################################## # SCANNER ######################################################################## sub scanner { my $state = shift; my($rule, $start, $dot, $loc, $index) = split/\,/, $state; my ($ls, $rs) = split/\s+->\s+/, $rule; my @cats = split/\s+/, $rs; my $pos = $cats[$loc]; my $unit = $words[$dot]; foreach (sort keys %lexicon ) { my ($l, $r) = split/\s+->\s+/, $_; if($l eq $pos && $r eq $unit) { enqueue($_, $dot, $dot+1, $loc+1, ++$index); } } } ######################################################################## # COMPLETER ######################################################################## sub completer { my $state = shift; my($rule, $start, $dot, $loc, $index) = split/\,/, $state; my ($ls, $rs) = split/\s+->\s+/, $rule; foreach (@{$chart[$start]}) { my ($r, $s, $l, $d, $i) = split/\,/, $_; my ($left, $right) = split/\s+->\s+/, $r; my @array = split/\s+/, $right; if($array[$d] eq $ls) { enqueue($r, $s, $dot, ++$d, $index); push @{$B{$r}}, $rule; } } } ######################################################################## # NEXT_CAT ######################################################################## sub next_cat { my $state = shift; my($rule, $start, $dot, $loc, $index) = split/\,/, $state; my ($ls, $rs) = split/\s+->\s+/, $rule; my @right = split/\s+/, $rs; my $unit = $right[$loc]; if(exists $pos{$unit}) { return 1; } else { return 0; } } ######################################################################## # INCOMPLETE ######################################################################## sub incomplete { my $state = shift; my($rule, $start, $dot, $loc, $index) = split/\,/, $state; # split the left and right sides of the rule my($ls, $rs) = split/\s+->\s+/, $rule; # split the elements of the right side of the rule my @right = split/\s+/, $rs; if($#right >= $loc) { return 1; } else { return 0; } } ######################################################################## # ENQUEUE ######################################################################## sub enqueue { my $rule = shift; my $start = shift; my $dot = shift; my $loc = shift; my $index = shift; foreach (@{$chart[$index]}) { my ($r, $s, $d, $l, $i) = split/\,/, $_; if($rule eq $r && $start eq $s) { return; } } my $state = $rule . "," . $start . "," . $dot . "," . $loc . "," . $index; #print "ENQUEUE CHART $index: $state\n"; push @{$chart[$index]}, $state; }