#!/usr/local/bin/perl -w
#########################################################################
#Name:    Bridget Thomson-McInnes
#Program: frequency.pl
#########################################################################
#Program Description:
#   Determines the frequency of words in a text and prints the top n.
#   It also prints the number of tokens, types and the token/type ratio.
#
#   A "word" is a maximal run of ASCII letters and digits; words are
#   counted case-insensitively (folded to lower case).
#
#Usage:
#   ./frequency.pl n [file ...]
#
#   n     non-negative integer: how many of the most frequent words to
#         print. If n is 0 or omitted, every word is printed.
#   file  input files; standard input is read when none are given.
##########################################################################
use strict;
use warnings;

# count_words($fh)
#   Reads $fh line by line and tallies every word.
#   Returns a two-element list: a hash reference mapping each lower-cased
#   word to its number of occurrences, and the total number of tokens.
sub count_words {
    my ($fh) = @_;

    my %count_of;
    my $tokens = 0;

    while ( my $line = <$fh> ) {
        # split() yields a leading empty field when the line starts with a
        # separator (e.g. indentation); grep drops it so that the empty
        # string is never counted as a word.
        for my $word ( grep { length } split /[^a-zA-Z0-9]+/, $line ) {
            $count_of{ lc $word }++;
            $tokens++;
        }
    }

    return ( \%count_of, $tokens );
}

# top_words($count_of, $limit)
#   Returns the words of %$count_of ordered by descending frequency; words
#   with equal frequency are ordered alphabetically so that the result is
#   deterministic. At most $limit words are returned; a false $limit
#   (0 or undef) means "no limit".
sub top_words {
    my ( $count_of, $limit ) = @_;

    my @ranked = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
        keys %{$count_of};

    if ( $limit && $limit < @ranked ) {
        $#ranked = $limit - 1;    # truncate to the first $limit entries
    }

    return @ranked;
}

# main()
#   Command-line entry point: takes n from @ARGV, counts the words of the
#   remaining files (or standard input) and prints the report.
sub main {
    # Remove n from @ARGV so that the ARGV handle only sees file names.
    my $n = shift @ARGV;
    if ( defined $n && $n !~ /\A\d+\z/ ) {
        die "Usage: $0 n [file ...]\n";
    }

    # \*ARGV is the handle behind the <> operator: it iterates over the
    # files named in @ARGV, or reads STDIN when @ARGV is empty.
    my ( $count_of, $tokens ) = count_words( \*ARGV );
    my $types = keys %{$count_of};

    print "WORD FREQ \n";
    print "------------------------------\n";

    for my $word ( top_words( $count_of, $n ) ) {
        # 19-column left-justified (truncated) word, 9-column
        # right-justified count.
        printf "%-19.19s %9d\n", $word, $count_of->{$word};
    }

    print "\n";
    print "\n";
    print "TOKENS: $tokens \n";
    print "TYPES : $types \n";
    print "RATIO : ";
    # With no input there are no types; report 0 instead of dividing by zero.
    print $types ? $tokens / $types : 0;
    print "\n";

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without triggering any I/O.
main() unless caller;