#!/usr/local/bin/perl
################################################################################
# Name: Bridget Thomson-McInnes
#
# Summary: Reads the input, splits it at every literal '.START' marker, and
#          wraps the time and date expressions found in each block in
#          [time]...[/time] and [date]...[/date] tags.
#
# Usage:   ./tagging.pl FILE [FILE ...]
#          Input is read with <>, so text piped on STDIN works as well.
#
# Output:  The tagged blocks joined by single spaces and terminated by a
#          newline.  The '.START' markers themselves are not printed.
################################################################################

use strict;
use warnings;

# --- Tag delimiters ----------------------------------------------------------
# Emitted verbatim around every match.  Note the opening time tag has no
# trailing space while the opening date tag does; both are kept as they were.
my $TIME_OPEN  = ' [time]';
my $TIME_CLOSE = '[/time] ';
my $DATE_OPEN  = ' [date] ';
my $DATE_CLOSE = '[/date] ';

# Marker that separates blocks in the input.  It is matched literally (the
# leading '.' is NOT a regex wildcard).
my $BLOCK_MARKER = '.START';

# --- Vocabulary --------------------------------------------------------------
# Weekday / month names.  Only the abbreviated forms may carry a trailing
# period, and a name must not run on into further letters, so "Mayor" or
# "Marching" are not mistaken for "May" / "Mar".
my $WEEKDAY = qr/\b(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|(?:Mon|Tues?|Wed|Thurs?|Fri|Sat|Sun)\.?)(?![A-Za-z])/;
my $MONTH   = qr/\b(?:January|February|March|April|May|June|July|August|September|October|November|December|(?:Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sept|Sep|Oct|Nov|Dec)\.?)(?![A-Za-z])/;

# Spelled-out hours used by the "o'clock" and "quarter to" / "half past" rules.
my $WRITTEN_NUM = qr/(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)/;

# Relative day words.
my $DEICTIC = qr/(?:yesterday|tomorrow)/;

# Vocabularies that are defined but deliberately not applied by any rule.
# NOTE(review): the holiday rule was commented out in the original script (and
# referred to a misspelt variable), and the written ordinals were never used.
# Both lists are kept so the rules can be switched on later; to enable
# holidays, add $HOLIDAY as another alternative of $DATE below.
my $HOLIDAY         = qr/(?:Christmas|Easter|Halloween|Hanauka|Thanksgiving|Valentine Day|President's Day|Memorial Day|Labor Day|Father's Day|Mother's Day|Independence Day|New Years)/;
my $WRITTEN_ORDINAL = qr/(?:first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth)/;

# --- Time expressions --------------------------------------------------------
# "midnight", "afternoon", "morning" (either capitalisation of the first letter).
my $PART_OF_DAY = qr/[Mm]idnight|[Aa]fternoon|[Mm]orning/;

# "9:05", "10:30 pm", "10:30pm".  The am/pm suffix is optional and no trailing
# space is required, so a time directly before punctuation is still found.
my $CLOCK_TIME = qr/\b1?[0-9]:[0-5][0-9](?![0-9])(?: ?(?:am|pm)\b)?/;

# "5 o'clock", "12 o'clock", "seven o'clock".
my $OCLOCK_TIME = qr/\b(?:1[012]|[0-9]|$WRITTEN_NUM) o'clock/;

# "quarter to seven", "half past ten".
my $RELATIVE_TIME = qr/(?:quarter to|half past) $WRITTEN_NUM\b/;

my $TIME = qr/$PART_OF_DAY|$CLOCK_TIME|$OCLOCK_TIME|$RELATIVE_TIME/;

# --- Date expressions --------------------------------------------------------
# Optional ", 1990" / " 90" that may follow a day or month (2 to 4 digits).
my $YEAR_TAIL = qr/,?\s[123]?[0-9]?[0-9][0-9]\b/;

# "12/25/1990", "1/2/90".
my $NUMERIC_DATE = qr{\b[123]?[0-9]/[0123]?[0-9]/[123]?[0-9]?[0-9][0-9]\b};

# "the 3rd of March", "The 21st of Dec. 1990".
my $ORDINAL_DATE = qr/\b[Tt]he [123]?[0-9](?:st|nd|rd|th) of $MONTH(?:$YEAR_TAIL)?/;

# "Monday", "March", "Monday, March 5", "5 March 1990", "March 5, 1990".
# Every separator lives inside its optional group, so a bare weekday or month
# matches without needing any particular whitespace after it.  \s is used for
# the separators so a date may also continue across a line break.
my $CALENDAR_DATE = qr/(?:\b[123]?[0-9]\s)?(?:$WEEKDAY,?\s$MONTH|$WEEKDAY|$MONTH)(?:\s[123]?[0-9]\b,?)?(?:$YEAR_TAIL)?/;

# "yesterday", "tomorrow", "the day before yesterday".
my $DEICTIC_DATE = qr/\b(?:[Tt]he day before )?$DEICTIC\b/;

# "'87".
# NOTE(review): the original followed this with "[^(yrs|years)]?", which is a
# character class and therefore only swallowed one arbitrary character into
# the tag.  It was presumably meant to exclude "yrs"/"years"; that intent is
# unconfirmed, so no exclusion is applied here.
my $SHORT_YEAR = qr/'[0-9][0-9](?![0-9])/;

# Stand-alone four-digit year 1000-3999, not the integer part of "1234.56".
my $FULL_YEAR = qr/\b[123][0-9]{3}\b(?![.,][0-9])/;

# Most specific alternatives first: at any given position a numeric or
# multi-word date wins over a bare year.
my $DATE = qr/$NUMERIC_DATE|$ORDINAL_DATE|$CALENDAR_DATE|$DEICTIC_DATE|$SHORT_YEAR|$FULL_YEAR/;

# tag_block($text)
#   Returns a copy of $text with every time expression wrapped in the time
#   tags and every date expression wrapped in the date tags.  An undefined
#   argument yields the empty string.
#
#   Each kind of expression is tagged in a single pass, so one rule can never
#   re-tag text that another rule of the same kind has already wrapped (for
#   example the year inside "12/25/1990").
sub tag_block {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/($TIME)/$TIME_OPEN$1$TIME_CLOSE/g;
    $text =~ s/($DATE)/$DATE_OPEN$1$DATE_CLOSE/g;

    return $text;
}

# tag_document($input)
#   Splits $input at each literal block marker, tags every block with
#   tag_block(), and returns the blocks joined by a single space (without the
#   final newline that main() adds).  Empty or undefined input yields ''.
sub tag_document {
    my ($input) = @_;
    return '' unless defined $input;

    # \Q...\E quotes the marker so its '.' matches only a real period.
    my @blocks = split /\Q$BLOCK_MARKER\E/, $input;

    return join ' ', map { tag_block($_) } @blocks;
}

# main()
#   Reads all input named on the command line (or STDIN), tags it, and prints
#   the result followed by a newline.
sub main {
    my $input = join '', <>;
    print tag_document($input), "\n";
    return;
}

# Run only when executed as a script; loading the file with require/do just
# defines the subs above.
main() unless caller;

1;