#!/usr/bin/perl
#
# Name       : Sanjay Patil
# NetID      : sap71
# Course     : Natural Language Processing
# Instructor : Dr. Picone
# Assignment : # 1
#
# ---------------------------------------------------------------------
# Part 1: Using find and grep, find all files ending in .html on the
# website. Return the name of any file containing the words "and",
# "speech" and a number on at least one line.
#
# linux command:
#
#   find -name '*.html'
#   grep -l -n -E '(and){1,}|(speech){1,}|[0-9]{1,}' -r *.html
#
#   -E denotes regular expression
#   -l to print the name of the file
#   -r to recursively search
#
# NOTE(review): as written, the output of find is not fed to grep, and
# the pattern is an alternation, so grep lists files that contain ANY
# of the three terms rather than all of them.
#
# ---------------------------------------------------------------------
# Part 2: Repeat (1) with perl. Compare the clock time it takes to
# execute the command.
#
# This is assignment # 1.
# The first line of the file specifies the location of perl.
# A hash indicates a comment statement.
#
# The list of files is taken from the command line, so the output of
# find can be handed to this script without a pipe into STDIN:
#
#   find . -name '*.html' -exec perl THIS_SCRIPT {} +
#
# Every file named on the command line is scanned in turn; the name of
# each file that contains the word "and", the word "speech" and a digit
# is printed, followed by the elapsed wall-clock time in seconds.
#
# NOTE(review): the three terms may be on different lines of the file
# (this keeps the per-file counters of the first draft). If the
# assignment means "all three on the same line", the test in
# file_has_all_terms has to be applied per line instead - confirm.

use strict;
use warnings;

use POSIX qw(strftime);
use Time::Local;

# Return the current local time as a MMDDYYYYHHMMSS string, which is
# the format to_seconds() parses.
sub timestamp_now {
    return strftime('%m%d%Y%H%M%S', localtime());
}

# Convert a MMDDYYYYHHMMSS timestamp (local time) to epoch seconds.
#
#   $_[0]   : timestamp string; characters after the 14th are ignored
#   returns : seconds since the epoch, as computed by timelocal()
#   dies    : when the argument is undefined or does not start with
#             14 digits (timelocal itself dies on out-of-range fields)
sub to_seconds {
    my ($stamp) = @_;

    my @fields;
    if (defined $stamp) {
        @fields = $stamp =~ m/\A(\d{2})(\d{2})(\d{4})(\d{2})(\d{2})(\d{2})/;
    }
    if (@fields != 6) {
        my $shown = defined $stamp ? $stamp : 'undef';
        die "to_seconds: expected MMDDYYYYHHMMSS, got '$shown'\n";
    }

    my ($mo, $day, $year, $hour, $minute, $second) = @fields;

    # The full four-digit year is passed on purpose: year - 1900 gives a
    # two-digit value for 1900-1999, which Time::Local maps onto a
    # rolling century instead of the year that was meant.
    return timelocal($second, $minute, $hour, $day, $mo - 1, $year);
}

# Report whether a file contains the word "and", the word "speech" and
# a digit (each on at least one line, not necessarily the same one).
#
#   $_[0]   : path of the file to scan
#   returns : 1 when all three terms were seen, 0 otherwise; a file that
#             cannot be opened is reported on STDERR and counts as 0
sub file_has_all_terms {
    my ($path) = @_;

    my $fh;
    if (!open $fh, '<', $path) {
        warn "cannot open '$path': $!\n";
        return 0;
    }

    my ($seen_and, $seen_speech, $seen_number) = (0, 0, 0);
    while (my $line = <$fh>) {
        $seen_and    ||= $line =~ m/\band\b/;
        $seen_speech ||= $line =~ m/\bspeech\b/;
        $seen_number ||= $line =~ m/[0-9]/;

        # Nothing further to learn once every term has been seen.
        last if $seen_and && $seen_speech && $seen_number;
    }
    close $fh;

    return ($seen_and && $seen_speech && $seen_number) ? 1 : 0;
}

# ---------------------------------------------------------------------
# Part 3: Using perl, grep our research experiments directory tree for
# any experiment on TIDigits that gave a word error rate less than 1.0%.
#
# The task is to search for "WER:" or "Percent Total Error" followed by
# [0].[0-9]%, using the grep command to search the AAREADME.txt file
# only, which contains the list of all the experiments.
#
# ---------------------------------------------------------------------
# Part 4: Find every function in the IFC that takes at least one
# floating point argument and uses a variable named sum within the
# method.
#
# ---------------------------------------------------------------------
# Part 5: Write a simple perl program to locate all words within the
# switchboard lexicon that contain at least three vowels.
#
#   perl THIS_SCRIPT --lexicon LEXICON_FILE

# Report whether a string contains at least three vowels. The vowels
# are counted wherever they occur; they need not be adjacent.
#
#   $_[0]   : the string to test
#   returns : 1 when it holds three or more of AEIOUaeiou, 0 otherwise
sub has_three_vowels {
    my ($string) = @_;
    return 0 unless defined $string;

    # tr with an empty replacement list only counts, it changes nothing.
    my $vowel_count = ($string =~ tr/AEIOUaeiou//);
    return $vowel_count >= 3 ? 1 : 0;
}

# Print every lexicon word that has at least three vowels.
#
#   $_[0]   : path of the lexicon file
#   returns : number of words printed; a file that cannot be opened is
#             reported on STDERR and counts as 0
#
# NOTE(review): assumes the word is the first whitespace-separated field
# of each line - confirm against the switchboard lexicon format.
sub print_words_with_three_vowels {
    my ($lexicon_path) = @_;

    my $fh;
    if (!open $fh, '<', $lexicon_path) {
        warn "cannot open '$lexicon_path': $!\n";
        return 0;
    }

    my $printed = 0;
    while (my $line = <$fh>) {
        my ($string) = split ' ', $line;
        next unless defined $string;    # blank line

        if (has_three_vowels($string)) {
            print "$string\n";
            $printed++;
        }
    }
    close $fh;

    return $printed;
}

# Entry point.
#
#   @_      : command-line arguments; either a list of files to scan
#             (part 2) or --lexicon followed by lexicon files (part 5)
#   returns : 0 after doing the work, 1 when no arguments were given
sub main {
    my @args = @_;

    if (!@args) {
        warn "usage: $0 FILE.html [FILE.html ...]\n"
           . "       $0 --lexicon LEXICON_FILE [...]\n";
        return 1;
    }

    if ($args[0] eq '--lexicon') {
        shift @args;
        print_words_with_three_vowels($_) for @args;
        return 0;
    }

    # The timestamps bracket the scan so that the difference measures it.
    my $start_time = timestamp_now();
    for my $path (@args) {
        print "$path\n" if file_has_all_terms($path);
    }
    my $end_time = timestamp_now();

    my $diff = to_seconds($end_time) - to_seconds($start_time);
    printf("Diff = %d seconds\n", $diff);

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded
# from another file without triggering a scan.
main(@ARGV) unless caller;

1;