Name : Sanjay Patil
NetID : sap71
Course : Natural Language Processing
Instructor : Dr. Picone

Assignment # 1:

1. Using find and grep, find all files ending in .html on the website.
Return the name of any file containing the words "and", "speech" and a
number on at least one line.

linux command:
# find walks the tree below the current directory and selects the *.html
# files.  For each file the grep pipeline keeps only the lines containing
# the word "and", then of those the lines containing the word "speech",
# then checks for a digit -- so the file name is printed only when one
# single line holds all three.  (A pattern such as 'and|speech|[0-9]'
# would match a line holding any ONE of them.)
find . -type f -name '*.html' -exec sh -c 'grep -w "and" "$1" | grep -w "speech" | grep -q "[0-9]" && printf "%s\n" "$1"' sh {} \;

# -type f   consider regular files only
# -exec     run the quoted command once per file found ($1 is the file name)
# -w        match "and" / "speech" as whole words
# -q        print nothing; only report through the exit status whether a line matched

2. Repeat (1) with perl. Compare the clock time it takes to execute
   the command


# Assignment # 1, part 2: the search from part 1 written in Perl.
#
# Walks every directory (or file) named on the command line -- the current
# directory when none is given -- and prints the name of each .html file
# in which at least one line contains the word "and", the word "speech"
# and a digit.  (Reading of the task: all three on the same line.)
#
# A hash (#) starts a comment.  The #! line gives the location of perl; it
# only takes effect when it is the very first line of the script file.
#
#!/usr/bin/perl -w
use strict;
use warnings;
use File::Find;
use Time::Local;

# Current local time as "MMDDYYYYHHMMSS", the layout to_seconds() parses.
# Local time is used because to_seconds() converts with timelocal().
sub timestamp_now
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime();
    return sprintf('%02d%02d%04d%02d%02d%02d',
                   $mon + 1, $mday, $year + 1900, $hour, $min, $sec);
}

# Return 1 if some single line of the file $path contains the word "and",
# the word "speech" and a digit, 0 otherwise.  A file that cannot be
# opened is reported on STDERR and treated as not matching.
sub has_matching_line
{
    my ($path) = @_;

    my $fh;
    unless (open($fh, '<', $path)) {
        warn "cannot open $path: $!\n";
        return 0;
    }

    my $found = 0;
    while (my $line = <$fh>) {
        if ($line =~ /\band\b/ && $line =~ /\bspeech\b/ && $line =~ /[0-9]/) {
            $found = 1;
            last;    # one matching line is enough
        }
    }
    close($fh);
    return $found;
}

my $start_time = timestamp_now();

# File::Find does the directory walk, so no list of files has to be piped
# into the script and every .html file is visited in turn.
my @roots = @ARGV ? @ARGV : ('.');
find(
    {
        no_chdir => 1,    # keep $_ as the full path so open() works as-is
        wanted   => sub {
            return unless /\.html\z/ && -f $_;
            print "$_\n" if has_matching_line($_);
        },
    },
    @roots
);

# Taken after the search so that end - start covers the work being timed
# (one-second resolution).
my $end_time = timestamp_now();


# Convert a local-time timestamp laid out as "MMDDYYYYHHMMSS" into seconds
# since the epoch.
#
#   $_[0]   - string beginning with the 14 digits MMDDYYYYHHMMSS,
#             e.g. "09142023153000"; anything after them is ignored
#   returns - epoch seconds as computed by Time::Local's timelocal()
#   dies    - when the argument does not begin with 14 digits; timelocal()
#             itself croaks on out-of-range fields such as month 13
#
sub to_seconds
{
    my ($stamp) = @_;
    $stamp = '' unless defined $stamp;

    my ($mo, $day, $year, $hour, $minute, $second) =
        $stamp =~ /\A([0-9]{2})([0-9]{2})([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})/;
    die "to_seconds: expected MMDDYYYYHHMMSS, got '$stamp'\n"
        unless defined $second;

    # Hand timelocal() the full 4-digit year.  Subtracting 1900 turns years
    # before 2000 into two-digit values, which timelocal() resolves with
    # its rolling-century rule and may place in the wrong century.
    my $t = timelocal($second, $minute, $hour, $day, $mo - 1, $year);
    return($t);
}

# Elapsed wall-clock time: the later timestamp minus the earlier one, so
# the reported number of seconds is not negative.
my $diff = to_seconds($end_time) - to_seconds($start_time);
printf("Diff = %d seconds\n",$diff);

# part 3
# using perl, grep our research experiments directory tree for any
# experiment on TIDigits that gave a word error rate less than 1.0%

# task is to search for "WER:" or "Percent Total Error" followed by a
# value below 1.0% (regex 0\.[0-9]+%), using the grep command to search
# only the AAREADME.txt file, which contains the list of all experiments


# part 4
# find every function in the IFC that takes at least one floating
# point argument and uses a variable named sum within the method


# part 5: 
# Write a simple perl program to locate all words within the switchboard
# lexicon that contain at least three vowels


# Print every word that contains at least three vowels, counted anywhere
# in the word (they do not have to be adjacent).
#
# Input lines come from the files named on the command line, or from
# STDIN when there are none.
# NOTE(review): assumes the word is the first whitespace-separated field
# of each lexicon line -- confirm against the switchboard lexicon layout.
while (my $entry = <>) {
    my ($string) = split(' ', $entry);
    next unless defined $string;    # blank line

    # tr/// with an empty replacement list only counts the listed characters.
    my $vowel_count = ($string =~ tr/AEIOUaeiou//);
    if ($vowel_count >= 3) {
        print "$string\n";
    }
}