#!/usr/bin/env python

# file: $PLAGCHECK/src/plagcheck.py
#
# revision history
#
# 20221226 (JP): reviewed
# 20221222 (PM): reviewed and refactored
# 20220202 (JL): initial version
#
# Purpose: Python script to recursively run through directories
# and search for files that match the specified language, to be
# submitted to the Moss plagiarism checker written by Stanford University.
#------------------------------------------------------------------------------

# import system modules
#
import argparse
import os
import subprocess
import sys

# import specialized modules:
#  urllib: a package that collects several modules for working with URLs
#
from urllib.request import urlopen
from urllib.error import *

# import NEDC modules
#  scraper: a wrapper package that integrates several modules that
#           facilitate working with URLs
#
import scraper as scr

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define the program options:
#  note that you cannot separate them by spaces
#
ARG_HELP = "--help"
ARG_ABRV_HELP = "-h"

ARG_DIR = "--directory"
ARG_ABRV_DIR = "-d"

ARG_LANGUAGE = "--language"
ARG_ABRV_LANGUAGE = "-l"

ARG_THRESHOLD = "--threshold"
ARG_ABRV_THRESHOLD = "-t"

# define the minimum number of command line arguments (including the
# program name) and the default similarity threshold
#
MIN_NARGS = int(3)
DEF_SIMILARITY_THRESHOLD = int(75)

# define the languages supported and the associated file extensions:
#  every extension set must be a list (never a bare string), because the
#  file search converts it with tuple(), and tuple(".py") would split the
#  string into the single characters ".", "p" and "y"
#
DEF_LANG_C = "c"
DEF_LANG_C_EXTS = [".cc", ".c", ".cpp"]

DEF_LANG_PY = "python"
DEF_LANG_PY_EXTS = [".py"]

DEF_LANG_ML = "matlab"
DEF_LANG_ML_EXTS = [".m", ".mat"]

# define default argument values
#
DEF_LANGUAGE = DEF_LANG_PY
DEF_DIRECTORY = "./"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: nedc_web_checker
#
# arguments:
#  link: a link to be checked
#
# return: a boolean value indicating status
#
# This function tries to open the link and returns True if it can be opened.
# If the link is not valid, it displays an error message and returns False.
#
def nedc_web_checker(link):

    # try opening the url: only the success or failure of the request
    # matters, so the response is closed as soon as it has been opened
    #
    try:
        with urlopen(link):
            pass

    # exit with error checking: HTTPError is a subclass of URLError,
    # so it must be handled first
    #
    except HTTPError as e:
        print("Error (%s): HTTP error (%s)" % (__FILE__, e))
        return False
    except URLError as e:
        print("Error (%s): could not find website (%s)" % (__FILE__, e))
        return False

    # urlopen raises ValueError when the link is malformed
    # (e.g., it has no scheme such as "http://")
    #
    except ValueError as e:
        print("Error (%s): invalid link (%s)" % (__FILE__, e))
        return False

    # exit gracefully
    #
    return True
#
# end of function

# function: nedc_find_code_files
#
# arguments:
#  directory: the root of the directory tree to be searched
#  extensions: a file extension (string) or a list/tuple of file extensions
#
# return: a list of paths to the files whose names end with one of
#         the extensions
#
# This function recursively traverses a directory and collects the files
# that match the given extensions.
#
def nedc_find_code_files(directory, extensions):

    # accept a single extension given as a bare string: calling tuple()
    # on a string would split it into individual characters
    #
    if isinstance(extensions, str):
        extensions = [extensions]
    suffixes = tuple(extensions)

    # find all the files by traversing the specified directory
    #
    code_files = []
    for root, _dirs, files in os.walk(directory):
        for fname in files:

            # when someone is currently working on a program, there is a
            # placeholder that always starts with ".#" which creates
            # a problem for the perl script: skip only that file and
            # keep processing the rest of the directory
            #
            if fname.startswith('.#'):
                continue

            if fname.endswith(suffixes):
                code_files.append(os.path.join(root, fname))

    # exit gracefully
    #
    return code_files
#
# end of function

# function: main
#
# arguments:
#  argv: the command line arguments, including the program name
#
# return: True on success; the program exits with os.EX_SOFTWARE on error
#
# This function parses the command line, collects the source files for
# the requested language, submits them to Moss through the perl script,
# and passes the resulting link to the web scraper.
#
def main(argv):

    # check the number of arguments and display usage file if necessary
    #
    if len(argv) < MIN_NARGS:
        subprocess.call("cat $PLAGCHECK/src/plagcheck.usage", shell=True)
        sys.exit(os.EX_SOFTWARE)

    # create a command line parser
    #
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument(ARG_ABRV_HELP, ARG_HELP, action='store_true')
    parser.add_argument(ARG_ABRV_LANGUAGE, ARG_LANGUAGE, type=str)
    parser.add_argument(ARG_ABRV_DIR, ARG_DIR, type=str)
    parser.add_argument(ARG_ABRV_THRESHOLD, ARG_THRESHOLD, type=int)

    # parse the command line (skipping the program name)
    #
    args = parser.parse_args(argv[1:])

    # get the parameter values
    #
    if args.language is None:
        args.language = DEF_LANGUAGE
    if args.directory is None:
        args.directory = DEF_DIRECTORY
    if args.threshold is None:
        args.threshold = DEF_SIMILARITY_THRESHOLD

    # check for a help flag or condition
    #
    if args.help:
        subprocess.call("cat $PLAGCHECK/src/plagcheck.help", shell=True)
        sys.exit(os.EX_SOFTWARE)

    # look up the acceptable file extensions for the given language
    #
    lang_extensions = {
        DEF_LANG_C: DEF_LANG_C_EXTS,
        DEF_LANG_PY: DEF_LANG_PY_EXTS,
        DEF_LANG_ML: DEF_LANG_ML_EXTS,
    }
    if args.language not in lang_extensions:
        print("Error (%s): the language (%s) is not supported" %
              (__FILE__, args.language))
        sys.exit(os.EX_SOFTWARE)
    extensions = lang_extensions[args.language]

    # create a list to hold all of the paths to each file
    #
    code_files = nedc_find_code_files(args.directory, extensions)
    if not code_files:
        print("Error (%s): error opening the directory (%s)" %
              (__FILE__, args.directory))
        sys.exit(os.EX_SOFTWARE)

    # locate the perl script through the PLAGCHECK environment variable
    #
    plagcheck_root = os.environ.get('PLAGCHECK')
    if plagcheck_root is None:
        print("Error (%s): the environment variable (%s) is not set" %
              (__FILE__, "PLAGCHECK"))
        sys.exit(os.EX_SOFTWARE)

    # run the perl script and capture its output instead of displaying it
    #
    result = subprocess.run(["perl", f"{plagcheck_root}/src/moss.pl",
                             ARG_ABRV_LANGUAGE, args.language,
                             ARG_ABRV_DIR, *code_files],
                            stdout=subprocess.PIPE)

    # split the output by newline and grab the link from the last line
    #
    lines = result.stdout.splitlines()
    if not lines:
        print("Error (%s): %s" %
              (__FILE__, "the perl script did not produce any output"))
        sys.exit(os.EX_SOFTWARE)

    # the captured output is a bytes object, so decode it into text
    #
    final_link = lines[-1].decode(errors="replace").strip()

    # check for one of the error codes from the website checker
    #
    if not nedc_web_checker(final_link):
        print("Error (%s): %s (%s)" %
              (__FILE__, "could not access the site", final_link))
        sys.exit(os.EX_SOFTWARE)

    # send the generated link to the web scraper, so that it can be used to
    # display the data in a user-friendly manner
    #
    scr.nedc_scraper(final_link, args.threshold)

    # exit gracefully
    #
    return True
#
# end of code

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[0:])

#
# end of file