Source code for mtpy.utils.edi_folders

# -*- coding: utf-8 -*-
# ! /usr/bin/env python
"""
Description:
    Find path to all the directories which contain a given type of files: .edi, .py .jpg, .pdf

How to Run:
    python mtpy/utils/edi_folders.py  . EDI
    python mtpy/utils/edi_folders.py  /e/Data/ EDI 2
    python mtpy/utils/edi_folders.py  /e/Data/ PY

CreationDate:   26/11/2017
Developer:      fei.zhang@ga.gov.au

Revision History:
    LastUpdate:  26/11/2017   FZ started the first version

"""

import os
import sys


[docs] def recursive_glob(dirname, ext="*.edi"): """Under the dirname recursively find all files with extension ext. Return a list of the full-path to the types of files of interest. This function is useful to handle a nested directories of EDI files. :param dirname: A single dir OR a list of dirs. :param ext: Eg, ".edi", ".xml", defaults to "*.edi". :return: A list of path2files. """ import fnmatch if isinstance(dirname, list): # the input argument is a list of directory filelist = [] for adir in dirname: filelist.extend(recursive_glob(adir)) return filelist else: # input variable is a single dir matches = [] for root, dirnames, filenames in os.walk(dirname): for filename in fnmatch.filter(filenames, ext): matches.append(os.path.join(root, filename)) return matches
[docs] class EdiFolders(object): def __init__(self, startDir, edifiles_threshold=1, filetype=".edi"): """Init function.""" self.startDir = startDir # the top level dir to be searched self.edifiles_threshold = ( edifiles_threshold # at least 1 file is of the specified type. ) self.filetype = filetype self.folders_of_interest = [] # initial empty list
[docs] def find_edi_folders(self, aStartDir): """Find directories containing the file of type self.filetype. :param aStartDir: The directory to start from. :return: A list of full path to folders of interest. """ for dirName, subdirList, fileList in os.walk(aStartDir): edi_files_count = 0 for fname in fileList: if fname.lower().endswith(self.filetype): # if fname.endswith('.py'): edi_files_count = edi_files_count + 1 else: pass if edi_files_count >= self.edifiles_threshold: print( ( "Found directory: %s ==> %s *.%s files" % (dirName, edi_files_count, self.filetype) ) ) self.folders_of_interest.append(dirName) # If it's a folder then recursive call for asubdir in subdirList: self.find_edi_folders(os.path.join(dirName, asubdir)) return self.folders_of_interest
[docs] def get_all_edi_files(self): """Get all edi files.""" return recursive_glob(self.startDir)
######################################################## if __name__ == "__main__": if len(sys.argv) <= 1: print( ( "USAGE: %s %s [%s] [%s]" % (sys.argv[0], "start_path_dir", "ftype", "edi_threshold") ) ) sys.exit(1) else: root_dir = sys.argv[1] if len(sys.argv) > 2: ftype = sys.argv[2].lower() else: ftype = "edi" if len(sys.argv) > 3: edifiles_th = int(sys.argv[3]) else: edifiles_th = 1 sObj = EdiFolders(root_dir, edifiles_threshold=edifiles_th, filetype=ftype) edi_dirs_list = sObj.find_edi_folders(root_dir) print(("Number of interesting folders found =", len(edi_dirs_list))) print(edi_dirs_list)