Documentation ¶
Overview ¶
Package matches provides types and functions intended to help with collecting and validating file search results against required criteria.
Index ¶
- Constants
- type DuplicateFilesSummary
- type FileChecksumIndex
- func (fi FileChecksumIndex) GenerateCSVHeaderRow() []string
- func (fi FileChecksumIndex) GetDuplicateFilesCount() int
- func (fi FileChecksumIndex) GetTotalFilesCount() int
- func (fi FileChecksumIndex) GetWastedSpace() int64
- func (fi FileChecksumIndex) PrintFileMatches(blankLineBetweenSets bool)
- func (fi FileChecksumIndex) PruneFileChecksumIndex(duplicatesThreshold int)
- func (fi FileChecksumIndex) WriteFileMatchesCSV(filename string, blankLineBetweenSets bool) error
- func (fi FileChecksumIndex) WriteFileMatchesWorkbook(filename string, summary DuplicateFilesSummary) error
- type FileMatch
- type FileMatches
- type FileSizeIndex
- func MergeFileSizeIndexes(fileSizeIndexes ...FileSizeIndex) FileSizeIndex
- func NewFileSizeIndex(recursiveSearch bool, ignoreErrors bool, fileSizeThreshold int64, ...) (FileSizeIndex, error)
- func ProcessPath(recursiveSearch bool, ignoreErrors bool, fileSizeThreshold int64, path string) (FileSizeIndex, error)
Constants ¶
const ( CSVDirectoryColumnHeaderName string = "directory" CSVFileColumnHeaderName string = "file" CSVSizeColumnHeaderName string = "size" CSVSizeInBytesDirectoryColumnHeaderName string = "size_in_bytes" CSVChecksumColumnHeaderName string = "checksum" CSVRemoveFileColumnHeaderName string = "remove_file" )
CSV header names referenced from both inside and outside of the package
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type DuplicateFilesSummary ¶
type DuplicateFilesSummary struct { TotalEvaluatedFiles int // Number of sets based on identical file size FileSizeMatchSets int // Number of sets based on identical file hash FileHashMatchSets int // Identical files count based on file size FileSizeMatches int // Identical files count based on file hash FileHashMatches int // Wasted space for duplicate file sets in bytes WastedSpace int64 // DuplicateCount represents the number of duplicated files DuplicateCount int }
DuplicateFilesSummary is a collection of the metadata calculated from evaluating duplicate files. This metadata is displayed via a variety of methods, notably just prior to application exit via console and the first sheet in the generated workbook.
func (DuplicateFilesSummary) PrintSummary ¶
func (dfs DuplicateFilesSummary) PrintSummary()
PrintSummary is used to generate a basic summary report of file metadata collected while evaluating files for potential duplicates.
type FileChecksumIndex ¶
type FileChecksumIndex map[checksums.SHA256Checksum]FileMatches
FileChecksumIndex is an index of files based on their checksums (SHA256 hash) to FileMatches. This data structure is created from a pruned FileSizeIndex. After additional pruning to remove any single-entry FileMatches "values", this data structure represents confirmed duplicate files.
func NewFileChecksumIndex ¶
func NewFileChecksumIndex(fi FileSizeIndex) FileChecksumIndex
NewFileChecksumIndex takes in a FileSizeIndex, generates checksums for FileMatch objects and then returns a FileChecksumIndex.
func (FileChecksumIndex) GenerateCSVHeaderRow ¶
func (fi FileChecksumIndex) GenerateCSVHeaderRow() []string
GenerateCSVHeaderRow returns a string slice for use with a CSV Writer as a header row.
func (FileChecksumIndex) GetDuplicateFilesCount ¶
func (fi FileChecksumIndex) GetDuplicateFilesCount() int
GetDuplicateFilesCount returns the number of non-original files in a checksum-based file index
func (FileChecksumIndex) GetTotalFilesCount ¶
func (fi FileChecksumIndex) GetTotalFilesCount() int
GetTotalFilesCount returns the total number of files in a checksum-based file index
func (FileChecksumIndex) GetWastedSpace ¶
func (fi FileChecksumIndex) GetWastedSpace() int64
GetWastedSpace calculates the wasted space from all confirmed duplicate files
func (FileChecksumIndex) PrintFileMatches ¶
func (fi FileChecksumIndex) PrintFileMatches(blankLineBetweenSets bool)
PrintFileMatches prints duplicate files recorded in a FileChecksumIndex to stdout for development or troubleshooting purposes. See also WriteFileMatchesCSV for the expected production output method.
func (FileChecksumIndex) PruneFileChecksumIndex ¶
func (fi FileChecksumIndex) PruneFileChecksumIndex(duplicatesThreshold int)
PruneFileChecksumIndex removes map entries with single-entry slices which do not reflect duplicate files.
func (FileChecksumIndex) WriteFileMatchesCSV ¶
func (fi FileChecksumIndex) WriteFileMatchesCSV(filename string, blankLineBetweenSets bool) error
WriteFileMatchesCSV writes duplicate files recorded in a FileChecksumIndex to the specified CSV file.
func (FileChecksumIndex) WriteFileMatchesWorkbook ¶
func (fi FileChecksumIndex) WriteFileMatchesWorkbook(filename string, summary DuplicateFilesSummary) error
WriteFileMatchesWorkbook is a prototype method to generate an Excel workbook from duplicate file details
type FileMatch ¶
type FileMatch struct { // File metadata used in various calculations os.FileInfo // The full path to the file FullPath string // Directory containing the file; analogue to Name() method ParentDirectory string // Checksum calculated for files meeting the duplicates threshold Checksum checksums.SHA256Checksum }
FileMatch represents a superset of statistics (including os.FileInfo) for a file matched by provided search criteria. This allows us to record the original full path while also recording file metadata used in later calculations.
func (FileMatch) GenerateCSVDataRow ¶
GenerateCSVDataRow returns a string slice for use with a CSV Writer as a data (non-header) row
type FileMatches ¶
type FileMatches []FileMatch
FileMatches is a slice of FileMatch objects that represents the search results based on user-specified criteria.
func (FileMatches) GenerateEmptyCSVDataRow ¶
func (fm FileMatches) GenerateEmptyCSVDataRow() []string
GenerateEmptyCSVDataRow returns a string slice for use with a CSV Writer as an empty data (non-header) row. This is used as a separator between sets of duplicate files.
func (FileMatches) SortByModTimeAsc ¶
func (fm FileMatches) SortByModTimeAsc()
SortByModTimeAsc sorts slice of FileMatch objects in ascending order with older values listed first.
func (FileMatches) SortByModTimeDesc ¶
func (fm FileMatches) SortByModTimeDesc()
SortByModTimeDesc sorts slice of FileMatch objects in descending order with newer values listed first.
func (FileMatches) TotalFileSize ¶
func (fm FileMatches) TotalFileSize() int64
TotalFileSize returns the cumulative size of all files in the slice in bytes
func (FileMatches) TotalFileSizeHR ¶
func (fm FileMatches) TotalFileSizeHR() string
TotalFileSizeHR returns a human-readable string representing the cumulative size of all files in the slice.
func (FileMatches) UpdateChecksums ¶
func (fm FileMatches) UpdateChecksums(ignoreErrors bool) error
UpdateChecksums generates checksum values for each file tracked by a FileMatch entry and updates the associated FileMatch.Checksum field value
type FileSizeIndex ¶
type FileSizeIndex map[int64]FileMatches
FileSizeIndex is an index of files based on their size (in bytes) to FileMatches. This data structure represents search results for duplicate files based on user-specified criteria before we confirm that multiple files of the same size are in fact duplicates. In many cases (e.g., a multi-part archive), they may not be.
func MergeFileSizeIndexes ¶
func MergeFileSizeIndexes(fileSizeIndexes ...FileSizeIndex) FileSizeIndex
MergeFileSizeIndexes receives one or more FileSizeIndex objects and merges entries between these objects, returning a combined FileSizeIndex object
func NewFileSizeIndex ¶
func NewFileSizeIndex(recursiveSearch bool, ignoreErrors bool, fileSizeThreshold int64, dirs ...string) (FileSizeIndex, error)
NewFileSizeIndex optionally recursively processes the provided directories and returns a FileSizeIndex of the matching files, along with an error if one was encountered.
func ProcessPath ¶
func ProcessPath(recursiveSearch bool, ignoreErrors bool, fileSizeThreshold int64, path string) (FileSizeIndex, error)
ProcessPath optionally recursively processes a provided path and returns a FileSizeIndex of the matching files, along with an error if one was encountered.
func (FileSizeIndex) GetTotalFilesCount ¶
func (fi FileSizeIndex) GetTotalFilesCount() int
GetTotalFilesCount returns the total number of files in a size-based file index
func (FileSizeIndex) PruneFileSizeIndex ¶
func (fi FileSizeIndex) PruneFileSizeIndex(duplicatesThreshold int)
PruneFileSizeIndex removes map entries with single-entry slices which do not reflect potential duplicate files (i.e., duplicate file size != duplicate files)
func (FileSizeIndex) UpdateChecksums ¶
func (fi FileSizeIndex) UpdateChecksums(ignoreErrors bool) error
UpdateChecksums acts as a wrapper around the UpdateChecksums method for FileMatches objects