"""Print the 5-letter words found in ``words.txt`` that pass optional filters.

Filters (all optional, combined with AND): allowed letters (``--only``),
forbidden letters (``--exclude``), a positional mask (``--mask``), and either
a regular expression (``--regex``) or a shell-style wildcard (``--glob``).
"""

import argparse
import fnmatch
import os
import re
import sys
from pathlib import Path
from typing import Callable, List, Optional, Sequence


def is_five_letter_word(token: str) -> bool:
    """Return True if *token*, ignoring surrounding whitespace, is 5 letters.

    "Letters" follows ``str.isalpha``, so non-ASCII alphabetic characters
    are accepted as well.
    """
    token = token.strip()
    return len(token) == 5 and token.isalpha()


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the word filter."""
    parser = argparse.ArgumentParser(
        description="Filter 5-letter words from words.txt"
    )
    parser.add_argument(
        "--only",
        metavar="LETTERS",
        help="Only include words composed solely of these letters (case-insensitive)",
    )
    parser.add_argument(
        "--exclude",
        metavar="LETTERS",
        help="Exclude words containing any of these letters (case-insensitive)",
    )
    parser.add_argument(
        "--mask",
        metavar="PATTERN",
        help=(
            "Position mask for 5 letters where '_' or '?' are wildcards, "
            "and letters fix positions (e.g., _A_E_)."
        ),
    )
    # --regex and --glob are alternative ways to express a pattern, so
    # argparse rejects command lines that supply both.
    pattern_group = parser.add_mutually_exclusive_group()
    pattern_group.add_argument(
        "--regex",
        metavar="REGEX",
        help="Python regular expression to match words (defaults to case-insensitive)",
    )
    pattern_group.add_argument(
        "--glob",
        metavar="GLOB",
        help="Wildcard pattern (e.g., *A*E*) to match words (case-insensitive by default)",
    )
    parser.add_argument(
        "--case-sensitive",
        action="store_true",
        help="Make regex/glob matching case-sensitive (default is case-insensitive)",
    )
    return parser


def _parse_mask(raw_mask: str) -> List[Optional[str]]:
    """Turn a 5-character mask into per-position requirements.

    Each entry is ``None`` for a wildcard (``_`` or ``?``) or the lowercase
    letter required at that position.

    Raises:
        ValueError: if the mask is not exactly 5 characters long or contains
            a character that is neither a letter nor a wildcard. The message
            is the text to show the user.
    """
    mask = raw_mask.strip()
    if len(mask) != 5:
        raise ValueError("Invalid --mask: must be exactly 5 characters.")
    positions: List[Optional[str]] = []
    for ch in mask:
        if ch in ("_", "?"):
            positions.append(None)
        elif ch.isalpha():
            positions.append(ch.lower())
        else:
            raise ValueError(
                "Invalid --mask: use letters or '_'/'?' as wildcards."
            )
    return positions


def _build_matcher(
    only_set: Optional[set],
    exclude_set: Optional[set],
    mask_positions: Optional[List[Optional[str]]],
    regex: Optional["re.Pattern"],
    glob_pattern: Optional[str],
    case_sensitive: bool,
) -> Callable[[str], bool]:
    """Return a predicate applying every configured filter to a word.

    A filter whose argument is ``None`` is skipped. ``glob_pattern`` must
    already be lowercased when *case_sensitive* is false.
    """

    def matches(word: str) -> bool:
        lowered = word.lower()  # computed once, shared by all filters
        if only_set is not None and not set(lowered) <= only_set:
            return False
        if exclude_set is not None and not exclude_set.isdisjoint(lowered):
            return False
        if mask_positions is not None and any(
            wanted is not None and actual != wanted
            for wanted, actual in zip(mask_positions, lowered)
        ):
            return False
        # search(), not fullmatch(): the regex may match anywhere in the
        # word unless the user anchors it.
        if regex is not None and regex.search(word) is None:
            return False
        if glob_pattern is not None:
            candidate = word if case_sensitive else lowered
            if not fnmatch.fnmatchcase(candidate, glob_pattern):
                return False
        return True

    return matches


def _discard_remaining_stdout() -> None:
    """Point stdout at the null device after the reader has gone away.

    Python flushes stdout at interpreter shutdown; without this, that flush
    would raise a second BrokenPipeError and print a traceback-like notice.
    """
    try:
        devnull = os.open(os.devnull, os.O_WRONLY)
        try:
            os.dup2(devnull, sys.stdout.fileno())
        finally:
            os.close(devnull)
    except (OSError, ValueError, AttributeError):
        # Best effort only: stdout may be a replacement object without a
        # real file descriptor, in which case there is nothing to redirect.
        pass


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the word filter and return a process exit status.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success; 1 if ``words.txt`` is missing or unreadable, or if
        stdout was closed by the consumer; 2 if ``--mask`` or ``--regex``
        is invalid.
    """
    args = _build_parser().parse_args(argv)

    only_set = set(args.only.lower()) if args.only else None
    exclude_set = set(args.exclude.lower()) if args.exclude else None

    mask_positions = None
    if args.mask is not None:
        try:
            mask_positions = _parse_mask(args.mask)
        except ValueError as e:
            print(e, file=sys.stderr)
            return 2

    regex = None
    if args.regex:
        flags = 0 if args.case_sensitive else re.IGNORECASE
        try:
            regex = re.compile(args.regex, flags)
        except re.error as e:
            print(f"Invalid regex: {e}", file=sys.stderr)
            return 2

    glob_pattern = None
    if args.glob:
        # Case-insensitive globbing lowercases both pattern and word.
        glob_pattern = args.glob if args.case_sensitive else args.glob.lower()

    matches = _build_matcher(
        only_set,
        exclude_set,
        mask_positions,
        regex,
        glob_pattern,
        args.case_sensitive,
    )

    words_path = Path("words.txt")
    try:
        # Undecodable bytes are dropped rather than aborting the whole run.
        with words_path.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # A line may hold several whitespace-separated tokens;
                # blank lines simply yield no tokens.
                for tok in line.split():
                    if is_five_letter_word(tok) and matches(tok):
                        print(tok)
    except FileNotFoundError:
        print("Error: words.txt not found in current directory.", file=sys.stderr)
        return 1
    except BrokenPipeError:
        # The consumer (e.g. `head`) closed the pipe. This is a write-side
        # condition, so it must not be reported as a failure to read
        # words.txt; exit quietly with the same status as before.
        _discard_remaining_stdout()
        return 1
    except OSError as e:
        print(f"Error reading words.txt: {e}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    raise SystemExit(main())