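"""Print the 5-letter words found in ``words.txt``, optionally filtered.

Available filters: ``--only`` (allowed letters), ``--mask`` (fixed letter
positions), and one of ``--regex`` or ``--glob``. ``--case-sensitive`` affects
regex/glob matching only.
"""
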
import argparse
import fnmatch
import re
import sys
from pathlib import Path
from typing import Optional, Sequence
def is_five_letter_word(token: str) -> bool:
    """Return True if *token* is exactly five alphabetic characters.

    Leading and trailing whitespace is ignored before the check. Alphabetic
    is decided by ``str.isalpha``, so digits, punctuation and embedded
    whitespace all disqualify the token.
    """
    token = token.strip()
    return len(token) == 5 and token.isalpha()
def _parse_mask(mask: str) -> list:
    """Convert a 5-character position mask into per-position constraints.

    ``_`` and ``?`` become ``None`` (wildcard); a letter becomes its lowercase
    form. Surrounding whitespace is stripped first.

    Raises:
        ValueError: with a user-facing message when the mask is not exactly
            5 characters long or contains a character that is neither a
            letter nor a wildcard.
    """
    mask = mask.strip()
    if len(mask) != 5:
        raise ValueError("Invalid --mask: must be exactly 5 characters.")
    positions = []
    for ch in mask:
        if ch in ("_", "?"):
            positions.append(None)
        elif ch.isalpha():
            positions.append(ch.lower())
        else:
            raise ValueError("Invalid --mask: use letters or '_'/'?' as wildcards.")
    return positions


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Print every 5-letter word in ``words.txt`` that passes the CLI filters.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which keeps
            the no-argument call working as before.

    Returns:
        ``0`` on success, ``1`` if ``words.txt`` is missing or unreadable,
        ``2`` if ``--mask`` or ``--regex`` is invalid. Diagnostics go to
        stderr; matching words go to stdout, one per line.
    """
    parser = argparse.ArgumentParser(description="Filter 5-letter words from words.txt")
    parser.add_argument(
        "--only",
        metavar="LETTERS",
        help="Only include words composed solely of these letters (case-insensitive)",
    )
    parser.add_argument(
        "--mask",
        metavar="PATTERN",
        help=(
            "Position mask for 5 letters where '_' or '?' are wildcards, "
            "and letters fix positions (e.g., _A_E_)."
        ),
    )
    pattern_group = parser.add_mutually_exclusive_group()
    pattern_group.add_argument(
        "--regex",
        metavar="REGEX",
        help="Python regular expression to match words (defaults to case-insensitive)",
    )
    pattern_group.add_argument(
        "--glob",
        metavar="GLOB",
        help="Wildcard pattern (e.g., *A*E*) to match words (case-insensitive by default)",
    )
    parser.add_argument(
        "--case-sensitive",
        action="store_true",
        help="Make regex/glob matching case-sensitive (default is case-insensitive)",
    )
    args = parser.parse_args(argv)

    # --only and --mask always compare against the lowercased word.
    only_set = set(args.only.lower()) if args.only else None

    mask_positions = None
    if args.mask is not None:
        try:
            mask_positions = _parse_mask(args.mask)
        except ValueError as e:
            print(e, file=sys.stderr)
            return 2

    regex = None
    if args.regex:
        flags = 0 if args.case_sensitive else re.IGNORECASE
        try:
            regex = re.compile(args.regex, flags)
        except re.error as e:
            print(f"Invalid regex: {e}", file=sys.stderr)
            return 2

    glob_pattern = None
    if args.glob:
        # Case-insensitive globbing is done by lowercasing both sides and
        # then using the case-preserving fnmatchcase.
        glob_pattern = args.glob if args.case_sensitive else args.glob.lower()

    def matches(word: str) -> bool:
        """Return True if *word* satisfies every filter that was supplied."""
        lowered = word.lower()
        if only_set is not None and not set(lowered) <= only_set:
            return False
        if mask_positions is not None:
            for wanted, actual in zip(mask_positions, lowered):
                if wanted is not None and actual != wanted:
                    return False
        if regex is not None and regex.search(word) is None:
            return False
        if glob_pattern is not None:
            candidate = word if args.case_sensitive else lowered
            if not fnmatch.fnmatchcase(candidate, glob_pattern):
                return False
        return True

    words_path = Path("words.txt")

    # Open directly instead of checking exists() first: the check could go
    # stale before the open, and a missing file is reported identically here.
    try:
        with words_path.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # A line may hold several whitespace-separated tokens;
                # blank lines simply yield no tokens.
                for tok in line.split():
                    if is_five_letter_word(tok) and matches(tok):
                        print(tok)
    except FileNotFoundError:
        print("Error: words.txt not found in current directory.", file=sys.stderr)
        return 1
    except OSError as e:
        print(f"Error reading words.txt: {e}", file=sys.stderr)
        return 1

    return 0
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    raise SystemExit(main())