Files
2026-06-19 16:44:54 -04:00

248 lines
7.6 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import sqlite3
import sys
from datetime import datetime
# Constants
# Location of the SQLite database holding the OCR results. Following the XDG
# Base Directory convention, an unset *or empty* XDG_DATA_HOME falls back to
# ~/.local/share; using the empty value would silently produce a path
# relative to the current working directory.
DATABASE_PATH = os.path.join(
    os.environ.get("XDG_DATA_HOME") or os.path.expanduser("~/.local/share"),
    "screenshot-gallery",
    "screenshot_ocr.db",
)
def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            zero-argument callers behave exactly as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``query`` (list of
        words, possibly empty), ``case_insensitive``, ``exact``, ``limit``
        (int, default 25), ``verbose``, ``count``, ``list_all``, ``date``
        and ``open``.
    """
    parser = argparse.ArgumentParser(
        description="Search for text in OCR results from screenshots."
    )
    parser.add_argument(
        "query", help="Text to search for in the OCR results", nargs="*"
    )
    parser.add_argument(
        "-i",
        "--case-insensitive",
        action="store_true",
        help="Perform case-insensitive search",
    )
    parser.add_argument(
        "-e",
        "--exact",
        action="store_true",
        help="Match exact text only (default is substring match)",
    )
    parser.add_argument(
        "-l",
        "--limit",
        type=int,
        default=25,
        help="Limit number of results (default: 25, 0 for no limit)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show more details including file path and date",
    )
    parser.add_argument(
        "-c", "--count", action="store_true", help="Only show the count of matches"
    )
    parser.add_argument(
        "--list-all", action="store_true", help="List all files in the database"
    )
    parser.add_argument(
        "-d", "--date", action="store_true", help="Sort results by date (newest first)"
    )
    parser.add_argument(
        "-o",
        "--open",
        action="store_true",
        help="Open the first matching file (requires xdg-open)",
    )
    return parser.parse_args(argv)
def check_database(db_path=None):
    """Check that the database exists and has the expected structure.

    The file must exist, be readable by SQLite, and contain the
    ``ocr_results`` table that the search queries read from.

    Args:
        db_path: Optional path of the database to check. Defaults to the
            module-level ``DATABASE_PATH``.

    Raises:
        SystemExit: With exit status 1 (after printing an explanation to
            stderr) when any of the checks fails.
    """
    path = DATABASE_PATH if db_path is None else db_path

    if not os.path.exists(path):
        print(f"Error: Database not found at {path}", file=sys.stderr)
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        conn = sqlite3.connect(path)
        try:
            table = conn.execute(
                "SELECT 1 FROM sqlite_master "
                "WHERE type IN ('table', 'view') AND name = 'ocr_results'"
            ).fetchone()
        finally:
            conn.close()
    except sqlite3.Error as exc:
        # Covers unreadable files and files that are not SQLite databases.
        print(f"Error: Could not read database at {path}: {exc}", file=sys.stderr)
        sys.exit(1)

    if table is None:
        print(
            f"Error: Database at {path} has no 'ocr_results' table",
            file=sys.stderr,
        )
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)
def search_ocr_database(args, db_path=None):
    """Search the OCR database for the given query.

    Args:
        args: Parsed command line options. Reads ``list_all`` and, for a
            regular search, ``query``, ``exact``, ``case_insensitive``,
            ``date`` and ``limit``.
        db_path: Optional path of the database to query. Defaults to the
            module-level ``DATABASE_PATH``.

    Returns:
        A list of row tuples. With ``args.list_all`` each row is
        ``(filename, ocr_date, created_date, length(ocr_text), full_path)``
        ordered by filename. Otherwise each row is a full ``ocr_results``
        row (``SELECT *``, in table column order) ordered by
        ``created_date`` descending when ``args.date`` is set, else by
        filename, and capped at ``args.limit`` rows when that is positive.
    """
    path = DATABASE_PATH if db_path is None else db_path
    conn = sqlite3.connect(path)
    try:
        if args.list_all:
            cursor = conn.execute(
                """
                SELECT filename, ocr_date, created_date, length(ocr_text), full_path
                FROM ocr_results
                ORDER BY filename ASC
                """
            )
            return cursor.fetchall()

        # Join all query words
        query_text = " ".join(args.query)

        # Substring matching uses instr() rather than LIKE for two reasons:
        # SQLite's LIKE ignores ASCII case by default (which made the
        # --case-insensitive flag a no-op), and LIKE would treat '%' and '_'
        # typed by the user as wildcards.
        if args.exact:
            if args.case_insensitive:
                condition = "LOWER(ocr_text) = LOWER(?)"
            else:
                condition = "ocr_text = ?"
        elif args.case_insensitive:
            condition = "instr(LOWER(ocr_text), LOWER(?)) > 0"
        else:
            condition = "instr(ocr_text, ?) > 0"

        ordering = "created_date DESC" if args.date else "filename ASC"
        sql_query = f"SELECT * FROM ocr_results WHERE {condition} ORDER BY {ordering}"
        params = [query_text]

        if args.limit > 0:
            sql_query += " LIMIT ?"
            params.append(args.limit)

        return conn.execute(sql_query, params).fetchall()
    finally:
        # Release the connection even when a query raises.
        conn.close()
def _preview(text, width=80):
    """Return the first *width* characters of *text*, adding "..." if cut."""
    return text[:width] + "..." if len(text) > width else text


def _snippet_around_match(text, term, context=40):
    """Return an excerpt of *text* around the first hit of *term*, or None.

    The match is case-insensitive and is located on the original text, so
    the offsets stay valid even for characters whose lowercase form has a
    different length. The excerpt spans *context* characters either side of
    the match, widened so it does not start or end in the middle of a word,
    and is wrapped in "..." wherever text was cut off.
    """
    import re

    match = re.search(re.escape(term), text, re.IGNORECASE)
    if match is None:
        return None

    start = max(0, match.start() - context)
    end = min(len(text), match.end() + context)

    # Find word boundaries: back up to the first character of a split word...
    if start > 0 and text[start].isalnum():
        while start > 0 and text[start - 1].isalnum():
            start -= 1
    # ...and run forward to the end of a split word.
    while end < len(text) and text[end].isalnum():
        end += 1

    snippet = text[start:end]
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet = snippet + "..."
    return snippet


def display_results(results, args):
    """Display the search results on stdout.

    Args:
        results: Rows returned by the search. For ``args.list_all`` each row
            is ``(filename, ocr_date, created_date, text_length, full_path)``;
            otherwise each row is ``(id, filename, full_path, ocr_text,
            file_size, created_date, ocr_date)``.
        args: Parsed command line options. Reads ``count``, ``list_all``,
            ``verbose``, ``exact``, ``query`` and ``limit``.

    Prints "No matches found." for an empty result, only the match count
    when ``args.count`` is set, and otherwise a header followed by one entry
    per row (full details with ``args.verbose``, a short excerpt without).
    """
    if not results:
        print("No matches found.")
        return

    if args.count:
        print(f"Found {len(results)} matches.")
        return

    # The listing query is never limited, so the note only applies to searches.
    truncated = (
        not args.list_all and args.limit > 0 and len(results) == args.limit
    )
    note = f" (showing first {args.limit})" if truncated else ""
    print(f"Found {len(results)} matches{note}:")
    print("-" * 80)

    # Handle list_all format differently
    if args.list_all:
        for filename, ocr_date, created_date, text_length, full_path in results:
            if args.verbose:
                print(f"File: {filename}")
                print(f"Path: {full_path}")
                print(f"Created: {created_date}")
                print(f"OCR Date: {ocr_date}")
                print(f"Text Length: {text_length} chars")
                print("-" * 40)
            else:
                # Keep only the date part of an ISO timestamp; a NULL date
                # is printed as-is instead of raising.
                if created_date and "T" in created_date:
                    created_str = created_date.split("T")[0]
                else:
                    created_str = created_date
                print(f"{filename} | {created_str} | {text_length} chars")
        return

    # Display regular search results
    search_term = " ".join(args.query) if args.query else ""
    for _row_id, filename, full_path, ocr_text, file_size, created_date, ocr_date in results:
        ocr_text = ocr_text or ""
        if args.verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Size: {file_size} bytes")
            print("Text:")
            print("-" * 40)
            print(ocr_text)
            print("-" * 80)
            continue

        print(f"[{filename}]")
        # Show just a snippet of text around the match if not exact; fall
        # back to the start of the text when the term cannot be located.
        snippet = None
        if not args.exact and args.query:
            snippet = _snippet_around_match(ocr_text, search_term)
        print(snippet if snippet is not None else _preview(ocr_text))
        print("-" * 40)
def open_file(path, opener="xdg-open"):
    """Open a file using the default application.

    Args:
        path: Path of the file to open.
        opener: Launcher command used to open the file. Defaults to
            ``xdg-open``.

    Returns:
        True when the launcher process was started, False when it could not
        be started (the reason is printed to stderr).
    """
    import subprocess

    try:
        # The launcher's output is never read, so discard it rather than
        # attaching pipes that would leak descriptors and could block the
        # child once their buffers fill up.
        subprocess.Popen(
            [opener, path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError, TypeError) as e:
        # OSError: launcher missing or not executable; ValueError/TypeError:
        # unusable path value (e.g. embedded NUL or a NULL path from the DB).
        print(f"Error opening file: {e}", file=sys.stderr)
        return False

    print(f"Opening: {path}")
    return True
def main():
    """Main function to run the OCR search.

    Parses the command line, verifies the database, runs the search or
    listing, optionally opens the first result, and prints the results.
    Exits with status 1 when neither a query nor --list-all is given.
    """
    args = parse_arguments()
    check_database()

    if not args.list_all and not args.query:
        print("Error: Please provide a search query or use --list-all", file=sys.stderr)
        sys.exit(1)

    results = search_ocr_database(args)

    # Open the first matching file if requested
    opened = False
    if args.open and results:
        # full_path is at index 4 for list_all rows, index 2 for search rows
        path_index = 4 if args.list_all else 2
        opened = open_file(results[0][path_index])

    # Stay quiet only when a file was actually opened and details were not
    # requested; an --open that found nothing (or failed to launch) still
    # reports what the search returned instead of exiting silently.
    if args.verbose or not opened:
        display_results(results, args)


if __name__ == "__main__":
    main()