248 lines
7.5 KiB
Python
Executable File
248 lines
7.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
# Location of the SQLite database queried by this tool: a file named
# "screenshot_ocr.db" in the current user's home directory.
DATABASE_PATH = os.path.expanduser("~/screenshot_ocr.db")
|
|
|
|
|
|
def parse_arguments(argv=None):
    """Parse command line arguments for the OCR search tool.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what the original no-argument call did.

    Returns:
        An ``argparse.Namespace`` with the attributes ``query`` (list of
        words, possibly empty), ``case_insensitive``, ``exact``, ``limit``,
        ``verbose``, ``count``, ``list_all``, ``date`` and ``open``.
    """
    parser = argparse.ArgumentParser(
        description="Search for text in OCR results from screenshots."
    )
    # nargs="*" lets a multi-word query be typed without quoting; callers
    # join the words back together with single spaces.
    parser.add_argument(
        "query", help="Text to search for in the OCR results", nargs="*"
    )
    parser.add_argument(
        "-i",
        "--case-insensitive",
        action="store_true",
        help="Perform case-insensitive search",
    )
    parser.add_argument(
        "-e",
        "--exact",
        action="store_true",
        help="Match exact text only (default is substring match)",
    )
    parser.add_argument(
        "-l",
        "--limit",
        type=int,
        default=25,
        help="Limit number of results (default: 25, 0 for no limit)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show more details including file path and date",
    )
    parser.add_argument(
        "-c", "--count", action="store_true", help="Only show the count of matches"
    )
    parser.add_argument(
        "--list-all", action="store_true", help="List all files in the database"
    )
    parser.add_argument(
        "-d", "--date", action="store_true", help="Sort results by date (newest first)"
    )
    parser.add_argument(
        "-o",
        "--open",
        action="store_true",
        help="Open the first matching file (requires xdg-open)",
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
def check_database():
    """Check that the database exists and has the expected structure.

    Verifies that a file exists at ``DATABASE_PATH``, that SQLite can read
    it, and that it contains an ``ocr_results`` table (the table every query
    in this script reads from).

    Returns:
        None when the database looks usable.

    Raises:
        SystemExit: with status 1, after printing an explanation to stderr,
            when any of the checks above fails.
    """
    if not os.path.exists(DATABASE_PATH):
        print(f"Error: Database not found at {DATABASE_PATH}", file=sys.stderr)
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        conn = sqlite3.connect(DATABASE_PATH)
        try:
            table = conn.execute(
                "SELECT name FROM sqlite_master "
                "WHERE type IN ('table', 'view') AND name = ?",
                ("ocr_results",),
            ).fetchone()
        finally:
            # Close even when the probe query raises (e.g. not a database).
            conn.close()
    except sqlite3.Error as exc:
        print(
            f"Error: Could not read database at {DATABASE_PATH}: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)

    if table is None:
        print(
            f"Error: Database at {DATABASE_PATH} has no 'ocr_results' table.",
            file=sys.stderr,
        )
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)
|
|
|
|
|
|
def search_ocr_database(args):
    """Search the OCR database for the given query.

    Args:
        args: Parsed command line namespace. Reads ``list_all``, ``query``
            (list of words, joined with single spaces), ``exact``,
            ``case_insensitive``, ``date`` and ``limit``.

    Returns:
        A list of row tuples. With ``args.list_all`` each row is
        ``(filename, ocr_date, created_date, length(ocr_text), full_path)``
        for every record, ordered by filename (the limit is not applied).
        Otherwise each row is a full ``ocr_results`` record (``SELECT *``)
        whose ``ocr_text`` matches the query.

    Raises:
        sqlite3.Error: if the database cannot be queried.
    """
    conn = sqlite3.connect(DATABASE_PATH)
    try:
        cursor = conn.cursor()

        if args.list_all:
            cursor.execute(
                """
                SELECT filename, ocr_date, created_date, length(ocr_text), full_path
                FROM ocr_results
                ORDER BY filename ASC
                """
            )
            return cursor.fetchall()

        # Join all query words
        query_text = " ".join(args.query)

        # Substring matching uses instr() rather than LIKE: SQLite's LIKE is
        # case-insensitive for ASCII by default (so it cannot provide the
        # case-sensitive default), and it treats '%' and '_' in the user's
        # text as wildcards. instr() is a literal, case-sensitive search.
        if args.exact:
            if args.case_insensitive:
                condition = "LOWER(ocr_text) = LOWER(?)"
            else:
                condition = "ocr_text = ?"
        elif args.case_insensitive:
            condition = "instr(LOWER(ocr_text), LOWER(?)) > 0"
        else:
            condition = "instr(ocr_text, ?) > 0"

        ordering = "created_date DESC" if args.date else "filename ASC"
        sql_query = f"SELECT * FROM ocr_results WHERE {condition} ORDER BY {ordering}"
        params = [query_text]

        # A limit of 0 (or less) means "no limit"; bind it instead of
        # formatting it into the SQL text.
        if args.limit > 0:
            sql_query += " LIMIT ?"
            params.append(args.limit)

        cursor.execute(sql_query, params)
        return cursor.fetchall()
    finally:
        # Always release the connection, including when a query raises.
        conn.close()
|
|
|
|
|
|
def _preview(text, width=80):
    """Return the first *width* characters of *text*, adding "..." if it was cut."""
    return text[:width] + "..." if len(text) > width else text


def _snippet_around(text, term, context=40):
    """Return an excerpt of *text* around the first occurrence of *term*.

    The occurrence is located case-insensitively. The excerpt spans
    *context* characters on each side of the match and is widened outward
    while it would otherwise cut through an alphanumeric run. "..." marks a
    side that was truncated. Returns None when *term* does not occur.
    """
    term = term.lower()
    pos = text.lower().find(term)
    if pos < 0:
        return None

    start = max(0, pos - context)
    end = min(len(text), pos + len(term) + context)
    # Find word boundaries
    while start > 0 and text[start].isalnum():
        start -= 1
    while end < len(text) and text[end].isalnum():
        end += 1

    snippet = text[start:end]
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet += "..."
    return snippet


def display_results(results, args):
    """Display the search results on stdout.

    Args:
        results: Rows returned by the search. With ``args.list_all`` each
            row is ``(filename, ocr_date, created_date, text_length,
            full_path)``; otherwise each row is a 7-tuple unpacked as
            ``(id, filename, full_path, ocr_text, file_size, created_date,
            ocr_date)``.
        args: Parsed command line namespace. Reads ``count``, ``limit``,
            ``list_all``, ``verbose``, ``exact`` and ``query``.
    """
    if not results:
        print("No matches found.")
        return

    if args.count:
        print(f"Found {len(results)} matches.")
        return

    # The listing query never applies the limit, so only regular searches
    # can have been cut off at exactly args.limit rows.
    truncated = (
        not args.list_all and args.limit > 0 and len(results) == args.limit
    )
    suffix = f" (showing first {args.limit})" if truncated else ""
    print(f"Found {len(results)} matches{suffix}:")
    print("-" * 80)

    # Handle list_all format differently
    if args.list_all:
        for filename, ocr_date, created_date, text_length, full_path in results:
            if args.verbose:
                print(f"File: {filename}")
                print(f"Path: {full_path}")
                print(f"Created: {created_date}")
                print(f"OCR Date: {ocr_date}")
                print(f"Text Length: {text_length} chars")
                print("-" * 40)
                continue
            # Keep only the date part of an ISO "dateTtime" stamp; a NULL
            # column arrives as None and must not be searched for "T".
            if isinstance(created_date, str):
                created_str = created_date.split("T")[0]
            else:
                created_str = "unknown"
            # length(NULL) is NULL in SQLite, so a missing text counts as 0.
            print(f"{filename} | {created_str} | {text_length or 0} chars")
        return

    # Snippets around the match only make sense for substring searches.
    use_snippet = bool(args.query) and not args.exact
    search_term = " ".join(args.query) if use_snippet else ""

    # Display regular search results
    for row in results:
        _row_id, filename, full_path, ocr_text, file_size, created_date, ocr_date = row
        text = "" if ocr_text is None else ocr_text

        if args.verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Size: {file_size} bytes")
            print("Text:")
            print("-" * 40)
            print(text)
            print("-" * 80)
            continue

        print(f"[{filename}]")
        snippet = _snippet_around(text, search_term) if use_snippet else None
        # Fall back to the start of the text when no snippet applies or the
        # term cannot be located in the text.
        print(snippet if snippet is not None else _preview(text))
        print("-" * 40)
|
|
|
|
|
|
def open_file(path):
    """Open a file using the default application (via ``xdg-open``).

    The launcher is started in the background and not waited for.

    Args:
        path: Path of the file to open.

    Returns:
        True if the ``xdg-open`` process was started, False if *path* is
        empty/None or the process could not be started (for example when
        ``xdg-open`` is not installed). Failures are reported on stderr.
    """
    import subprocess
    import sys

    if not path:
        print("Error opening file: no path available", file=sys.stderr)
        return False

    try:
        # Discard the launcher's output. Pipes that nobody reads leak file
        # descriptors and can block the child once their buffer fills up.
        subprocess.Popen(
            ["xdg-open", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError) as e:
        # OSError: xdg-open missing or not executable.
        # ValueError: invalid argument such as an embedded NUL byte.
        print(f"Error opening file: {e}", file=sys.stderr)
        return False

    print(f"Opening: {path}")
    return True
|
|
|
|
|
|
def main():
    """Main function to run the OCR search.

    Parses the command line, verifies the database, runs the search (or the
    full listing), optionally opens the first result, and prints the results.

    Raises:
        SystemExit: with status 1 when neither a query nor ``--list-all``
            was given (and whenever ``check_database`` rejects the database).
    """
    args = parse_arguments()
    check_database()

    if not args.list_all and not args.query:
        print(
            "Error: Please provide a search query or use --list-all",
            file=sys.stderr,
        )
        sys.exit(1)

    results = search_ocr_database(args)

    # Open the first matching file if requested
    opened = False
    if args.open and results:
        # full_path is at index 4 for list_all rows, index 2 for search rows
        path_index = 4 if args.list_all else 2
        opened = open_file(results[0][path_index])

    # Stay quiet only when a file was actually opened and --verbose was not
    # requested. If --open found nothing to open (or the launch failed) the
    # user still gets the normal output, including "No matches found.".
    if args.verbose or not opened:
        display_results(results, args)
|
|
|
|
|
|
# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|