#!/usr/bin/env python3
"""Search the OCR text of screenshots stored in a SQLite database."""

import argparse
import os
import sqlite3
import subprocess
import sys
from datetime import datetime

# Constants
DATABASE_PATH = os.path.expanduser("~/screenshot_ocr.db")

# Number of characters of context shown on each side of a match in a snippet.
SNIPPET_CONTEXT = 40
# Maximum number of characters shown when no snippet can be built.
PREVIEW_LENGTH = 80


def parse_arguments():
    """Parse command line arguments.

    Returns:
        The ``argparse.Namespace`` with the attributes ``query`` (list of
        words), ``case_insensitive``, ``exact``, ``limit``, ``verbose``,
        ``count``, ``list_all``, ``date`` and ``open``.
    """
    parser = argparse.ArgumentParser(
        description="Search for text in OCR results from screenshots."
    )
    parser.add_argument(
        "query", help="Text to search for in the OCR results", nargs="*"
    )
    parser.add_argument(
        "-i",
        "--case-insensitive",
        action="store_true",
        help="Perform case-insensitive search",
    )
    parser.add_argument(
        "-e",
        "--exact",
        action="store_true",
        help="Match exact text only (default is substring match)",
    )
    parser.add_argument(
        "-l",
        "--limit",
        type=int,
        default=25,
        help="Limit number of results (default: 25, 0 for no limit)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show more details including file path and date",
    )
    parser.add_argument(
        "-c", "--count", action="store_true", help="Only show the count of matches"
    )
    parser.add_argument(
        "--list-all", action="store_true", help="List all files in the database"
    )
    parser.add_argument(
        "-d", "--date", action="store_true", help="Sort results by date (newest first)"
    )
    parser.add_argument(
        "-o",
        "--open",
        action="store_true",
        help="Open the first matching file (requires xdg-open)",
    )
    return parser.parse_args()


def check_database(db_path=None):
    """Exit with status 1 if the database file does not exist.

    Only the presence of the file is checked; the schema is not inspected.

    Args:
        db_path: Path of the database file. Defaults to ``DATABASE_PATH``.
    """
    path = DATABASE_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print(f"Error: Database not found at {path}", file=sys.stderr)
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)


def search_ocr_database(args, db_path=None):
    """Search the OCR database for the given query.

    Args:
        args: Parsed command line arguments (see ``parse_arguments``).
        db_path: Path of the database file. Defaults to ``DATABASE_PATH``.

    Returns:
        A list of row tuples. With ``args.list_all`` each row is
        ``(filename, ocr_date, created_date, text_length, full_path)``;
        otherwise each row is a full ``ocr_results`` row (``SELECT *``).

    Raises:
        sqlite3.Error: If the query cannot be executed.
    """
    conn = sqlite3.connect(DATABASE_PATH if db_path is None else db_path)
    try:
        cursor = conn.cursor()

        if args.list_all:
            cursor.execute(
                """
                SELECT filename, ocr_date, created_date, length(ocr_text), full_path
                FROM ocr_results
                ORDER BY filename ASC
                """
            )
            return cursor.fetchall()

        # Join all query words
        query_text = " ".join(args.query)

        if args.exact:
            if args.case_insensitive:
                condition = "LOWER(ocr_text) = LOWER(?)"
            else:
                condition = "ocr_text = ?"
        else:
            # instr() is used instead of LIKE because LIKE is case-insensitive
            # for ASCII by default and treats '%' and '_' in the user's query
            # as wildcards.
            if args.case_insensitive:
                condition = "instr(LOWER(ocr_text), LOWER(?)) > 0"
            else:
                condition = "instr(ocr_text, ?) > 0"

        ordering = "created_date DESC" if args.date else "filename ASC"
        sql_query = f"SELECT * FROM ocr_results WHERE {condition} ORDER BY {ordering}"
        params = [query_text]

        # A count must cover every match, so the limit is skipped for --count.
        if args.limit > 0 and not args.count:
            sql_query += " LIMIT ?"
            params.append(args.limit)

        cursor.execute(sql_query, params)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()


def _preview(text):
    """Return at most PREVIEW_LENGTH characters of text, with an ellipsis if cut."""
    if len(text) > PREVIEW_LENGTH:
        return text[:PREVIEW_LENGTH] + "..."
    return text


def _build_snippet(text, search_term):
    """Return the text surrounding the first match of search_term, or None."""
    # Prefer the exact-case occurrence; fall back to a case-insensitive one.
    pos = text.find(search_term)
    if pos < 0:
        pos = text.lower().find(search_term.lower())
    if pos < 0:
        return None

    start = max(0, pos - SNIPPET_CONTEXT)
    end = min(len(text), pos + len(search_term) + SNIPPET_CONTEXT)

    # Widen both ends to word boundaries so that words are not cut in half.
    while start > 0 and text[start].isalnum():
        start -= 1
    while end < len(text) and text[end].isalnum():
        end += 1

    snippet = text[start:end]
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet = snippet + "..."
    return snippet


def _display_file_listing(results, verbose):
    """Print the rows returned for --list-all."""
    for filename, ocr_date, created_date, text_length, full_path in results:
        if verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Text Length: {text_length} chars")
            print("-" * 40)
        else:
            # Keep only the date part of an ISO timestamp; tolerate NULL.
            if created_date is None:
                created_str = "unknown"
            else:
                created_str = str(created_date).split("T", 1)[0]
            print(f"{filename} | {created_str} | {text_length} chars")


def _display_search_results(results, args):
    """Print the rows returned by a regular search."""
    search_term = " ".join(args.query) if args.query else ""

    for row in results:
        _row_id, filename, full_path, ocr_text, file_size, created_date, ocr_date = row
        # A NULL ocr_text column is shown as empty text.
        text = "" if ocr_text is None else ocr_text

        if args.verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Size: {file_size} bytes")
            print("Text:")
            print("-" * 40)
            print(text)
            print("-" * 80)
            continue

        print(f"[{filename}]")
        snippet = None
        # Show just a snippet of text around the match if not exact
        if not args.exact and search_term:
            snippet = _build_snippet(text, search_term)
        # Without a snippet, just show the first bit of text
        print(snippet if snippet is not None else _preview(text))
        print("-" * 40)


def display_results(results, args):
    """Display the search results.

    Args:
        results: Rows returned by ``search_ocr_database``.
        args: Parsed command line arguments; ``count``, ``list_all``,
            ``verbose``, ``exact``, ``limit`` and ``query`` are read.
    """
    if not results:
        print("No matches found.")
        return

    if args.count:
        print(f"Found {len(results)} matches.")
        return

    # --list-all is never limited, so the note only applies to searches.
    hit_limit = (
        not args.list_all and args.limit > 0 and len(results) == args.limit
    )
    note = f" (showing first {args.limit})" if hit_limit else ""
    print(f"Found {len(results)} matches{note}:")
    print("-" * 80)

    # Handle list_all format differently
    if args.list_all:
        _display_file_listing(results, args.verbose)
    else:
        _display_search_results(results, args)


def open_file(path):
    """Open a file using the default application.

    Args:
        path: Path of the file to hand to ``xdg-open``.

    Returns:
        True if ``xdg-open`` was started, False otherwise.
    """
    if not path or not os.path.exists(path):
        print(f"Error opening file: {path} does not exist", file=sys.stderr)
        return False

    try:
        # The output is discarded: pipes that are never read can fill up and
        # block the child process.
        subprocess.Popen(
            ["xdg-open", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError as e:
        print(f"Error opening file: {e}", file=sys.stderr)
        return False

    print(f"Opening: {path}")
    return True


def main():
    """Main function to run the OCR search."""
    args = parse_arguments()

    check_database()

    if not args.list_all and not args.query:
        print(
            "Error: Please provide a search query or use --list-all",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        results = search_ocr_database(args)
    except sqlite3.Error as e:
        print(f"Error: Could not query the database: {e}", file=sys.stderr)
        sys.exit(1)

    # Open the first matching file if requested
    opening = bool(args.open and results)
    if opening:
        # full_path is at index 4 for list_all and index 2 for regular search
        path_index = 4 if args.list_all else 2
        open_file(results[0][path_index])

    # Display results unless we're only opening a file; an empty result is
    # always reported so that --open never finishes silently.
    if not opening or args.verbose:
        display_results(results, args)


if __name__ == "__main__":
    main()