helper scripts
This commit is contained in:
Executable
+247
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Constants
# Path of the SQLite database this script opens with sqlite3.connect();
# "~" is expanded to the current user's home directory.
DATABASE_PATH = os.path.expanduser("~/screenshot_ocr.db")
|
||||
|
||||
|
||||
def parse_arguments():
    """Define the command-line interface and parse ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``query`` (list of words, possibly
        empty), the boolean switches ``case_insensitive``, ``exact``,
        ``verbose``, ``count``, ``list_all``, ``date`` and ``open``, and the
        integer ``limit`` (default 25).
    """
    switch = {"action": "store_true"}

    # (flags, keyword settings) pairs, listed in the order they should be
    # registered so that --help shows them in the same sequence.
    option_table = [
        (
            ("query",),
            {"help": "Text to search for in the OCR results", "nargs": "*"},
        ),
        (
            ("-i", "--case-insensitive"),
            {**switch, "help": "Perform case-insensitive search"},
        ),
        (
            ("-e", "--exact"),
            {
                **switch,
                "help": "Match exact text only (default is substring match)",
            },
        ),
        (
            ("-l", "--limit"),
            {
                "type": int,
                "default": 25,
                "help": "Limit number of results (default: 25, 0 for no limit)",
            },
        ),
        (
            ("-v", "--verbose"),
            {
                **switch,
                "help": "Show more details including file path and date",
            },
        ),
        (
            ("-c", "--count"),
            {**switch, "help": "Only show the count of matches"},
        ),
        (
            ("--list-all",),
            {**switch, "help": "List all files in the database"},
        ),
        (
            ("-d", "--date"),
            {**switch, "help": "Sort results by date (newest first)"},
        ),
        (
            ("-o", "--open"),
            {
                **switch,
                "help": "Open the first matching file (requires xdg-open)",
            },
        ),
    ]

    cli = argparse.ArgumentParser(
        description="Search for text in OCR results from screenshots."
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
|
||||
|
||||
|
||||
def check_database(db_path=None):
    """Verify that the OCR database exists and contains ``ocr_results``.

    Prints an explanation to standard error and exits with status 1 when the
    file is missing, cannot be read as an SQLite database, or has no
    ``ocr_results`` table/view. Returns ``None`` when the database is usable.

    Args:
        db_path: Optional path of the database to check; defaults to the
            module-level ``DATABASE_PATH``.
    """
    if db_path is None:
        db_path = DATABASE_PATH

    # Test for the file first: sqlite3.connect() would silently create an
    # empty database at a path that does not exist.
    if not os.path.isfile(db_path):
        print(f"Error: Database not found at {db_path}", file=sys.stderr)
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        conn = sqlite3.connect(db_path)
        try:
            found = conn.execute(
                "SELECT 1 FROM sqlite_master "
                "WHERE type IN ('table', 'view') AND name = 'ocr_results'"
            ).fetchone()
        finally:
            conn.close()
    except sqlite3.Error as exc:
        # Raised e.g. when the file is not an SQLite database at all.
        print(f"Error: Cannot read database at {db_path}: {exc}", file=sys.stderr)
        sys.exit(1)

    if found is None:
        print(
            f"Error: Database at {db_path} has no 'ocr_results' table.",
            file=sys.stderr,
        )
        print(
            "Run the OCR script first to create and populate the database.",
            file=sys.stderr,
        )
        sys.exit(1)
|
||||
|
||||
|
||||
def search_ocr_database(args, db_path=None):
    """Query the OCR database and return the matching rows.

    Args:
        args: Parsed command-line namespace. Reads ``list_all``, ``query``,
            ``exact``, ``case_insensitive``, ``date`` and ``limit``.
        db_path: Optional path of the SQLite database; defaults to the
            module-level ``DATABASE_PATH``.

    Returns:
        A list of row tuples. With ``args.list_all`` every row is
        ``(filename, ocr_date, created_date, length(ocr_text), full_path)``
        ordered by filename (``--limit`` and ``--date`` are not applied).
        Otherwise every row is a complete ``ocr_results`` record
        (``SELECT *``), ordered by ``created_date`` descending when
        ``args.date`` is set and by filename otherwise, and truncated to
        ``args.limit`` rows when that is positive.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    if db_path is None:
        db_path = DATABASE_PATH

    if args.list_all:
        sql_query = """
            SELECT filename, ocr_date, created_date, length(ocr_text), full_path
            FROM ocr_results
            ORDER BY filename ASC
        """
        params = []
    else:
        if args.case_insensitive:
            haystack, needle = "LOWER(ocr_text)", "LOWER(?)"
        else:
            haystack, needle = "ocr_text", "?"

        if args.exact:
            condition = f"{haystack} = {needle}"
        else:
            # instr() instead of LIKE: SQLite's LIKE ignores ASCII case by
            # default (so it cannot provide a case-sensitive search) and
            # treats '%' and '_' in the user's text as wildcards. instr() is
            # a literal, case-sensitive substring test.
            condition = f"instr({haystack}, {needle}) > 0"

        ordering = "created_date DESC" if args.date else "filename ASC"
        sql_query = (
            f"SELECT * FROM ocr_results WHERE {condition} ORDER BY {ordering}"
        )
        params = [" ".join(args.query)]

        if args.limit > 0:
            sql_query += " LIMIT ?"
            params.append(args.limit)

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(sql_query, params).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
|
||||
|
||||
|
||||
def _preview(text, width=80):
    """Return the first *width* characters of *text*, adding "..." if cut."""
    return text[:width] + "..." if len(text) > width else text


def _make_snippet(text, term, context=40):
    """Return an excerpt of *text* around the first hit of *term*, or None.

    The match ignores case. Offsets come from a regex match on the original
    text rather than from ``text.lower()``, because lowercasing can change
    the length of some Unicode strings and shift the positions.
    """
    import re

    match = re.search(re.escape(term), text, re.IGNORECASE)
    if match is None:
        return None

    start = max(0, match.start() - context)
    end = min(len(text), match.end() + context)

    # Widen the window so that it does not begin or end inside a word.
    while start > 0 and text[start].isalnum():
        start -= 1
    while end < len(text) and text[end].isalnum():
        end += 1

    snippet = text[start:end]
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet = snippet + "..."
    return snippet


def display_results(results, args):
    """Print search (or ``--list-all``) results to standard output.

    Args:
        results: Sequence of row tuples. With ``args.list_all`` each row is
            ``(filename, ocr_date, created_date, text_length, full_path)``;
            otherwise each row has seven fields, unpacked as
            ``(id, filename, full_path, ocr_text, file_size, created_date,
            ocr_date)``.
        args: Parsed command-line namespace. Reads ``count``, ``limit``,
            ``list_all``, ``verbose``, ``exact`` and ``query``.
    """
    if not results:
        print("No matches found.")
        return

    if args.count:
        print(f"Found {len(results)} matches.")
        return

    # The --list-all query is never limited, so the "showing first N" hint
    # is only meaningful for regular searches.
    truncated = (
        not args.list_all and args.limit > 0 and len(results) == args.limit
    )
    hint = f" (showing first {args.limit})" if truncated else ""
    print(f"Found {len(results)} matches{hint}:")
    print("-" * 80)

    # Handle list_all format differently
    if args.list_all:
        for filename, ocr_date, created_date, text_length, full_path in results:
            if args.verbose:
                print(f"File: {filename}")
                print(f"Path: {full_path}")
                print(f"Created: {created_date}")
                print(f"OCR Date: {ocr_date}")
                print(f"Text Length: {text_length} chars")
                print("-" * 40)
            else:
                # Keep only the date part of an ISO "dateTtime" stamp; a
                # NULL (None) date is printed as-is instead of crashing.
                if isinstance(created_date, str):
                    created_str = created_date.partition("T")[0]
                else:
                    created_str = created_date
                print(f"{filename} | {created_str} | {text_length} chars")
        return

    # Display regular search results
    search_term = " ".join(args.query) if args.query else ""
    for row in results:
        _row_id, filename, full_path, ocr_text, file_size, created_date, ocr_date = row
        # A NULL text column arrives as None; treat it as empty text.
        ocr_text = ocr_text or ""

        if args.verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Size: {file_size} bytes")
            print("Text:")
            print("-" * 40)
            print(ocr_text)
            print("-" * 80)
            continue

        print(f"[{filename}]")
        snippet = None
        # Show just a snippet of text around the match if not exact
        if not args.exact and args.query:
            snippet = _make_snippet(ocr_text, search_term)
        # Fall back to the start of the text when no snippet applies or the
        # term cannot be located in the text.
        print(snippet if snippet is not None else _preview(ocr_text))
        print("-" * 40)
|
||||
|
||||
|
||||
def open_file(path):
    """Launch the default application for *path* via ``xdg-open``.

    The helper process is started but not waited for.

    Args:
        path: Location of the file to open.

    Returns:
        True if ``xdg-open`` was started, False if it could not be launched
        (for example when it is not installed or *path* is not a usable
        argument); in that case the reason is printed to standard error.
    """
    import subprocess

    try:
        # DEVNULL rather than PIPE: nothing ever reads from the child, so
        # pipes would only leak file descriptors and could stall the child
        # once a pipe buffer fills up.
        subprocess.Popen(
            ["xdg-open", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError, TypeError) as e:
        print(f"Error opening file: {e}", file=sys.stderr)
        return False

    print(f"Opening: {path}")
    return True
|
||||
|
||||
|
||||
def main():
    """Run the OCR search command-line tool.

    Parses the arguments, validates them, runs the search (or the
    ``--list-all`` listing), optionally opens the first hit and prints the
    results. Exits with status 1 when neither a query nor ``--list-all`` is
    given.
    """
    args = parse_arguments()

    # Validate the request before touching the database so that a usage
    # error is reported even when no database exists yet. A query made only
    # of empty strings is rejected too: it would match every row.
    if not args.list_all and not any(args.query):
        print(
            "Error: Please provide a search query or use --list-all",
            file=sys.stderr,
        )
        sys.exit(1)

    check_database()
    results = search_ocr_database(args)

    # Open the first matching file if requested
    if args.open and results:
        # full_path sits at index 4 in --list-all rows and at index 2 in
        # regular search rows.
        path_index = 4 if args.list_all else 2
        open_file(results[0][path_index])

    # The listing is skipped when we only open a file (--open without
    # --verbose), but an empty result is always reported so that --open
    # never ends silently.
    if not results or not args.open or args.verbose:
        display_results(results, args)
|
||||
|
||||
|
||||
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user