helper scripts
This commit is contained in:
Executable
+97
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# direct_rofi_ocr.sh - Direct script to search OCR'd screenshots with rofi
|
||||
# This script displays OCR data from SQLite and allows opening files with rofi
|
||||
|
||||
# Database path
|
||||
# Resolved from $HOME rather than a fixed user directory so the script works
# for whichever user runs it (the OCR script writes to ~/screenshot_ocr.db).
DB_PATH="$HOME/screenshot_ocr.db"
|
||||
|
||||
# Check dependencies
|
||||
# Verify that rofi, sqlite3 and the OCR database are all available.
# Globals:  DB_PATH (read)
# Outputs:  one error message per missing prerequisite, on stdout
# Returns:  0 when everything is present, 1 otherwise
check_deps() {
  local status=0
  local tool

  for tool in rofi sqlite3; do
    command -v "$tool" &> /dev/null && continue
    case "$tool" in
      rofi)
        echo "Error: rofi is not installed. Please install it with: sudo pacman -S rofi"
        ;;
      sqlite3)
        echo "Error: sqlite3 is not installed. Please install it with: sudo pacman -S sqlite"
        ;;
    esac
    status=1
  done

  if [ ! -f "$DB_PATH" ]; then
    echo "Error: Database not found at $DB_PATH"
    echo "Please run the OCR script first to create and populate the database."
    status=1
  fi

  return "$status"
}
|
||||
|
||||
# Format OCR text (remove newlines, limit length)
|
||||
# Flatten OCR text to a single display line.
# Arguments: $1 - raw OCR text (may contain newlines)
# Outputs:   the text with newlines turned into spaces, runs of spaces
#            squeezed, and anything past 80 characters replaced by "..."
format_text() {
  local text="$1"

  # printf instead of echo: echo would append a newline that tr turns into a
  # trailing space, and would swallow text such as "-n".
  text=$(printf '%s' "$text" | tr '\n' ' ' | tr -s ' ')
  # A trailing newline in the input leaves exactly one trailing space.
  text="${text% }"

  if [ "${#text}" -gt 80 ]; then
    text="${text:0:80}..."
  fi

  printf '%s\n' "$text"
}
|
||||
|
||||
# Main function
|
||||
# Show every OCR'd screenshot in rofi and open the one the user picks.
# Globals:  DB_PATH (read)
# Outputs:  progress and error messages on stdout
# Exits:    1 when dependencies are missing or the database has no rows,
#           0 when the user cancels the menu
main() {
  if ! check_deps; then
    exit 1
  fi

  local -a entries=() paths=()
  local filename path text formatted index

  echo "Querying database..."
  # Newlines in ocr_text are flattened inside SQL so that each row is exactly
  # one line, and the unit separator (0x1f) is used between columns so that a
  # '|' inside the OCR text cannot be mistaken for a field boundary.
  while IFS=$'\x1f' read -r filename path text; do
    formatted=$(format_text "$text")
    entries+=("$filename | $formatted")
    paths+=("$path")
  done < <(sqlite3 -separator $'\x1f' "$DB_PATH" \
    "SELECT filename, full_path,
            replace(replace(ocr_text, char(13), ' '), char(10), ' ')
       FROM ocr_results")

  if [ "${#entries[@]}" -eq 0 ]; then
    echo "No OCR data found in database. Run ocr_screenshots.py first."
    exit 1
  fi

  echo "Opening rofi dialog..."
  # -format i makes rofi print the zero-based index of the chosen row, so the
  # path is looked up by position instead of by fragile text matching.
  index=$(printf '%s\n' "${entries[@]}" \
    | rofi -dmenu -i -format i -p "Screenshot OCR" -width 80)

  # Empty output means cancel; -1 means the user typed a custom string.
  if [[ ! "$index" =~ ^[0-9]+$ ]] || (( index >= ${#entries[@]} )); then
    echo "No selection made."
    exit 0
  fi

  path="${paths[index]}"
  if [ -n "$path" ] && [ -f "$path" ]; then
    echo "Opening: $path"
    xdg-open "$path" &
  else
    echo "Error: Could not find file path for selection."
    echo "Selected: ${entries[index]}"
    echo "Path: $path"
  fi
}
|
||||
|
||||
# Run main function
|
||||
main
|
||||
Executable
+92
@@ -0,0 +1,92 @@
|
||||
#!/bin/bash
|
||||
|
||||
# ocr_rofi.sh - Search OCR'd screenshots with rofi
|
||||
# This script displays OCR data from SQLite and allows opening files with rofi
|
||||
|
||||
# Database path
|
||||
DB_PATH="$HOME/screenshot_ocr.db"
SCREENSHOTS_DIR="$HOME/Screenshots"
MAX_TEXT_LENGTH=100

# Check dependencies
if ! command -v rofi &> /dev/null; then
  echo "Error: rofi is not installed. Please install it with: sudo pacman -S rofi"
  exit 1
fi

if ! command -v sqlite3 &> /dev/null; then
  echo "Error: sqlite3 is not installed. Please install it with: sudo pacman -S sqlite"
  exit 1
fi

if [ ! -f "$DB_PATH" ]; then
  echo "Error: Database not found at $DB_PATH"
  echo "Run the OCR script first to create and populate the database."
  exit 1
fi

# Query the database and build two parallel arrays: the rofi menu rows and
# the file path belonging to each row.
# Newlines inside ocr_text are flattened in SQL so that every database row
# arrives on exactly one line; the unit separator (0x1f) keeps '|' characters
# in the OCR text from being read as column boundaries.
echo "Preparing OCR data for search..."
entries=()
paths=()
while IFS=$'\x1f' read -r filename ocr_text path; do
  # Squeeze runs of spaces, then limit the length for display
  clean_text=$(printf '%s' "$ocr_text" | tr -s ' ')
  if [ "${#clean_text}" -gt "$MAX_TEXT_LENGTH" ]; then
    clean_text="${clean_text:0:$MAX_TEXT_LENGTH}..."
  fi

  entries+=("$filename | $clean_text")
  paths+=("$path")
done < <(sqlite3 -separator $'\x1f' "$DB_PATH" \
  "SELECT filename,
          replace(replace(ocr_text, char(13), ' '), char(10), ' '),
          full_path
     FROM ocr_results
    ORDER BY filename")

# Count entries
entry_count=${#entries[@]}
if [ "$entry_count" -eq 0 ]; then
  echo "No OCR data found in database. Run ocr_screenshots.py first."
  exit 1
fi

echo "Found $entry_count screenshots with OCR data."

# Display rofi menu. -format i makes rofi print the zero-based index of the
# selected row, which maps directly onto the paths array.
selected_index=$(printf '%s\n' "${entries[@]}" \
  | rofi -dmenu -i -format i -p "Screenshot OCR" \
      -width 80 -lines 15 -font "mono 10")

# Exit if no selection (empty output on cancel, -1 for custom input)
if [[ ! "$selected_index" =~ ^[0-9]+$ ]] || (( selected_index >= entry_count )); then
  echo "No selection made."
  exit 0
fi

selected_path="${paths[selected_index]}"

# Open the file
if [ -n "$selected_path" ] && [ -f "$selected_path" ]; then
  echo "Opening: $selected_path"
  xdg-open "$selected_path" &
else
  echo "Error: Could not find file: $selected_path"
  exit 1
fi

exit 0
|
||||
Executable
+169
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import glob
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration
|
||||
SCREENSHOTS_DIR = os.path.expanduser("~/Screenshots")
|
||||
DATABASE_PATH = os.path.expanduser("~/screenshot_ocr.db")
|
||||
|
||||
|
||||
def create_database():
    """Make sure the SQLite file and its ``ocr_results`` table exist.

    Connects to DATABASE_PATH (SQLite creates the file on first use), issues a
    ``CREATE TABLE IF NOT EXISTS`` for the results table, commits, and prints
    the database location.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS ocr_results (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            filename TEXT UNIQUE,
            full_path TEXT,
            ocr_text TEXT,
            file_size INTEGER,
            created_date TEXT,
            ocr_date TEXT
        )
    """

    db = sqlite3.connect(DATABASE_PATH)
    db.cursor().execute(schema)
    db.commit()
    db.close()

    print(f"Database initialized at {DATABASE_PATH}")
|
||||
|
||||
|
||||
def get_processed_files():
    """Return the set of filenames already stored in ``ocr_results``."""
    db = sqlite3.connect(DATABASE_PATH)
    rows = db.cursor().execute("SELECT filename FROM ocr_results").fetchall()
    db.close()

    seen = set()
    for record in rows:
        seen.add(record[0])
    return seen
|
||||
|
||||
|
||||
def perform_ocr(image_path):
    """Run tesseract on ``image_path`` and return the recognised text.

    Tesseract writes its result to ``<output base>.txt``; the output base is
    placed in a private temporary directory so that the file name is not
    predictable, cannot collide with another run, and is removed even when
    tesseract or the read fails.

    Args:
        image_path: Path of the image file to OCR.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``""``
        when tesseract fails or any other error occurs (the error is printed).
    """
    import tempfile  # local import: only this function needs it

    try:
        with tempfile.TemporaryDirectory(prefix="ocr_") as work_dir:
            # Tesseract appends ".txt" to the base name itself.
            output_base = os.path.join(work_dir, "result")

            subprocess.run(
                ["tesseract", image_path, output_base],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            with open(f"{output_base}.txt", "r", encoding="utf-8") as f:
                return f.read().strip()
    except subprocess.CalledProcessError as e:
        print(f"Error running tesseract on {image_path}: {str(e)}")
        return ""
    except Exception as e:
        # Includes a missing tesseract binary and unreadable/undecodable output.
        print(f"Error processing {image_path}: {str(e)}")
        return ""
|
||||
|
||||
|
||||
def add_to_database(filename, full_path, ocr_text, file_size, created_date):
    """Insert one OCR result row, stamping it with the current time.

    Args:
        filename: Base name of the image (UNIQUE column in the table).
        full_path: Path of the image file.
        ocr_text: Text recognised in the image.
        file_size: Size of the image file.
        created_date: Date string stored in ``created_date``.

    A duplicate filename or any other failure is reported with a printed
    message rather than raised; the connection is always closed.
    """
    insert_sql = """
        INSERT INTO ocr_results
        (filename, full_path, ocr_text, file_size, created_date, ocr_date)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    db = sqlite3.connect(DATABASE_PATH)
    cur = db.cursor()

    try:
        record = (
            filename,
            full_path,
            ocr_text,
            file_size,
            created_date,
            datetime.now().isoformat(),
        )
        cur.execute(insert_sql, record)
        db.commit()
        print(f"Added {filename} to database")
    except sqlite3.IntegrityError:
        print(f"File {filename} already exists in database")
    except Exception as e:
        print(f"Error adding {filename} to database: {str(e)}")
    finally:
        db.close()
|
||||
|
||||
|
||||
def main():
    """OCR every not-yet-processed image in SCREENSHOTS_DIR into the database.

    Initialises the database, collects ``*.png``, ``*.jpg`` and ``*.jpeg``
    files, skips names already stored, runs OCR on the rest, stores non-empty
    results, and prints a summary.
    """
    print("Starting OCR process for screenshots...")

    create_database()

    processed_files = get_processed_files()
    print(f"Found {len(processed_files)} already processed files")

    # Same collection order as three separate glob calls: png, jpg, jpeg.
    image_files = []
    for pattern in ("*.png", "*.jpg", "*.jpeg"):
        image_files += glob.glob(os.path.join(SCREENSHOTS_DIR, pattern))
    print(f"Found {len(image_files)} image files")

    processed_count = skipped_count = error_count = 0

    for image_path in image_files:
        filename = os.path.basename(image_path)

        if filename in processed_files:
            print(f"Skipping {filename} (already processed)")
            skipped_count += 1
            continue

        print(f"Processing {filename}...")

        info = os.stat(image_path)
        file_size = info.st_size
        # The modification time is what gets stored as "created_date".
        created_date = datetime.fromtimestamp(info.st_mtime).isoformat()

        ocr_text = perform_ocr(image_path)
        if not ocr_text:
            print(f"No OCR text extracted from {filename}")
            error_count += 1
            continue

        add_to_database(filename, image_path, ocr_text, file_size, created_date)
        processed_count += 1

    print("\nOCR process completed:")
    print(f"- Processed: {processed_count}")
    print(f"- Skipped (already in database): {skipped_count}")
    print(f"- Errors: {error_count}")
    print(f"- Total files in database: {len(processed_files) + processed_count}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Executable
+58
@@ -0,0 +1,58 @@
|
||||
#!/bin/bash
|
||||
|
||||
# OCR screenshot images and store results in SQLite database
|
||||
# This script is a wrapper for the ocr_screenshots.py Python script
|
||||
|
||||
# Set up variables
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PYTHON_SCRIPT="${SCRIPT_DIR}/ocr_screenshots.py"
DATABASE_PATH="$HOME/screenshot_ocr.db"
SCREENSHOTS_DIR="$HOME/Screenshots"

# Check if Python script exists
if [ ! -f "$PYTHON_SCRIPT" ]; then
  echo "Error: Python script not found at $PYTHON_SCRIPT"
  exit 1
fi

# Check if a Python 3 interpreter is installed (the OCR script is python3;
# a bare "python" may be missing or may be Python 2 on some systems)
if ! command -v python3 &> /dev/null; then
  echo "Error: python3 not installed. Please install it with:"
  echo " sudo pacman -S python"
  exit 1
fi

# Check if tesseract is installed
if ! command -v tesseract &> /dev/null; then
  echo "Error: tesseract not installed. Please install it with:"
  echo " sudo pacman -S tesseract tesseract-data-eng"
  exit 1
fi

# Check if SQLite is installed
if ! command -v sqlite3 &> /dev/null; then
  echo "Error: sqlite3 not installed. Please install it with:"
  echo " sudo pacman -S sqlite"
  exit 1
fi

# Check if screenshots directory exists
if [ ! -d "$SCREENSHOTS_DIR" ]; then
  echo "Error: Screenshots directory not found at $SCREENSHOTS_DIR"
  exit 1
fi

echo "Starting OCR process for screenshots..."
echo "Database path: $DATABASE_PATH"
echo "Screenshots directory: $SCREENSHOTS_DIR"

# Run the Python script and test its status directly, so no command can slip
# in between the run and the check
if ! python3 "$PYTHON_SCRIPT"; then
  echo "Error: OCR process failed"
  exit 1
fi

# Count entries in database
if [ -f "$DATABASE_PATH" ]; then
  count=$(sqlite3 "$DATABASE_PATH" "SELECT COUNT(*) FROM ocr_results")
  echo "Database contains $count entries"
fi
echo "OCR process completed successfully"

exit 0
|
||||
Executable
+179
@@ -0,0 +1,179 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
# Configuration
|
||||
DB_PATH = os.path.expanduser("~/screenshot_ocr.db")
|
||||
SCREENSHOTS_DIR = os.path.expanduser("~/Screenshots")
|
||||
ROFI_PROMPT = "OCR Search"
|
||||
|
||||
|
||||
def check_dependencies():
    """Check that rofi is on PATH and that the OCR database exists.

    Returns:
        True when both prerequisites are met; otherwise prints what is
        missing and returns False.
    """
    # shutil.which searches PATH in-process, so the check does not itself
    # depend on an external `which` binary being installed.
    import shutil

    if shutil.which("rofi") is None:
        print(
            "Error: rofi is not installed. Please install it with: sudo pacman -S rofi"
        )
        return False

    if not os.path.exists(DB_PATH):
        print(f"Error: Database not found at {DB_PATH}")
        print("Run the OCR script first to create and populate the database.")
        return False

    return True
|
||||
|
||||
|
||||
def format_text(text, max_length=100):
    """Collapse OCR text into one short line for display in rofi.

    Every run of whitespace (newlines, tabs, repeated spaces) becomes a single
    space and leading/trailing whitespace is dropped. The collapse is done
    with split/join, which cannot loop forever the way a repeated
    ``replace`` whose pattern equals its replacement does.

    Args:
        text: Raw OCR text; ``None`` (a NULL database value) is treated as "".
        max_length: Maximum number of characters kept before "..." is added.

    Returns:
        The single-line text, truncated to ``max_length`` characters plus
        "..." when it was longer.
    """
    text = " ".join((text or "").split())

    if len(text) > max_length:
        text = text[:max_length] + "..."

    return text
|
||||
|
||||
|
||||
def get_screenshot_data():
    """Load all OCR rows and pair each display line with its file path.

    Returns:
        A list of ``(display_text, full_path)`` tuples ordered by filename,
        where ``display_text`` is ``"<filename> | <formatted ocr text>"``.
        On a database error the error is printed and ``[]`` is returned.
    """
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        cursor.execute(
            "SELECT filename, ocr_text, full_path FROM ocr_results ORDER BY filename"
        )

        results = []
        for filename, ocr_text, full_path in cursor.fetchall():
            # Format for display: filename | ocr_text
            display_text = f"{filename} | {format_text(ocr_text)}"
            results.append((display_text, full_path))
        return results
    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return []
    finally:
        # Close on every path, including a failed query.
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def show_rofi_menu(items):
    """Show ``items`` in a rofi dmenu and return the chosen entry.

    The menu rows are fed to rofi on stdin and rofi is asked (``-format i``)
    to print the zero-based index of the chosen row, so the selection is
    mapped back by position rather than by comparing display strings.

    Args:
        items: Sequence of ``(display_text, full_path)`` pairs.

    Returns:
        ``(display_text, full_path)`` of the selected row, or ``(None, None)``
        when there are no items, the user cancels, rofi fails, or the reply
        is not a valid row index.
    """
    if not items:
        print("No items to display.")
        return None, None

    cmd = [
        "rofi",
        "-dmenu",
        "-i",  # Case-insensitive matching
        "-p",
        ROFI_PROMPT,
        "-width",
        "80",
        "-lines",
        "15",
        "-font",
        "mono 10",
        "-format",
        "i",  # Print the index of the selected row
    ]
    menu_input = "".join(f"{display_text}\n" for display_text, _ in items)

    result = subprocess.run(
        cmd,
        input=menu_input,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )

    if result.returncode != 0:
        # A non-zero status with nothing on stderr is the user cancelling;
        # only report it when rofi actually said something.
        if result.stderr.strip():
            print(f"Rofi error: {result.stderr}")
        return None, None

    try:
        index = int(result.stdout.strip())
    except ValueError:
        return None, None

    # -1 is reported for text that matches no row.
    if not 0 <= index < len(items):
        return None, None

    display_text, full_path = items[index]
    return display_text, full_path
|
||||
|
||||
|
||||
def open_file(file_path):
    """Open ``file_path`` with the desktop's default application (xdg-open).

    Args:
        file_path: Path of the file to open.

    Returns:
        True when xdg-open was started, False when the file does not exist or
        the launch failed (a message is printed in both failure cases).
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found: {file_path}")
        return False

    try:
        # Output is discarded rather than piped: nothing ever reads the pipes,
        # so a chatty viewer could otherwise block once a pipe buffer fills.
        subprocess.Popen(
            ["xdg-open", file_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        print(f"Opening: {file_path}")
        return True
    except Exception as e:
        print(f"Error opening file: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Check prerequisites, show the rofi menu, and open the chosen file.

    Exits with status 1 when a dependency is missing or the database yields
    no rows; prints "No selection made." when nothing was picked.
    """
    if not check_dependencies():
        sys.exit(1)

    items = get_screenshot_data()
    if not items:
        print("No OCR data found in the database.")
        sys.exit(1)

    selection, file_path = show_rofi_menu(items)

    if not (selection and file_path):
        print("No selection made.")
        return

    open_file(file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Executable
+129
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# rofi_ocr_search.sh - Search and open OCR'd screenshot files using rofi
|
||||
# This script displays OCR'd screenshot data in rofi and allows opening selected files
|
||||
|
||||
# Constants
|
||||
DATABASE_PATH="$HOME/screenshot_ocr.db"
|
||||
SCREENSHOTS_DIR="$HOME/Screenshots"
|
||||
ROFI_PROMPT="Screenshot OCR"
|
||||
MAX_DISPLAY_LENGTH=100
|
||||
|
||||
# Check if required programs are installed
|
||||
# Abort the script unless rofi, sqlite3, xdg-open and the database exist.
# Globals:  DATABASE_PATH (read)
# Outputs:  a message per missing prerequisite, on stdout
# Exits:    1 when anything is missing; returns 0 otherwise
check_dependencies() {
  local failed=0

  command -v rofi >/dev/null 2>&1 || {
    echo "Error: rofi is not installed. Please install it with:"
    echo " sudo pacman -S rofi"
    failed=1
  }

  command -v sqlite3 >/dev/null 2>&1 || {
    echo "Error: sqlite3 is not installed. Please install it with:"
    echo " sudo pacman -S sqlite"
    failed=1
  }

  command -v xdg-open >/dev/null 2>&1 || {
    echo "Error: xdg-open is not installed. Please install it with:"
    echo " sudo pacman -S xdg-utils"
    failed=1
  }

  [ -f "$DATABASE_PATH" ] || {
    echo "Error: Database not found at $DATABASE_PATH"
    echo "Run the OCR script first to create and populate the database."
    failed=1
  }

  # Written as "ok || exit" so the function still returns 0 on success.
  [ "$failed" -ne 1 ] || exit 1
}
|
||||
|
||||
# Format OCR text for display in rofi
|
||||
# Format OCR text as one short line for rofi.
# Globals:   MAX_DISPLAY_LENGTH (read; 100 is used when it is unset)
# Arguments: $1 - raw OCR text
# Outputs:   text with newlines replaced by spaces, runs of spaces squeezed,
#            and anything past the length limit replaced by "..."
format_ocr_text() {
  local text="$1"
  local -r limit="${MAX_DISPLAY_LENGTH:-100}"

  # Replace newlines with spaces
  text="${text//$'\n'/ }"

  # Squeeze repeated spaces. printf is used because echo would treat text
  # such as "-n" as an option and print nothing.
  text=$(printf '%s' "$text" | tr -s ' ')

  # Truncate if too long
  if (( ${#text} > limit )); then
    text="${text:0:limit}..."
  fi

  printf '%s\n' "$text"
}
|
||||
|
||||
# Get entries from database and format for rofi
|
||||
# Print one rofi menu row per database record and record its path.
# Globals:  DATABASE_PATH (read)
# Outputs:  "<filename> | <formatted text>" lines on stdout; the matching
#           full_path is appended, in the same order, to /tmp/ocr_paths.$$
get_entries_for_rofi() {
  local filename ocr_text path formatted_text

  # Newlines in ocr_text are flattened inside SQL so each record is exactly
  # one line, and the unit separator (0x1f) is used between columns so a '|'
  # in the OCR text cannot shift the path into the wrong field.
  sqlite3 -separator $'\x1f' "$DATABASE_PATH" "
    SELECT filename,
           replace(replace(ocr_text, char(13), ' '), char(10), ' '),
           full_path
    FROM ocr_results
    ORDER BY filename
  " | while IFS=$'\x1f' read -r filename ocr_text path; do
    formatted_text=$(format_ocr_text "$ocr_text")
    printf '%s\n' "$filename | $formatted_text"
    printf '%s\n' "$path" >> "/tmp/ocr_paths.$$"
  done
}
|
||||
|
||||
# Main function
|
||||
# Show the OCR entries in rofi and open the screenshot the user selects.
# Globals:  ROFI_PROMPT (read)
# Outputs:  "Opening: <path>" or an error message on stdout
# Exits:    0 when the user cancels the menu
main() {
  check_dependencies

  # get_entries_for_rofi appends one path per menu row to this file.
  local -r paths_file="/tmp/ocr_paths.$$"
  rm -f "$paths_file" 2>/dev/null
  touch "$paths_file"
  # Remove the file on every exit path, not only the ones handled below.
  trap 'rm -f "/tmp/ocr_paths.$$" 2>/dev/null' EXIT

  # -format i makes rofi print the zero-based index of the chosen row. The
  # row number is used directly; counting rows in a "cmd | while" loop would
  # run in a subshell and lose the counter.
  local index
  index=$(get_entries_for_rofi | rofi -dmenu -i -p "$ROFI_PROMPT" \
    -width 80 \
    -lines 15 \
    -font "mono 10" \
    -matching fuzzy \
    -format i)

  # Exit if no selection made (empty on cancel, -1 for custom input)
  if [[ ! "$index" =~ ^[0-9]+$ ]]; then
    exit 0
  fi

  # Row N of the menu corresponds to line N+1 of the paths file
  local file_path
  file_path=$(sed -n "$((index + 1))p" "$paths_file")

  # Open the file if path exists
  if [ -n "$file_path" ] && [ -f "$file_path" ]; then
    xdg-open "$file_path" &
    echo "Opening: $file_path"
  else
    echo "Error: Could not find file path for menu entry $((index + 1))"
  fi
}
|
||||
|
||||
# Run the main function
|
||||
main
|
||||
Binary file not shown.
Executable
+247
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Constants
|
||||
DATABASE_PATH = os.path.expanduser("~/screenshot_ocr.db")
|
||||
|
||||
|
||||
def parse_arguments():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``query`` (list of words) and the
        options ``case_insensitive``, ``exact``, ``limit``, ``verbose``,
        ``count``, ``list_all``, ``date`` and ``open``.
    """
    parser = argparse.ArgumentParser(
        description="Search for text in OCR results from screenshots."
    )
    parser.add_argument(
        "query", nargs="*", help="Text to search for in the OCR results"
    )

    flag = {"action": "store_true"}
    # Declared in the order they should appear in --help.
    option_specs = (
        (("-i", "--case-insensitive"),
         dict(flag, help="Perform case-insensitive search")),
        (("-e", "--exact"),
         dict(flag, help="Match exact text only (default is substring match)")),
        (("-l", "--limit"),
         dict(type=int, default=25,
              help="Limit number of results (default: 25, 0 for no limit)")),
        (("-v", "--verbose"),
         dict(flag, help="Show more details including file path and date")),
        (("-c", "--count"),
         dict(flag, help="Only show the count of matches")),
        (("--list-all",),
         dict(flag, help="List all files in the database")),
        (("-d", "--date"),
         dict(flag, help="Sort results by date (newest first)")),
        (("-o", "--open"),
         dict(flag, help="Open the first matching file (requires xdg-open)")),
    )
    for names, settings in option_specs:
        parser.add_argument(*names, **settings)

    return parser.parse_args()
|
||||
|
||||
|
||||
def check_database():
    """Exit with status 1 (after printing a hint) if the database is missing."""
    if os.path.exists(DATABASE_PATH):
        return

    print(f"Error: Database not found at {DATABASE_PATH}")
    print("Run the OCR script first to create and populate the database.")
    sys.exit(1)
|
||||
|
||||
|
||||
def search_ocr_database(args):
    """Run the search (or full listing) described by ``args``.

    Args:
        args: Parsed options; reads ``query``, ``list_all``, ``exact``,
            ``case_insensitive``, ``date`` and ``limit``.

    Returns:
        For ``--list-all``: rows of ``(filename, ocr_date, created_date,
        length(ocr_text), full_path)`` ordered by filename.
        Otherwise: full ``ocr_results`` rows (``SELECT *``) whose text matches
        the query, ordered by date (newest first) or filename, limited to
        ``args.limit`` rows when that is greater than zero.
    """
    # Join all query words
    query_text = " ".join(args.query)
    params = []

    if args.list_all:
        sql_query = """
            SELECT filename, ocr_date, created_date, length(ocr_text), full_path
            FROM ocr_results
            ORDER BY filename ASC
        """
    else:
        haystack, needle = "ocr_text", "?"
        if args.case_insensitive:
            haystack, needle = "LOWER(ocr_text)", "LOWER(?)"

        if args.exact:
            condition = f"{haystack} = {needle}"
        else:
            # instr() is a literal, case-sensitive substring test. LIKE would
            # ignore ASCII case even without -i and would treat '%' and '_'
            # in the query as wildcards.
            condition = f"instr({haystack}, {needle}) > 0"

        sql_query = f"SELECT * FROM ocr_results WHERE {condition}"
        params.append(query_text)

        # Add ordering
        if args.date:
            sql_query += " ORDER BY created_date DESC"
        else:
            sql_query += " ORDER BY filename ASC"

        # Add limit as a bound parameter rather than interpolated text
        if args.limit > 0:
            sql_query += " LIMIT ?"
            params.append(args.limit)

    conn = sqlite3.connect(DATABASE_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute(sql_query, params)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
|
||||
|
||||
|
||||
def _preview(text, width=80):
    """Return ``text`` cut to ``width`` characters plus "..." when longer."""
    return text[:width] + "..." if len(text) > width else text


def _snippet_around(text, term, context=40):
    """Return the text surrounding the first case-insensitive hit of ``term``.

    About ``context`` characters are kept on each side, widened outward while
    the boundary character is alphanumeric; "..." marks a cut at either end.
    Returns None when ``term`` does not occur.
    """
    hit = text.lower().find(term)
    if hit < 0:
        return None

    left = max(0, hit - context)
    right = min(len(text), hit + len(term) + context)
    while left > 0 and text[left].isalnum():
        left -= 1
    while right < len(text) and text[right].isalnum():
        right += 1

    snippet = text[left:right]
    if left > 0:
        snippet = "..." + snippet
    if right < len(text):
        snippet = snippet + "..."
    return snippet


def display_results(results, args):
    """Print search or listing results according to the display options.

    Args:
        results: Rows returned by the database query; 5-column rows for
            ``--list-all``, full 7-column table rows otherwise.
        args: Parsed options; reads ``count``, ``limit``, ``list_all``,
            ``verbose``, ``exact`` and ``query``.
    """
    if not results:
        print("No matches found.")
        return

    total = len(results)
    if args.count:
        print(f"Found {total} matches.")
        return

    limit_note = ""
    if args.limit > 0 and total == args.limit:
        limit_note = " (showing first " + str(args.limit) + ")"
    print(f"Found {total} matches{limit_note}:")
    print("-" * 80)

    # The --list-all rows have their own column layout.
    if args.list_all:
        for filename, ocr_date, created_date, text_length, full_path in results:
            if "T" in created_date:
                created_str = created_date.split("T")[0]
            else:
                created_str = created_date

            if not args.verbose:
                print(f"{filename} | {created_str} | {text_length} chars")
                continue

            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Text Length: {text_length} chars")
            print("-" * 40)
        return

    # Regular search results: one full table row each.
    for record in results:
        _row_id, filename, full_path, ocr_text, file_size, created_date, ocr_date = record

        if args.verbose:
            print(f"File: {filename}")
            print(f"Path: {full_path}")
            print(f"Created: {created_date}")
            print(f"OCR Date: {ocr_date}")
            print(f"Size: {file_size} bytes")
            print("Text:")
            print("-" * 40)
            print(ocr_text)
            print("-" * 80)
            continue

        print(f"[{filename}]")
        snippet = None
        if not args.exact and args.query:
            # Show the text around the match; None means the term was not found.
            snippet = _snippet_around(ocr_text, " ".join(args.query).lower())
        print(_preview(ocr_text) if snippet is None else snippet)
        print("-" * 40)
|
||||
|
||||
|
||||
def open_file(path):
    """Open ``path`` with the desktop's default application (xdg-open).

    Args:
        path: Path of the file to open, as stored in the database.

    Returns:
        True when xdg-open was started, False when the file no longer exists
        or the launch failed (a message is printed in both failure cases).
    """
    import subprocess

    # The database can still list screenshots that have since been deleted.
    if not os.path.exists(path):
        print(f"Error: File not found: {path}")
        return False

    try:
        # Output is discarded rather than piped: nothing ever reads the pipes,
        # so a chatty viewer could otherwise block once a pipe buffer fills.
        subprocess.Popen(
            ["xdg-open", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        print(f"Opening: {path}")
        return True
    except Exception as e:
        print(f"Error opening file: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Parse options, run the search, and open and/or print the results."""
    args = parse_arguments()
    check_database()

    if not (args.list_all or len(args.query) > 0):
        print("Error: Please provide a search query or use --list-all")
        sys.exit(1)

    results = search_ocr_database(args)

    # Open the first matching file if requested. The path sits in a different
    # column depending on which query produced the rows.
    if args.open and results and len(results) > 0:
        path_column = 4 if args.list_all else 2
        open_file(results[0][path_column])

    # Results are printed unless --open was given without --verbose.
    if args.verbose or not args.open:
        display_results(results, args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Executable
+75
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# simple_rofi_ocr.sh - A simplified script to search OCR'd screenshots with rofi
|
||||
|
||||
# Database location
|
||||
DB_PATH="$HOME/screenshot_ocr.db"

# Check if database exists
if [ ! -f "$DB_PATH" ]; then
  echo "Error: Database not found at $DB_PATH"
  echo "Run the OCR script first to create and populate the database."
  exit 1
fi

# Check if rofi exists
if ! command -v rofi &> /dev/null; then
  echo "Error: rofi is not installed. Please install it with:"
  echo " sudo pacman -S rofi"
  exit 1
fi

# Check if sqlite3 exists
if ! command -v sqlite3 &> /dev/null; then
  echo "Error: sqlite3 is not installed. Please install it with:"
  echo " sudo pacman -S sqlite"
  exit 1
fi

# Read the database into two parallel arrays: menu rows and their paths.
# Newlines inside ocr_text are flattened in SQL so every record is exactly one
# line, and the unit separator (0x1f) is used between columns so a '|' in the
# OCR text cannot be read as a column boundary.
menu_items=()
menu_paths=()
while IFS=$'\x1f' read -r path filename ocr_text; do
  # Clean up text for display (squeeze spaces, truncate)
  clean_text=$(printf '%s' "$ocr_text" | tr -s ' ')
  if [ "${#clean_text}" -gt 80 ]; then
    clean_text="${clean_text:0:80}..."
  fi

  menu_items+=("$filename | $clean_text")
  menu_paths+=("$path")
done < <(sqlite3 -separator $'\x1f' "$DB_PATH" "
  SELECT full_path, filename,
         replace(replace(ocr_text, char(13), ' '), char(10), ' ')
  FROM ocr_results
  ORDER BY filename
")

# Display rofi menu. printf emits the rows verbatim (no backslash
# interpretation, no empty trailing row); -format i makes rofi print the
# zero-based index of the selected row.
selection=$(printf '%s\n' "${menu_items[@]}" \
  | rofi -dmenu -i -format i -p "Screenshot OCR" \
      -width 80 -lines 15 -font "mono 10")

# Exit if no selection (empty on cancel, -1 for custom input)
if [[ ! "$selection" =~ ^[0-9]+$ ]] || (( selection >= ${#menu_items[@]} )); then
  exit 0
fi

selected_path="${menu_paths[selection]}"

# Open the file if found
if [ -n "$selected_path" ] && [ -f "$selected_path" ]; then
  echo "Opening: $selected_path"
  xdg-open "$selected_path" &
else
  echo "Error: Could not find file: $selected_path"
fi

exit 0
|
||||
Reference in New Issue
Block a user