Files
ocr-screenshot-gallery/src/databasemanager.cpp
T
2025-11-16 01:42:27 -05:00

734 lines
24 KiB
C++

#include "databasemanager.h"
#include <QSqlQuery>
#include <QSqlError>
#include <QDebug>
#include <QVariant>
#include <QFileInfo>
#include <QTimer>
// Constructs the manager in the "not initialized" state, wires the search
// watcher so finished background searches are published, and starts a
// once-a-minute timer that calls cleanupCache().
//
// parent: forwarded to QObject; also becomes the owner of the cleanup timer
//         through this object.
DatabaseManager::DatabaseManager(QObject *parent)
    : QObject(parent)
    , m_initialized(false)
    , m_ftsEnabled(false)
    , m_searchCancelled(false)
    , m_cachedImageCount(-1) // -1 marks the cached row count as unknown
    , m_currentOffset(0)
    , m_currentLimit(0)
{
    // Start with empty caches and a fresh cache timestamp.
    m_searchCache.clear();
    m_allImagesCache.clear();
    m_lastCacheUpdate = QDateTime::currentDateTime();

    // When a background search finishes, publish the page it stored in
    // m_searchCache (performSearchInBackground() writes it there).
    connect(&m_searchWatcher, &QFutureWatcher<void>::finished,
            this, [this]() {
        QString searchText;
        int offset = 0;
        int limit = 0;
        {
            // Snapshot the current search parameters; the cancelled flag is
            // read under the same mutex that guards it everywhere else.
            QMutexLocker locker(&m_searchMutex);
            if (m_searchCancelled) {
                return;
            }
            searchText = m_currentSearchText;
            offset = m_currentOffset;
            limit = m_currentLimit;
        }
        if (searchText.isEmpty()) {
            return;
        }

        bool found = false;
        SearchCacheItem cacheItem;
        {
            QMutexLocker cacheLocker(&m_cacheMutex);
            const auto byText = m_searchCache.constFind(searchText);
            if (byText != m_searchCache.constEnd()) {
                const auto byPage = byText.value().constFind(qMakePair(offset, limit));
                if (byPage != byText.value().constEnd()) {
                    cacheItem = byPage.value();
                    found = true;
                }
            }
        }

        // Emit only after both mutexes are released: a directly connected
        // slot may call back into this object (searchImages(), getAllImages(),
        // ...) and those functions take the same mutexes.
        if (found) {
            emit searchResultsReady(cacheItem.results, searchText,
                                    offset, limit, cacheItem.totalCount);
        }
    });

    // Expire stale cache entries once per minute. The timer is parented to
    // this object, so it is destroyed together with the manager.
    QTimer *cleanupTimer = new QTimer(this);
    connect(cleanupTimer, &QTimer::timeout, this, &DatabaseManager::cleanupCache);
    cleanupTimer->start(60000);
}
// Stops any running search, closes the main connection and unregisters the
// per-thread "tdb_*" connections, except the one named after the thread that
// runs this destructor.
DatabaseManager::~DatabaseManager()
{
    // Blocks until a running background search has returned.
    cancelSearch();

    if (m_db.isOpen()) {
        m_db.close();
    }

    // Name getDatabaseConnection() would use for the current thread.
    const QString ownConnection =
        QString("tdb_%1").arg(reinterpret_cast<quintptr>(QThread::currentThread()));

    const QStringList registered = QSqlDatabase::connectionNames();
    for (const QString &name : registered) {
        const bool isThreadConnection = name.startsWith("tdb_");
        if (!isThreadConnection || name == ownConnection) {
            continue;
        }
        QSqlDatabase::removeDatabase(name);
    }
}
bool DatabaseManager::initialize(const QString &dbPath)
{
// Check if database is already initialized
if (m_initialized) {
return true;
}
// Check if file exists
QFileInfo fileInfo(dbPath);
if (!fileInfo.exists() || !fileInfo.isFile()) {
qDebug() << "Database file does not exist:" << dbPath;
return false;
}
// Set up database connection
m_db = QSqlDatabase::addDatabase("QSQLITE");
m_db.setDatabaseName(dbPath);
// Open database
if (!m_db.open()) {
qDebug() << "Failed to open database:" << m_db.lastError().text();
return false;
}
// Verify required table exists
QSqlQuery query;
if (!query.exec("SELECT name FROM sqlite_master WHERE type='table' AND name='ocr_results'")) {
qDebug() << "Failed to execute query:" << query.lastError().text();
m_db.close();
return false;
}
if (!query.next()) {
qDebug() << "The required table 'ocr_results' does not exist in the database.";
m_db.close();
return false;
}
// Verify the table has the required columns
if (!query.exec("PRAGMA table_info(ocr_results)")) {
qDebug() << "Failed to get table info:" << query.lastError().text();
m_db.close();
return false;
}
bool hasId = false;
bool hasFullPath = false;
bool hasOcrText = false;
while (query.next()) {
QString columnName = query.value(1).toString();
if (columnName == "id") hasId = true;
if (columnName == "full_path") hasFullPath = true;
if (columnName == "ocr_text") hasOcrText = true;
}
if (!hasId || !hasFullPath || !hasOcrText) {
qDebug() << "Missing required columns in ocr_results table. Need 'id', 'full_path', and 'ocr_text'";
m_db.close();
return false;
}
// Initialize FTS5 if available
if (initializeFTS()) {
qDebug() << "FTS5 initialized successfully.";
m_ftsEnabled = true;
} else {
// Fallback to regular index if FTS5 is unavailable
qDebug() << "FTS5 not available, using standard index instead.";
query.exec("CREATE INDEX IF NOT EXISTS idx_ocr_text ON ocr_results(ocr_text)");
m_ftsEnabled = false;
}
m_initialized = true;
qDebug() << "Database initialized successfully.";
return true;
}
// Returns one page of rows from ocr_results ordered by id, or every row when
// limit is not positive. Rows whose full_path is empty are skipped. Results
// are cached per (offset, limit) pair and reused while the shared cache
// timestamp is younger than CACHE_LIFETIME_SECS.
//
// Returns an empty list when the manager is not initialized, the connection
// cannot be reopened, or the query fails.
QList<DatabaseManager::ImageItem> DatabaseManager::getAllImages(int offset, int limit)
{
    QList<ImageItem> result;

    if (!m_initialized) {
        qDebug() << "Database not initialized.";
        return result;
    }

    const QPair<int, int> pageKey(offset, limit);

    // Serve from the cache when this page is present and still fresh.
    {
        QMutexLocker guard(&m_cacheMutex);
        if (m_allImagesCache.contains(pageKey)
            && m_lastCacheUpdate.secsTo(QDateTime::currentDateTime()) < CACHE_LIFETIME_SECS) {
            return m_allImagesCache[pageKey];
        }
    }

    // Try to reopen a connection that has gone away.
    if (!m_db.isOpen() && !m_db.open()) {
        qDebug() << "Database connection lost and cannot be reopened:" << m_db.lastError().text();
        m_initialized = false;
        return result;
    }

    m_db.transaction();

    const bool paged = limit > 0;
    QSqlQuery query;
    query.prepare(paged
        ? "SELECT id, full_path, ocr_text FROM ocr_results ORDER BY id LIMIT :limit OFFSET :offset"
        : "SELECT id, full_path, ocr_text FROM ocr_results ORDER BY id");
    if (paged) {
        query.bindValue(":limit", limit);
        query.bindValue(":offset", offset);
    }

    if (!query.exec()) {
        qDebug() << "Failed to fetch images:" << query.lastError().text();
        m_db.rollback();
        return result;
    }

    // Pre-size the list; fall back to 100 when the driver reports no size.
    const int reported = query.size();
    result.reserve(reported > 0 ? reported : 100);

    while (query.next()) {
        ImageItem row;
        row.id = query.value(0).toInt();
        row.filePath = query.value(1).toString();
        row.ocrText = query.value(2).toString();
        if (row.filePath.isEmpty()) {
            continue; // rows without a path are not returned
        }
        result.append(row);
    }

    m_db.commit();

    // Remember this page and refresh the shared cache timestamp.
    {
        QMutexLocker guard(&m_cacheMutex);
        m_allImagesCache[pageKey] = result;
        m_lastCacheUpdate = QDateTime::currentDateTime();
    }

    return result;
}
int DatabaseManager::getImageCount()
{
// Return cached count if available
QMutexLocker cacheLocker(&m_cacheMutex);
if (m_cachedImageCount >= 0 &&
m_lastCacheUpdate.secsTo(QDateTime::currentDateTime()) < CACHE_LIFETIME_SECS) {
return m_cachedImageCount;
}
cacheLocker.unlock();
if (!m_initialized) {
qDebug() << "Database not initialized.";
return 0;
}
// Verify database is still connected
if (!m_db.isOpen() && !m_db.open()) {
qDebug() << "Database connection lost and cannot be reopened:" << m_db.lastError().text();
m_initialized = false;
return 0;
}
QSqlQuery query;
query.prepare("SELECT COUNT(*) FROM ocr_results");
if (!query.exec() || !query.next()) {
qDebug() << "Failed to get image count:" << query.lastError().text();
return 0;
}
int count = query.value(0).toInt();
// Update cache
cacheLocker.relock();
m_cachedImageCount = count;
cacheLocker.unlock();
return count;
}
// Returns a connection dedicated to the calling thread, named "tdb_<thread
// pointer value>". An already registered connection is reused; otherwise a
// new QSQLITE connection to the same database file as m_db is registered and
// opened. If opening fails the (closed) connection is still returned, so
// callers have to check isOpen().
QSqlDatabase DatabaseManager::getDatabaseConnection()
{
    const QString name =
        QString("tdb_%1").arg(reinterpret_cast<quintptr>(QThread::currentThread()));

    // Reuse this thread's connection when it has been registered before.
    if (QSqlDatabase::contains(name)) {
        return QSqlDatabase::database(name);
    }

    QSqlDatabase connection = QSqlDatabase::addDatabase("QSQLITE", name);
    connection.setDatabaseName(m_db.databaseName());

    if (connection.open()) {
        // Foreign key enforcement is switched on for each new connection.
        QSqlQuery pragma(connection);
        pragma.exec("PRAGMA foreign_keys = ON");
    } else {
        qDebug() << "Failed to open database in thread:" << connection.lastError().text();
    }

    return connection;
}
void DatabaseManager::searchImages(const QString &searchText, int offset, int limit)
{
if (!m_initialized) {
qDebug() << "Database not initialized.";
emit searchResultsReady(QList<ImageItem>(), searchText, offset, limit, 0);
return;
}
// Verify database is still connected
if (!m_db.isOpen() && !m_db.open()) {
qDebug() << "Database connection lost and cannot be reopened:" << m_db.lastError().text();
m_initialized = false;
emit searchResultsReady(QList<ImageItem>(), searchText, offset, limit, 0);
return;
}
// If search text is empty, return all images
if (searchText.isEmpty()) {
// For empty search, return all images with pagination
QList<ImageItem> allImages = getAllImages(offset, limit);
int totalCount = getImageCount();
emit searchResultsReady(allImages, searchText, offset, limit, totalCount);
return;
}
// Check if we have a cached result for this search query
{
QMutexLocker locker(&m_cacheMutex);
QPair<int, int> cacheKey(offset, limit);
if (m_searchCache.contains(searchText) &&
m_searchCache[searchText].contains(cacheKey)) {
SearchCacheItem cacheItem = m_searchCache[searchText][cacheKey];
// Check if cache is still valid
if (cacheItem.timestamp.secsTo(QDateTime::currentDateTime()) < CACHE_LIFETIME_SECS) {
emit searchResultsReady(cacheItem.results, searchText, offset, limit, cacheItem.totalCount);
return;
}
}
}
// No delay needed since we're using typing inactivity timer in MainWindow
// Cancel any ongoing search before starting a new one
cancelSearch();
// Store the current search parameters safely
{
QMutexLocker locker(&m_searchMutex);
m_currentSearchText = searchText;
m_currentOffset = offset;
m_currentLimit = limit;
m_searchCancelled = false;
}
// The signal is now emitted before starting the thread to ensure UI responsiveness
// Start the search operation in a background thread
m_searchFuture = QtConcurrent::run([this, searchText, offset, limit]() {
performSearchInBackground(searchText, offset, limit);
});
// Show immediate feedback that search is starting
emit searchStarted(searchText);
m_searchWatcher.setFuture(m_searchFuture);
}
// Marks the current search as cancelled, clears the current search text and
// then blocks until a running background search has returned.
void DatabaseManager::cancelSearch()
{
    {
        QMutexLocker guard(&m_searchMutex);
        m_searchCancelled = true;
        m_currentSearchText.clear();
    }

    // The mutex is released before waiting so the worker can take it to
    // notice the cancellation.
    if (!m_searchFuture.isRunning()) {
        return;
    }
    m_searchFuture.waitForFinished();
}
void DatabaseManager::performSearchInBackground(const QString &searchText, int offset, int limit)
{
QList<ImageItem> images;
// Get a thread-specific database connection
QSqlDatabase threadDb = getDatabaseConnection();
if (!threadDb.isOpen() && !threadDb.open()) {
qDebug() << "Thread database connection failed:" << threadDb.lastError().text();
return;
}
// Check if search was cancelled
{
QMutexLocker locker(&m_searchMutex);
if (m_searchCancelled || m_currentSearchText != searchText) {
return;
}
}
// First, get total count for pagination info
QSqlQuery countQuery(threadDb);
int totalCount = 0;
if (m_ftsEnabled) {
QString ftsQuery = prepareFTSQuery(searchText);
countQuery.prepare("SELECT COUNT(*) FROM ocr_results r "
"JOIN ocr_fts f ON r.id = f.rowid "
"WHERE ocr_fts MATCH :query");
countQuery.bindValue(":query", ftsQuery);
} else {
if (searchText.length() <= 3) {
countQuery.prepare("SELECT COUNT(*) FROM ocr_results WHERE ocr_text LIKE :search");
countQuery.bindValue(":search", "%" + searchText + "%");
} else {
countQuery.prepare("SELECT COUNT(*) FROM ocr_results WHERE ocr_text LIKE :search OR ocr_text LIKE :wordstart");
countQuery.bindValue(":search", "%" + searchText + "%");
countQuery.bindValue(":wordstart", "% " + searchText + "%");
}
}
if (countQuery.exec() && countQuery.next()) {
totalCount = countQuery.value(0).toInt();
} else {
qDebug() << "Failed to get count:" << countQuery.lastError().text();
totalCount = 0;
}
// Check if search was cancelled before main query
{
QMutexLocker locker(&m_searchMutex);
if (m_searchCancelled || m_currentSearchText != searchText) {
return;
}
}
// Start transaction to speed up query
threadDb.transaction();
QSqlQuery query(threadDb);
if (m_ftsEnabled) {
// Use FTS5 virtual table for much faster text search
QString ftsQuery = prepareFTSQuery(searchText);
QString queryStr = "SELECT r.id, r.full_path, r.ocr_text FROM ocr_results r "
"JOIN ocr_fts f ON r.id = f.rowid "
"WHERE ocr_fts MATCH :query "
"ORDER BY rank";
if (limit > 0) {
queryStr += " LIMIT :limit OFFSET :offset";
}
query.prepare(queryStr);
query.bindValue(":query", ftsQuery);
if (limit > 0) {
query.bindValue(":limit", limit);
query.bindValue(":offset", offset);
}
} else {
// Fallback to LIKE queries if FTS is not available
// Optimize the query based on length of search text
if (searchText.length() <= 3) {
// For short search terms, use a more targeted approach
QString queryStr = "SELECT id, full_path, ocr_text FROM ocr_results WHERE ocr_text LIKE :search "
"ORDER BY id";
if (limit > 0) {
queryStr += " LIMIT :limit OFFSET :offset";
}
query.prepare(queryStr);
query.bindValue(":search", "%" + searchText + "%");
if (limit > 0) {
query.bindValue(":limit", limit);
query.bindValue(":offset", offset);
}
} else {
// For longer search terms, use LIKE with a more specific pattern at start
QString queryStr = "SELECT id, full_path, ocr_text FROM ocr_results WHERE ocr_text LIKE :search OR ocr_text LIKE :wordstart "
"ORDER BY id";
if (limit > 0) {
queryStr += " LIMIT :limit OFFSET :offset";
}
query.prepare(queryStr);
query.bindValue(":search", "%" + searchText + "%");
query.bindValue(":wordstart", "% " + searchText + "%");
if (limit > 0) {
query.bindValue(":limit", limit);
query.bindValue(":offset", offset);
}
}
}
if (!query.exec()) {
qDebug() << "Failed to search images:" << query.lastError().text();
qDebug() << "Error details:" << query.lastError().databaseText();
threadDb.rollback();
// If FTS query failed, try fallback to LIKE
if (m_ftsEnabled) {
qDebug() << "Trying fallback to LIKE query...";
threadDb.transaction();
query.prepare("SELECT full_path, ocr_text FROM ocr_results WHERE ocr_text LIKE :search");
query.bindValue(":search", "%" + searchText + "%");
if (!query.exec()) {
qDebug() << "Fallback query also failed:" << query.lastError().text();
threadDb.rollback();
return;
}
} else {
return;
}
}
// Check if search was cancelled
{
QMutexLocker locker(&m_searchMutex);
if (m_searchCancelled || m_currentSearchText != searchText) {
threadDb.rollback();
return;
}
}
// Reserve space for results to avoid reallocations
images.reserve(query.size() > 0 ? query.size() : 100);
while (query.next()) {
// Periodically check if search was cancelled
if (query.at() % 20 == 0) {
QMutexLocker locker(&m_searchMutex);
if (m_searchCancelled || m_currentSearchText != searchText) {
threadDb.rollback();
return;
}
}
ImageItem item;
item.id = query.value(0).toInt();
item.filePath = query.value(1).toString();
item.ocrText = query.value(2).toString();
// Only add images that have a non-empty path
if (!item.filePath.isEmpty()) {
images.append(item);
}
}
threadDb.commit();
// Check if search was cancelled before storing results
{
QMutexLocker locker(&m_searchMutex);
if (m_searchCancelled || m_currentSearchText != searchText) {
return;
}
}
// Cache the result for future queries and emit signal with the results
QMutexLocker locker(&m_cacheMutex);
// Create cache item with results and metadata
SearchCacheItem cacheItem;
cacheItem.results = images;
cacheItem.totalCount = totalCount;
cacheItem.timestamp = QDateTime::currentDateTime();
// If this is the first query for this search text, create a new map
if (!m_searchCache.contains(searchText)) {
m_searchCache.insert(searchText, QMap<QPair<int, int>, SearchCacheItem>());
}
// Store results for this specific offset/limit combination
QPair<int, int> cacheKey(offset, limit);
m_searchCache[searchText].insert(cacheKey, cacheItem);
// Limit cache size to avoid memory issues
if (m_searchCache.size() > MAX_CACHE_SIZE) {
// Remove the oldest entry
if (!m_searchCache.isEmpty()) {
QString oldestKey = m_searchCache.firstKey();
m_searchCache.remove(oldestKey);
}
}
// Release mutex before emitting signal
locker.unlock();
// Emit signal with results, including pagination info
QMutexLocker searchLocker(&m_searchMutex);
if (!m_searchCancelled && m_currentSearchText == searchText) {
searchLocker.unlock();
emit searchResultsReady(images, searchText, offset, limit, totalCount);
}
}
void DatabaseManager::cleanupCache()
{
QMutexLocker locker(&m_cacheMutex);
// Get current time
QDateTime now = QDateTime::currentDateTime();
// Expire old search cache items
QMutableMapIterator<QString, QMap<QPair<int, int>, SearchCacheItem>> i(m_searchCache);
while (i.hasNext()) {
i.next();
QMutableMapIterator<QPair<int, int>, SearchCacheItem> j(i.value());
while (j.hasNext()) {
j.next();
if (j.value().timestamp.secsTo(now) > CACHE_LIFETIME_SECS) {
j.remove();
}
}
// If no more results for this search text, remove the entry
if (i.value().isEmpty()) {
i.remove();
}
}
// Expire old all-images cache
if (m_lastCacheUpdate.secsTo(now) > CACHE_LIFETIME_SECS) {
m_allImagesCache.clear();
m_cachedImageCount = -1; // Invalidate count cache
}
}
bool DatabaseManager::initializeFTS()
{
// Check if SQLite has FTS5 support
QSqlQuery query;
query.exec("SELECT sqlite_compileoption_used('ENABLE_FTS5')");
if (!query.next() || !query.value(0).toBool()) {
qDebug() << "FTS5 not available in this SQLite installation";
return false;
}
// Check if our FTS table already exists
query.exec("SELECT name FROM sqlite_master WHERE type='table' AND name='ocr_fts'");
if (!query.next()) {
// Create FTS5 virtual table
qDebug() << "Creating FTS5 virtual table...";
bool success = query.exec(
"CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5("
"ocr_text, "
"content='ocr_results', "
"content_rowid='id', "
"tokenize='porter unicode61');"
);
if (!success) {
qDebug() << "Failed to create FTS5 table:" << query.lastError().text();
return false;
}
// Populate the FTS table from existing data
query.exec("BEGIN TRANSACTION;");
success = query.exec(
"INSERT INTO ocr_fts(rowid, ocr_text) "
"SELECT id, ocr_text FROM ocr_results;"
);
query.exec("COMMIT;");
if (!success) {
qDebug() << "Failed to populate FTS5 table:" << query.lastError().text();
return false;
}
// Create triggers to keep FTS table in sync with ocr_results
success = query.exec(
"CREATE TRIGGER IF NOT EXISTS ocr_fts_insert AFTER INSERT ON ocr_results BEGIN "
" INSERT INTO ocr_fts(rowid, ocr_text) VALUES (new.id, new.ocr_text); "
"END;"
);
if (!success) {
qDebug() << "Failed to create insert trigger:" << query.lastError().text();
return false;
}
success = query.exec(
"CREATE TRIGGER IF NOT EXISTS ocr_fts_delete AFTER DELETE ON ocr_results BEGIN "
" INSERT INTO ocr_fts(ocr_fts, rowid, ocr_text) VALUES('delete', old.id, old.ocr_text); "
"END;"
);
if (!success) {
qDebug() << "Failed to create delete trigger:" << query.lastError().text();
return false;
}
success = query.exec(
"CREATE TRIGGER IF NOT EXISTS ocr_fts_update AFTER UPDATE ON ocr_results BEGIN "
" INSERT INTO ocr_fts(ocr_fts, rowid, ocr_text) VALUES('delete', old.id, old.ocr_text); "
" INSERT INTO ocr_fts(rowid, ocr_text) VALUES (new.id, new.ocr_text); "
"END;"
);
if (!success) {
qDebug() << "Failed to create update trigger:" << query.lastError().text();
return false;
}
}
return true;
}
// Builds the FTS5 MATCH expression for user-entered search text.
//
// One word:       "word"* OR "word"
// Several words:  "w1 w2 ..." OR <term> OR <term> ...
//                 where a term is "w"* for words of 3+ characters and "w"
//                 for shorter ones.
//
// Every word is emitted as a quoted FTS5 string with embedded double quotes
// doubled. Bare words would be parsed as query syntax, so input such as
// "this or that", "c++" or text containing a quote character would make the
// MATCH expression invalid.
//
// searchText: raw user input; surrounding and repeated whitespace is ignored.
// Returns the MATCH expression. Input without any word yields the empty
// phrase "".
QString DatabaseManager::prepareFTSQuery(const QString &searchText)
{
    // Wraps text in double quotes, doubling any quote it contains.
    const auto quoted = [](const QString &text) -> QString {
        QString escaped = text;
        escaped.replace(QString("\""), QString("\"\""));
        return QString("\"") + escaped + QString("\"");
    };

    const QStringList tokens = searchText.simplified().split(' ', Qt::SkipEmptyParts);

    // Single word: match it as a prefix and as a complete term.
    if (tokens.size() == 1) {
        const QString term = quoted(tokens.first());
        return term + "* OR " + term;
    }

    QStringList alternatives;

    // The words as one phrase, in the order they were typed.
    alternatives << quoted(tokens.join(QString(" ")));

    // Each word on its own; prefix matching only for words of 3+ characters.
    for (const QString &token : tokens) {
        if (token.length() > 2) {
            alternatives << quoted(token) + "*";
        } else {
            alternatives << quoted(token);
        }
    }

    return alternatives.join(" OR ");
}