# File: unsorted-folder-full-text-s…/uff_app.py
# (416 lines, 15 KiB, Python)

import sys
import os
import sqlite3
import pdfplumber
# NEU: Für die Fuzzy-Logik
from rapidfuzz import process, fuzz
from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QHBoxLayout, QLineEdit, QPushButton, QLabel,
QFileDialog, QTextBrowser, QProgressBar, QMessageBox,
QListWidget, QListWidgetItem, QSplitter, QFrame)
from PyQt6.QtCore import Qt, QThread, pyqtSignal, QUrl
from PyQt6.QtGui import QDesktopServices
# --- 1. DATENBANK MANAGER (Mit Fuzzy-Ranking) ---
class DatabaseHandler:
    """SQLite-backed store for registered folders and their indexed documents.

    Two tables are used: an FTS5 virtual table ``documents`` (filename, path,
    content) and a plain table ``folders`` (path, alias). Every method opens
    its own short-lived connection to ``self.db_name``.
    """

    def __init__(self):
        """Resolve the per-user data directory, create it, and set up the DB."""
        if os.name == 'nt':
            # LOCALAPPDATA can be unset (e.g. stripped environments); fall
            # back to its conventional location instead of joining on None.
            base_dir = os.getenv('LOCALAPPDATA') or os.path.join(
                os.path.expanduser("~"), "AppData", "Local")
        else:
            base_dir = os.path.join(os.path.expanduser("~"), ".local", "share")

        self.app_data_dir = os.path.join(base_dir, "UFF_Search")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.app_data_dir, exist_ok=True)

        self.db_name = os.path.join(self.app_data_dir, "uff_index.db")
        # Debug aid when running from a terminal.
        print(f"Datenbank Pfad: {self.db_name}")
        self.init_db()

    @staticmethod
    def _folder_prefix(path):
        """Return *path* with exactly one trailing separator.

        Used to match only entries located inside the folder, so that
        ``/data/docs`` does not also match ``/data/docs2/...``.
        """
        separators = (os.sep, os.altsep) if os.altsep else (os.sep,)
        return path if path.endswith(separators) else path + os.sep

    def init_db(self):
        """Create the ``documents`` FTS5 table and ``folders`` table if missing."""
        conn = sqlite3.connect(self.db_name)
        try:
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS documents
                USING fts5(filename, path, content);
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS folders (
                    path TEXT PRIMARY KEY,
                    alias TEXT
                );
            """)
            conn.commit()
        finally:
            conn.close()

    def add_folder(self, path):
        """Register *path* as an indexed folder.

        Returns:
            True if a new row was inserted; False if the folder was already
            registered or the database reported an error.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            cursor = conn.execute(
                "INSERT OR IGNORE INTO folders (path, alias) VALUES (?, ?)",
                (path, os.path.basename(path)),
            )
            conn.commit()
            # INSERT OR IGNORE never raises on a duplicate key; rowcount is
            # the only way to tell whether the row was actually added.
            return cursor.rowcount > 0
        except sqlite3.Error:
            return False
        finally:
            conn.close()

    def remove_folder(self, path):
        """Unregister *path* and delete all documents stored beneath it."""
        prefix = self._folder_prefix(path)
        conn = sqlite3.connect(self.db_name)
        try:
            conn.execute("DELETE FROM folders WHERE path = ?", (path,))
            # Exact prefix comparison instead of LIKE: no wildcard
            # interpretation of '%' / '_' in the path, and sibling folders
            # sharing a name prefix are left untouched.
            conn.execute(
                "DELETE FROM documents WHERE substr(path, 1, ?) = ?",
                (len(prefix), prefix),
            )
            conn.commit()
        finally:
            conn.close()

    def get_folders(self):
        """Return the paths of all registered folders as a list of strings."""
        conn = sqlite3.connect(self.db_name)
        try:
            rows = conn.execute("SELECT path FROM folders").fetchall()
        finally:
            conn.close()
        return [row[0] for row in rows]

    def search(self, query):
        """Search the index for *query* and return fuzzy-ranked hits.

        Candidates come from an FTS5 prefix search; if that yields fewer than
        five rows, up to 1000 unfiltered documents are added so that heavily
        misspelled queries can still be matched by the fuzzy scorer.

        Returns:
            At most 50 ``(filename, path, snippet)`` tuples, best score first.
            An empty list for a blank query.
        """
        if not query.strip():
            return []
        words = query.replace('"', '').split()
        candidates = self._fetch_candidates(words)
        return self._rank(query, words, candidates)

    def _fetch_candidates(self, words):
        """Fetch ``(filename, path, snippet, content)`` rows to be ranked."""
        select = """
            SELECT filename, path, snippet(documents, 2, '<b>', '</b>', '...', 15), content
            FROM documents
        """
        conn = sqlite3.connect(self.db_name)
        try:
            rows = []
            if words:
                # '"word"*' is an FTS5 prefix query; quoting keeps user input
                # from being parsed as FTS5 syntax.
                match_expr = " OR ".join(f'"{w}"*' for w in words)
                try:
                    rows = conn.execute(
                        select + " WHERE documents MATCH ? LIMIT 200",
                        (match_expr,),
                    ).fetchall()
                except sqlite3.Error:
                    rows = []

            if len(rows) < 5:
                # Too few strict hits: widen the pool, but keep the real
                # matches (and their highlighted snippets) instead of
                # replacing them with an arbitrary slice of the table.
                seen_paths = {row[1] for row in rows}
                try:
                    extra = conn.execute(select + " LIMIT 1000").fetchall()
                except sqlite3.Error:
                    extra = []
                rows.extend(row for row in extra if row[1] not in seen_paths)
            return rows
        finally:
            conn.close()

    def _rank(self, query, words, rows):
        """Score candidate rows with RapidFuzz and return the top 50."""
        query_lc = query.lower()
        words_lc = [w.lower() for w in words]

        scored = []
        for filename, path, snippet, content in rows:
            # Only the first 5000 characters are scored, for performance.
            head = content[:5000] if content else ""
            score_name = fuzz.WRatio(query_lc, filename.lower())
            score_content = fuzz.partial_token_set_ratio(query_lc, head.lower())
            # Weighted average: content counts more than the file name.
            final_score = (score_name * 0.2) + (score_content * 0.8)

            # Bonus when every query word occurs literally.
            haystack = (filename + head).lower()
            if all(w in haystack for w in words_lc):
                final_score += 20

            # Drop weak matches.
            if final_score > 55:
                scored.append((final_score, (filename, path, snippet)))

        scored.sort(key=lambda item: item[0], reverse=True)
        return [data for _score, data in scored[:50]]
# --- 2. INDEXER (Unverändert) ---
class IndexerThread(QThread):
    """Background thread that (re)indexes one folder into the FTS table.

    Signals:
        progress_signal(str): status text, emitted once per file read.
        finished_signal(int, int, bool): number of indexed files, number of
            skipped files, and whether the run ended early.
    """

    progress_signal = pyqtSignal(str)
    finished_signal = pyqtSignal(int, int, bool)

    # Extensions read as plain UTF-8 text (PDFs are handled separately).
    TEXT_EXTENSIONS = frozenset(
        {".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml"}
    )

    def __init__(self, folder_path, db_name="uff_index.db"):
        """Store the folder to scan and the SQLite file to write to."""
        super().__init__()
        self.folder_path = folder_path
        self.db_name = db_name
        self.is_running = True

    def stop(self):
        """Ask the scan loop to stop; it checks the flag before each file."""
        self.is_running = False

    @staticmethod
    def _folder_prefix(path):
        """Return *path* with exactly one trailing separator."""
        separators = (os.sep, os.altsep) if os.altsep else (os.sep,)
        return path if path.endswith(separators) else path + os.sep

    def _extract_text(self, filepath):
        """Return the text of *filepath*, or None if unsupported/unreadable.

        PDFs are read page by page via pdfplumber; known text formats are
        read as UTF-8 with undecodable bytes ignored.
        """
        ext = os.path.splitext(filepath)[1].lower()
        try:
            if ext == ".pdf":
                with pdfplumber.open(filepath) as pdf:
                    pages = [page.extract_text() for page in pdf.pages]
                return "".join(text + "\n" for text in pages if text)
            if ext in self.TEXT_EXTENSIONS:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    return f.read()
            return None
        except Exception:
            # Best effort: a broken or locked file is skipped, not fatal.
            return None

    def run(self):
        """Drop the folder's old entries, then walk and index every file.

        ``finished_signal`` is always emitted, even if the database fails, so
        the UI cannot get stuck in its busy state. A database error is
        reported as an early end (third signal argument True); rows not yet
        committed at that point are lost.
        """
        indexed = 0
        skipped = 0
        was_cancelled = False
        conn = None
        try:
            conn = sqlite3.connect(self.db_name)
            # Exact prefix comparison instead of LIKE: '%' / '_' in the path
            # are not wildcards and sibling folders such as "docs2" survive a
            # rescan of "docs".
            prefix = self._folder_prefix(self.folder_path)
            conn.execute(
                "DELETE FROM documents WHERE substr(path, 1, ?) = ?",
                (len(prefix), prefix),
            )
            conn.commit()

            for root, _dirs, files in os.walk(self.folder_path):
                if not self.is_running:
                    was_cancelled = True
                    break
                for file in files:
                    if not self.is_running:
                        was_cancelled = True
                        break
                    self.progress_signal.emit(f"Lese: {file}...")
                    path = os.path.join(root, file)
                    content = self._extract_text(path)
                    if content and content.strip():
                        conn.execute(
                            "INSERT INTO documents (filename, path, content) VALUES (?, ?, ?)",
                            (file, path, content),
                        )
                        indexed += 1
                    else:
                        skipped += 1
                if was_cancelled:
                    break
            # Files indexed before a cancellation are kept.
            conn.commit()
        except sqlite3.Error as exc:
            print(f"Indexer-Fehler: {exc}")
            was_cancelled = True
        finally:
            if conn is not None:
                conn.close()
            self.finished_signal.emit(indexed, skipped, was_cancelled)
# --- 3. UI (Unverändert) ---
class UffWindow(QMainWindow):
    """Main window: folder list on the left, search box and results on the right."""

    def __init__(self):
        """Create the database handler, build the widgets, load saved folders."""
        super().__init__()
        self.db = DatabaseHandler()
        self.indexer_thread = None
        self.initUI()
        self.load_saved_folders()

    def initUI(self):
        """Build and lay out all widgets and connect their signals."""
        self.setWindowTitle("UFF Text Search v3.0 (Fuzzy)")
        self.resize(1000, 700)
        central = QWidget()
        self.setCentralWidget(central)
        main_layout = QHBoxLayout(central)

        # Left panel: folder management.
        left_panel = QFrame()
        left_panel.setFixedWidth(250)
        left_layout = QVBoxLayout(left_panel)
        left_layout.setContentsMargins(0, 0, 0, 0)
        lbl_folders = QLabel("📂 Meine Ordner")
        lbl_folders.setStyleSheet("font-weight: bold; font-size: 14px;")
        self.folder_list = QListWidget()
        self.folder_list.setSelectionMode(QListWidget.SelectionMode.SingleSelection)
        # Kept as attributes so set_ui_busy() can disable them while indexing.
        self.btn_add = QPushButton(" + Hinzufügen")
        self.btn_add.clicked.connect(self.add_new_folder)
        self.btn_remove = QPushButton(" - Entfernen")
        self.btn_remove.clicked.connect(self.delete_selected_folder)
        self.btn_rescan = QPushButton(" ↻ Neu scannen")
        self.btn_rescan.clicked.connect(self.rescan_selected_folder)
        self.btn_cancel = QPushButton("🛑 Abbrechen")
        self.btn_cancel.setStyleSheet("background-color: #ffcccc; color: #cc0000; font-weight: bold;")
        self.btn_cancel.clicked.connect(self.cancel_indexing)
        self.btn_cancel.hide()
        left_layout.addWidget(lbl_folders)
        left_layout.addWidget(self.folder_list)
        left_layout.addWidget(self.btn_add)
        left_layout.addWidget(self.btn_remove)
        left_layout.addStretch()
        left_layout.addWidget(self.btn_rescan)
        left_layout.addWidget(self.btn_cancel)

        # Right panel: search input, status line, results.
        right_panel = QWidget()
        right_layout = QVBoxLayout(right_panel)
        search_container = QHBoxLayout()
        self.input_search = QLineEdit()
        self.input_search.setPlaceholderText("Suchbegriff... (Fuzzy aktiv)")
        self.input_search.returnPressed.connect(self.perform_search)
        self.input_search.setStyleSheet("padding: 8px; font-size: 14px;")
        self.btn_go = QPushButton("Suchen")
        self.btn_go.setFixedWidth(100)
        self.btn_go.clicked.connect(self.perform_search)
        search_container.addWidget(self.input_search)
        search_container.addWidget(self.btn_go)
        self.lbl_status = QLabel("Bereit.")
        self.lbl_status.setStyleSheet("color: #666;")
        self.progress_bar = QProgressBar()
        self.progress_bar.hide()
        self.result_browser = QTextBrowser()
        # Links are opened by link_clicked() rather than inside the browser.
        self.result_browser.setOpenExternalLinks(False)
        self.result_browser.anchorClicked.connect(self.link_clicked)
        self.result_browser.setStyleSheet("background-color: white; border: 1px solid #ccc;")
        right_layout.addLayout(search_container)
        right_layout.addWidget(self.lbl_status)
        right_layout.addWidget(self.progress_bar)
        right_layout.addWidget(self.result_browser)

        splitter = QSplitter(Qt.Orientation.Horizontal)
        splitter.addWidget(left_panel)
        splitter.addWidget(right_panel)
        splitter.setSizes([250, 750])
        main_layout.addWidget(splitter)

    # --- Logic ---

    def load_saved_folders(self):
        """Refill the folder list from the database."""
        self.folder_list.clear()
        for folder in self.db.get_folders():
            item = QListWidgetItem(folder)
            item.setToolTip(folder)
            self.folder_list.addItem(item)

    def add_new_folder(self):
        """Let the user pick a folder, register it, and start indexing it."""
        folder = QFileDialog.getExistingDirectory(self, "Ordner wählen")
        if not folder:
            return
        if self.db.add_folder(folder):
            self.load_saved_folders()
            self.start_indexing(folder)
        else:
            QMessageBox.warning(self, "Info", "Ordner ist bereits vorhanden.")

    def delete_selected_folder(self):
        """Remove the selected folder and its documents after confirmation."""
        item = self.folder_list.currentItem()
        if not item:
            return
        path = item.text()
        answer = QMessageBox.question(
            self, "Löschen", f"Ordner entfernen?\n{path}",
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No)
        if answer == QMessageBox.StandardButton.Yes:
            self.db.remove_folder(path)
            self.load_saved_folders()
            self.result_browser.clear()
            self.lbl_status.setText("Ordner entfernt.")

    def rescan_selected_folder(self):
        """Re-index the folder currently selected in the list."""
        item = self.folder_list.currentItem()
        if not item:
            QMessageBox.information(self, "Info", "Bitte Ordner links auswählen.")
            return
        self.start_indexing(item.text())

    def start_indexing(self, folder):
        """Start a background indexer for *folder* unless one is running."""
        # Replacing the reference to a running QThread would let it be
        # destroyed mid-run; refuse to start a second indexer.
        if self.indexer_thread is not None and self.indexer_thread.isRunning():
            return
        self.set_ui_busy(True)
        self.lbl_status.setText(f"Starte... {os.path.basename(folder)}")
        # The thread must be told where the database lives; its default is a
        # relative file name, not the per-user path held by the handler.
        self.indexer_thread = IndexerThread(folder, db_name=self.db.db_name)
        self.indexer_thread.progress_signal.connect(self.lbl_status.setText)
        self.indexer_thread.finished_signal.connect(self.indexing_finished)
        self.indexer_thread.start()

    def cancel_indexing(self):
        """Ask a running indexer to stop."""
        if self.indexer_thread and self.indexer_thread.isRunning():
            self.lbl_status.setText("Breche ab...")
            self.indexer_thread.stop()

    def indexing_finished(self, indexed, skipped, was_cancelled):
        """Unlock the UI and report the outcome of an indexing run."""
        self.set_ui_busy(False)
        if was_cancelled:
            self.lbl_status.setText(f"Abgebrochen. ({indexed} indiziert).")
            QMessageBox.information(self, "Abbruch", f"Vorgang abgebrochen.\nBis dahin indiziert: {indexed}")
        else:
            self.lbl_status.setText(f"Fertig. {indexed} neu, {skipped} übersprungen.")
            QMessageBox.information(self, "Fertig", f"Scan abgeschlossen!\n{indexed} Dateien im Index.")

    def set_ui_busy(self, busy):
        """Toggle the controls between the idle and the indexing state."""
        idle = not busy
        self.input_search.setEnabled(idle)
        self.btn_go.setEnabled(idle)
        self.folder_list.setEnabled(idle)
        # Adding or removing folders while the indexer writes would start a
        # second thread or modify the index underneath the running one.
        self.btn_add.setEnabled(idle)
        self.btn_remove.setEnabled(idle)
        self.btn_rescan.setVisible(idle)
        self.btn_cancel.setVisible(busy)
        if busy:
            # Range (0, 0) puts the bar into indeterminate mode.
            self.progress_bar.setRange(0, 0)
            self.progress_bar.show()
        else:
            self.progress_bar.hide()

    def perform_search(self):
        """Run the query from the input field and render the hits as HTML."""
        from html import escape

        query = self.input_search.text()
        if not query:
            return
        results = self.db.search(query)
        self.lbl_status.setText(f"{len(results)} relevante Treffer.")

        parts = []
        if not results:
            parts.append("<h3 style='color: gray; text-align: center; margin-top: 20px;'>Nichts gefunden.</h3>")
        for filename, filepath, snippet in results:
            file_url = QUrl.fromLocalFile(filepath).toString()
            # File names and contents are untrusted text (HTML files are
            # indexed too): escape them, then re-enable only the <b> markers
            # that the snippet query inserts around matches.
            safe_snippet = (escape(snippet or "")
                            .replace("&lt;b&gt;", "<b>")
                            .replace("&lt;/b&gt;", "</b>"))
            parts.append(f"""
            <div style='margin-bottom: 10px; padding: 10px; background-color: #f9f9f9; border-left: 4px solid #2980b9;'>
            <a href="{escape(file_url, quote=True)}" style='font-size: 16px; font-weight: bold; color: #2980b9; text-decoration: none;'>
            {escape(filename)}
            </a>
            <div style='color: #333; margin-top: 5px; font-family: sans-serif; font-size: 13px;'>{safe_snippet}</div>
            <div style='color: #999; font-size: 11px; margin-top: 4px;'>{escape(filepath)}</div>
            </div>
            """)
        self.result_browser.setHtml("".join(parts))

    def link_clicked(self, url):
        """Open the clicked result with the system's default application."""
        QDesktopServices.openUrl(url)

    def closeEvent(self, event):
        """Stop a running indexer and wait for it before the window closes."""
        if self.indexer_thread is not None and self.indexer_thread.isRunning():
            self.indexer_thread.stop()
            self.indexer_thread.wait()
        super().closeEvent(event)
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window,
    # run the event loop, and exit with the loop's return code.
    qt_app = QApplication(sys.argv)
    main_window = UffWindow()
    main_window.show()
    exit_code = qt_app.exec()
    sys.exit(exit_code)