import sys
import os
import sqlite3
from contextlib import closing

import pdfplumber
# Fuzzy matching, used to re-rank search candidates
from rapidfuzz import process, fuzz
from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
                             QHBoxLayout, QLineEdit, QPushButton, QLabel,
                             QFileDialog, QTextBrowser, QProgressBar, QMessageBox,
                             QListWidget, QListWidgetItem, QSplitter, QFrame)
from PyQt6.QtCore import Qt, QThread, pyqtSignal, QUrl
from PyQt6.QtGui import QDesktopServices


# --- 1. DATABASE MANAGER (with fuzzy ranking) ---
class DatabaseHandler:
    """Owns the SQLite index file and answers folder and search queries.

    The database holds an FTS5 virtual table ``documents`` (filename, path,
    content) and a plain table ``folders`` (path, alias). Every method opens
    its own short-lived connection and closes it again, even on error.
    """

    # Below this many strict FTS hits the fuzzy fallback scan is added.
    _MIN_STRICT_HITS = 5
    # Only the first N characters of a document are fuzzy-scored.
    _CONTENT_SCAN_CHARS = 5000
    # Added to the score when every query word occurs literally.
    _EXACT_MATCH_BONUS = 20
    # Results must score strictly above this value to be returned.
    _SCORE_THRESHOLD = 55
    # Maximum number of results handed back to the caller.
    _MAX_RESULTS = 50

    def __init__(self):
        """Resolve the per-user data directory, create it and the schema."""
        home = os.path.expanduser("~")
        if os.name == 'nt':
            # LOCALAPPDATA can be unset (services, stripped environments);
            # fall back to its conventional location instead of crashing in
            # os.path.join(None, ...).
            base_dir = os.getenv('LOCALAPPDATA') or os.path.join(
                home, "AppData", "Local")
        else:
            # Mac/Linux: ~/.local/share
            base_dir = os.path.join(home, ".local", "share")

        # Our own sub-folder; exist_ok avoids the check-then-create race.
        self.app_data_dir = os.path.join(base_dir, "UFF_Search")
        os.makedirs(self.app_data_dir, exist_ok=True)

        # Full path of the index database
        self.db_name = os.path.join(self.app_data_dir, "uff_index.db")

        # Debug info (useful when running from a terminal)
        print(f"Datenbank Pfad: {self.db_name}")
        self.init_db()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit."""
        return closing(sqlite3.connect(self.db_name))

    @staticmethod
    def _folder_prefixes(folder):
        """Return the path prefixes that identify files located inside *folder*.

        A trailing path separator is required so that a folder named
        ``docs`` does not also match a sibling named ``docs2``.
        """
        separators = tuple(s for s in (os.sep, os.altsep) if s)
        if folder.endswith(separators):
            return [folder]
        return [folder + sep for sep in separators]

    def init_db(self):
        """Create the ``documents`` FTS5 table and the ``folders`` table if missing."""
        with self._connect() as conn:
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS documents
                USING fts5(filename, path, content);
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS folders (
                    path TEXT PRIMARY KEY,
                    alias TEXT
                );
            """)
            conn.commit()

    def add_folder(self, path):
        """Register *path* as an indexed folder.

        Returns:
            True if the folder was newly inserted; False if it was already
            registered or the database reported an error.
        """
        try:
            with self._connect() as conn:
                cursor = conn.execute(
                    "INSERT OR IGNORE INTO folders (path, alias) VALUES (?, ?)",
                    (path, os.path.basename(path)),
                )
                conn.commit()
                # INSERT OR IGNORE never raises on a duplicate key, so the
                # affected-row count is the only way to detect one.
                return cursor.rowcount > 0
        except sqlite3.Error:
            return False

    def remove_folder(self, path):
        """Unregister *path* and delete the indexed documents stored below it."""
        with self._connect() as conn:
            conn.execute("DELETE FROM folders WHERE path = ?", (path,))
            # Exact prefix comparison instead of LIKE: LIKE would treat '_'
            # and '%' inside the path as wildcards and would also remove
            # documents of sibling folders sharing the same name prefix.
            for prefix in self._folder_prefixes(path):
                conn.execute(
                    "DELETE FROM documents WHERE substr(path, 1, ?) = ?",
                    (len(prefix), prefix),
                )
            conn.commit()

    def get_folders(self):
        """Return the paths of all registered folders as a list of strings."""
        with self._connect() as conn:
            rows = conn.execute("SELECT path FROM folders").fetchall()
        return [row[0] for row in rows]

    def _fetch_candidates(self, words):
        """Load candidate rows (filename, path, snippet, content) for scoring.

        First runs a strict FTS5 prefix query built from *words*. If that
        yields fewer than ``_MIN_STRICT_HITS`` rows (e.g. because of heavy
        typos), up to 1000 unfiltered documents are appended so the fuzzy
        scorer still has material to work on. Strict hits are kept rather
        than replaced, so a genuine match is never lost to the fallback.
        """
        strict_sql = """
            SELECT filename, path, snippet(documents, 2, '', '', '...', 15), content
            FROM documents
            WHERE documents MATCH ?
            LIMIT 200
        """
        fallback_sql = """
            SELECT filename, path, snippet(documents, 2, '', '', '...', 15), content
            FROM documents
            LIMIT 1000
        """
        rows = []
        with self._connect() as conn:
            if words:
                # "word"* matches every token starting with that word.
                match_expr = " OR ".join(f'"{w}"*' for w in words)
                try:
                    rows = conn.execute(strict_sql, (match_expr,)).fetchall()
                except sqlite3.Error:
                    # A query FTS5 cannot parse just means "no strict hits".
                    rows = []

            if len(rows) < self._MIN_STRICT_HITS:
                known_paths = {row[1] for row in rows}
                extra = conn.execute(fallback_sql).fetchall()
                rows.extend(row for row in extra if row[1] not in known_paths)
        return rows

    def search(self, query):
        """Search the index and return the best matches, best first.

        Args:
            query: Free-text query typed by the user.

        Returns:
            A list of at most ``_MAX_RESULTS`` tuples
            ``(filename, path, snippet)`` ordered by descending score.
            An empty or whitespace-only query returns an empty list.
        """
        if not query.strip():
            return []

        words = query.replace('"', '').split()
        rows = self._fetch_candidates(words)

        # Loop invariants, computed once.
        needle = query.lower()
        lowered_words = [w.lower() for w in words]

        scored = []
        for filename, path, snippet, content in rows:
            # Limit the scanned content for performance.
            head = content[:self._CONTENT_SCAN_CHARS] if content else ""

            score_name = fuzz.WRatio(needle, filename.lower())
            score_content = fuzz.partial_token_set_ratio(needle, head.lower())

            # Weighted average: content matters more than the file name.
            final_score = (score_name * 0.2) + (score_content * 0.8)

            # Bonus when every query word occurs literally. Requires at
            # least one word: all() over an empty list is True and would
            # otherwise hand the bonus to every document.
            haystack = (filename + head).lower()
            if lowered_words and all(w in haystack for w in lowered_words):
                final_score += self._EXACT_MATCH_BONUS

            if final_score > self._SCORE_THRESHOLD:
                scored.append((final_score, (filename, path, snippet)))

        # Stable sort keeps database order among equally scored rows.
        scored.sort(key=lambda entry: entry[0], reverse=True)
        return [data for _, data in scored[:self._MAX_RESULTS]]


# --- 2.
# INDEXER ---
class IndexerThread(QThread):
    """Worker thread that rebuilds the ``documents`` index for one folder.

    Signals:
        progress_signal(str): status text, emitted once per file visited.
        finished_signal(int, int, bool): ``(indexed, skipped, was_cancelled)``;
            emitted exactly once when ``run`` ends, including after a
            database error, so the receiver can always leave its busy state.
    """

    progress_signal = pyqtSignal(str)
    finished_signal = pyqtSignal(int, int, bool)

    # Extensions that are read as plain UTF-8 text.
    _TEXT_EXTENSIONS = frozenset({
        ".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml",
    })

    def __init__(self, folder_path, db_name="uff_index.db"):
        """Store the folder to walk and the SQLite database file to write to."""
        super().__init__()
        self.folder_path = folder_path
        self.db_name = db_name
        # Polled by run(); cleared by stop() to request cancellation.
        self.is_running = True

    def stop(self):
        """Ask the running scan to stop before it processes the next file."""
        self.is_running = False

    def _extract_text(self, filepath):
        """Return the text of *filepath*, or None if unsupported or unreadable.

        PDFs are read page by page with pdfplumber; the extensions listed in
        ``_TEXT_EXTENSIONS`` are read as UTF-8 with undecodable bytes
        dropped. Extraction is best effort: any failure yields None.
        """
        ext = os.path.splitext(filepath)[1].lower()
        try:
            if ext == ".pdf":
                with pdfplumber.open(filepath) as pdf:
                    page_texts = (page.extract_text() for page in pdf.pages)
                    # Pages without extractable text are left out.
                    return "".join(f"{text}\n" for text in page_texts if text)
            if ext in self._TEXT_EXTENSIONS:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    return f.read()
        except Exception:
            # Broken or locked files must not abort the whole scan.
            return None
        return None

    def _purge_folder(self, conn):
        """Delete the rows previously indexed for files below the folder.

        Uses an exact prefix comparison that includes the path separator.
        A LIKE pattern would treat '_' and '%' in the folder name as
        wildcards and would also delete rows of sibling folders whose name
        merely starts with this folder's name.
        """
        folder = self.folder_path
        separators = tuple(s for s in (os.sep, os.altsep) if s)
        if folder.endswith(separators):
            prefixes = [folder]
        else:
            prefixes = [folder + sep for sep in separators]
        for prefix in prefixes:
            conn.execute(
                "DELETE FROM documents WHERE substr(path, 1, ?) = ?",
                (len(prefix), prefix),
            )

    def run(self):
        """Walk the folder, index every readable file, then report the totals.

        Files with no extractable or only-whitespace text count as skipped.
        On cancellation the rows inserted so far are committed. On a
        database error the run is reported as cancelled with zero indexed
        files, because the uncommitted inserts are discarded when the
        connection closes.
        """
        indexed = 0
        skipped = 0
        was_cancelled = False
        conn = None
        try:
            conn = sqlite3.connect(self.db_name)
            self._purge_folder(conn)
            conn.commit()

            for root, _dirs, files in os.walk(self.folder_path):
                if not self.is_running:
                    was_cancelled = True
                    break

                for file in files:
                    if not self.is_running:
                        was_cancelled = True
                        break

                    self.progress_signal.emit(f"Lese: {file}...")
                    path = os.path.join(root, file)
                    content = self._extract_text(path)

                    if content and content.strip():
                        conn.execute(
                            "INSERT INTO documents (filename, path, content) VALUES (?, ?, ?)",
                            (file, path, content),
                        )
                        indexed += 1
                    else:
                        skipped += 1

                if was_cancelled:
                    break

            conn.commit()
        except sqlite3.Error as exc:
            # Without this handler the exception would leave the thread
            # before finished_signal is emitted.
            print(f"Indexer-Fehler: {exc}", file=sys.stderr)
            indexed = 0
            was_cancelled = True
        finally:
            if conn is not None:
                conn.close()
            self.finished_signal.emit(indexed, skipped, was_cancelled)


# --- 3.
UI (Unverändert) --- class UffWindow(QMainWindow): def __init__(self): super().__init__() self.db = DatabaseHandler() self.indexer_thread = None self.initUI() self.load_saved_folders() def initUI(self): self.setWindowTitle("UFF Text Search v3.0 (Fuzzy)") self.resize(1000, 700) central = QWidget() self.setCentralWidget(central) main_layout = QHBoxLayout(central) # LINKS left_panel = QFrame() left_panel.setFixedWidth(250) left_layout = QVBoxLayout(left_panel) left_layout.setContentsMargins(0, 0, 0, 0) lbl_folders = QLabel("📂 Meine Ordner") lbl_folders.setStyleSheet("font-weight: bold; font-size: 14px;") self.folder_list = QListWidget() self.folder_list.setSelectionMode(QListWidget.SelectionMode.SingleSelection) btn_add = QPushButton(" + Hinzufügen") btn_add.clicked.connect(self.add_new_folder) btn_remove = QPushButton(" - Entfernen") btn_remove.clicked.connect(self.delete_selected_folder) self.btn_rescan = QPushButton(" ↻ Neu scannen") self.btn_rescan.clicked.connect(self.rescan_selected_folder) self.btn_cancel = QPushButton("🛑 Abbrechen") self.btn_cancel.setStyleSheet("background-color: #ffcccc; color: #cc0000; font-weight: bold;") self.btn_cancel.clicked.connect(self.cancel_indexing) self.btn_cancel.hide() left_layout.addWidget(lbl_folders) left_layout.addWidget(self.folder_list) left_layout.addWidget(btn_add) left_layout.addWidget(btn_remove) left_layout.addStretch() left_layout.addWidget(self.btn_rescan) left_layout.addWidget(self.btn_cancel) # RECHTS right_panel = QWidget() right_layout = QVBoxLayout(right_panel) search_container = QHBoxLayout() self.input_search = QLineEdit() self.input_search.setPlaceholderText("Suchbegriff... 
(Fuzzy aktiv)") self.input_search.returnPressed.connect(self.perform_search) self.input_search.setStyleSheet("padding: 8px; font-size: 14px;") btn_go = QPushButton("Suchen") btn_go.setFixedWidth(100) btn_go.clicked.connect(self.perform_search) search_container.addWidget(self.input_search) search_container.addWidget(btn_go) self.lbl_status = QLabel("Bereit.") self.lbl_status.setStyleSheet("color: #666;") self.progress_bar = QProgressBar() self.progress_bar.hide() self.result_browser = QTextBrowser() self.result_browser.setOpenExternalLinks(False) self.result_browser.anchorClicked.connect(self.link_clicked) self.result_browser.setStyleSheet("background-color: white; border: 1px solid #ccc;") right_layout.addLayout(search_container) right_layout.addWidget(self.lbl_status) right_layout.addWidget(self.progress_bar) right_layout.addWidget(self.result_browser) splitter = QSplitter(Qt.Orientation.Horizontal) splitter.addWidget(left_panel) splitter.addWidget(right_panel) splitter.setSizes([250, 750]) main_layout.addWidget(splitter) # LOGIK def load_saved_folders(self): self.folder_list.clear() folders = self.db.get_folders() for f in folders: item = QListWidgetItem(f) item.setToolTip(f) self.folder_list.addItem(item) def add_new_folder(self): folder = QFileDialog.getExistingDirectory(self, "Ordner wählen") if folder: if self.db.add_folder(folder): self.load_saved_folders() self.start_indexing(folder) else: QMessageBox.warning(self, "Info", "Ordner ist bereits vorhanden.") def delete_selected_folder(self): item = self.folder_list.currentItem() if not item: return path = item.text() if QMessageBox.question(self, "Löschen", f"Ordner entfernen?\n{path}", QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No) == QMessageBox.StandardButton.Yes: self.db.remove_folder(path) self.load_saved_folders() self.result_browser.clear() self.lbl_status.setText("Ordner entfernt.") def rescan_selected_folder(self): item = self.folder_list.currentItem() if not item: 
QMessageBox.information(self, "Info", "Bitte Ordner links auswählen.") return self.start_indexing(item.text()) def start_indexing(self, folder): self.set_ui_busy(True) self.lbl_status.setText(f"Starte... {os.path.basename(folder)}") # HIER WAR DER FEHLER: # Wir müssen dem Thread explizit sagen, wo die Datenbank liegt! # self.db.db_name enthält den korrekten Pfad (C:\Users\...\AppData\...) self.indexer_thread = IndexerThread(folder, db_name=self.db.db_name) self.indexer_thread.progress_signal.connect(lambda msg: self.lbl_status.setText(msg)) self.indexer_thread.finished_signal.connect(self.indexing_finished) self.indexer_thread.start() def cancel_indexing(self): if self.indexer_thread and self.indexer_thread.isRunning(): self.lbl_status.setText("Breche ab...") self.indexer_thread.stop() def indexing_finished(self, indexed, skipped, was_cancelled): self.set_ui_busy(False) if was_cancelled: self.lbl_status.setText(f"Abgebrochen. ({indexed} indiziert).") QMessageBox.information(self, "Abbruch", f"Vorgang abgebrochen.\nBis dahin indiziert: {indexed}") else: self.lbl_status.setText(f"Fertig. {indexed} neu, {skipped} übersprungen.") QMessageBox.information(self, "Fertig", f"Scan abgeschlossen!\n{indexed} Dateien im Index.") def set_ui_busy(self, busy): self.input_search.setEnabled(not busy) self.folder_list.setEnabled(not busy) self.btn_rescan.setVisible(not busy) self.btn_cancel.setVisible(busy) if busy: self.progress_bar.setRange(0, 0) self.progress_bar.show() else: self.progress_bar.hide() def perform_search(self): query = self.input_search.text() if not query: return # Suche ausführen (jetzt mit Fuzzy!) results = self.db.search(query) self.lbl_status.setText(f"{len(results)} relevante Treffer.") html = "" if not results: html = "