From 81c7b0060f64cf5c3b656a25adfffaf4b3873caa Mon Sep 17 00:00:00 2001
From: Konstantin <konstantin.rossmann@gmail.com>
Date: Fri, 9 Jan 2026 16:09:33 +0100
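Subject: [PATCH] Remove outdated UFF-Search.spec file; replace pypdf with
 pdfplumber and pdfminer.six in requirements.txt and switch PDF text
 extraction in uff_app.py to pdfplumber; reweight the scoring logic in
 uff_app.py (WRatio for filenames, 20/80 filename/content weighting, larger
 exact-word bonus) for improved search accuracy.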

---
 UFF-Search.spec  | 38 --------------------------------------
 requirements.txt |  3 ++-
 uff_app.py       | 24 +++++++++++++-----------
 3 files changed, 15 insertions(+), 50 deletions(-)
 delete mode 100644 UFF-Search.spec

diff --git a/UFF-Search.spec b/UFF-Search.spec
deleted file mode 100644
index 1563e28..0000000
--- a/UFF-Search.spec
+++ /dev/null
@@ -1,38 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-
-
-a = Analysis(
-    ['uff_app.py'],
-    pathex=[],
-    binaries=[],
-    datas=[],
-    hiddenimports=['rapidfuzz', 'pypdf'],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    noarchive=False,
-    optimize=0,
-)
-pyz = PYZ(a.pure)
-
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.datas,
-    [],
-    name='UFF-Search',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    console=False,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)
diff --git a/requirements.txt b/requirements.txt
index 6bb09ce..392d738 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
-pypdf
+pdfplumber
+pdfminer.six
 rapidfuzz
 PyQt6
\ No newline at end of file
diff --git a/uff_app.py b/uff_app.py
index b83d239..7b995ef 100644
--- a/uff_app.py
+++ b/uff_app.py
@@ -1,7 +1,7 @@
 import sys
 import os
 import sqlite3
-from pypdf import PdfReader
+import pdfplumber
 
 # NEU: Für die Fuzzy-Logik
 from rapidfuzz import process, fuzz
@@ -120,19 +120,20 @@ class DatabaseHandler:
         scored_results = []
         
         for filename, path, snippet, content in rows:
-            # Wir berechnen Scores
-            score_name = fuzz.partial_ratio(query.lower(), filename.lower())
+            # Wir berechnen Scores mit besserer Gewichtung
+            score_name = fuzz.WRatio(query.lower(), filename.lower())
             
             # Content-Check: Wir nehmen Content (falls snippet zu kurz ist)
             # Begrenzung auf die ersten 5000 Zeichen für Performance
             check_content = content[:5000] if content else ""
             score_content = fuzz.partial_token_set_ratio(query.lower(), check_content.lower())
             
-            final_score = max(score_name, score_content)
+            # Gewichteter Durchschnitt: Inhalt ist wichtiger als Dateiname
+            final_score = (score_name * 0.2) + (score_content * 0.8)
             
-            # Bonus für exakte Wort-Treffer
+            # Bonus für exakte Wort-Treffer (jetzt stärker)
             if all(w.lower() in (filename + check_content).lower() for w in words):
-                final_score += 10
+                final_score += 20
             
             # Filter: Nur anzeigen, wenn Score halbwegs okay ist
             # Bei "vertraaag" vs "vertrag" ist der Score meist > 70
@@ -166,11 +167,12 @@ class IndexerThread(QThread):
         ext = os.path.splitext(filepath)[1].lower()
         try:
             if ext == ".pdf":
-                reader = PdfReader(filepath)
-                text = ""
-                for page in reader.pages:
-                    if page_text := page.extract_text(): text += page_text + "\n"
-                return text
+                with pdfplumber.open(filepath) as pdf:
+                    text = ""
+                    for page in pdf.pages:
+                        if page_text := page.extract_text():
+                            text += page_text + "\n"
+                    return text
             elif ext in [".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml"]:
                 with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                     return f.read()