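Remove the outdated UFF-Search.spec file; update requirements.txt to replace pypdf with pdfplumber and pdfminer.six, and switch PDF text extraction in uff_app.py from pypdf's PdfReader to pdfplumber accordingly; enhance the scoring logic in uff_app.py for improved search accuracy (fuzz.WRatio instead of fuzz.partial_ratio for filename matching, a weighted 20 % filename / 80 % content average instead of the maximum of both scores, and a larger bonus of 20 instead of 10 for exact word hits).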

This commit is contained in:
2026-01-09 16:09:33 +01:00
parent 896534a91f
commit 81c7b0060f
3 changed files with 15 additions and 50 deletions

View File

@@ -1,38 +0,0 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for the UFF-Search executable.
# Analysis, PYZ and EXE are not imported in this file; presumably PyInstaller
# provides them when it evaluates the spec -- confirm against the PyInstaller docs.
# Stage 1: describe the entry script and any extra inputs to the build.
a = Analysis(
# Entry-point script of the application.
['uff_app.py'],
pathex=[],
binaries=[],
datas=[],
# Modules named explicitly for inclusion in the build.
# NOTE(review): 'pypdf' is listed here, while uff_app.py imports pdfplumber
# after this change -- this entry is stale if the spec is ever restored.
hiddenimports=['rapidfuzz', 'pypdf'],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
noarchive=False,
optimize=0,
)
# Stage 2: build the archive object from the pure-Python part of the analysis.
pyz = PYZ(a.pure)
# Stage 3: assemble the executable from the archive plus the scripts,
# binaries and data files collected by the analysis.
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.datas,
[],
# Name given to the build output.
name='UFF-Search',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
# NOTE(review): presumably produces a windowed (no console) application,
# which would match the PyQt6 dependency in requirements.txt -- confirm.
console=False,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)

View File

@@ -1,3 +1,4 @@
pypdf
pdfplumber
pdfminer.six
rapidfuzz
PyQt6

View File

@@ -1,7 +1,7 @@
import sys
import os
import sqlite3
from pypdf import PdfReader
import pdfplumber
# NEU: Für die Fuzzy-Logik
from rapidfuzz import process, fuzz
@@ -120,19 +120,20 @@ class DatabaseHandler:
scored_results = []
for filename, path, snippet, content in rows:
# Wir berechnen Scores
score_name = fuzz.partial_ratio(query.lower(), filename.lower())
# Wir berechnen Scores mit besserer Gewichtung
score_name = fuzz.WRatio(query.lower(), filename.lower())
# Content-Check: Wir nehmen Content (falls snippet zu kurz ist)
# Begrenzung auf die ersten 5000 Zeichen für Performance
check_content = content[:5000] if content else ""
score_content = fuzz.partial_token_set_ratio(query.lower(), check_content.lower())
final_score = max(score_name, score_content)
# Gewichteter Durchschnitt: Inhalt ist wichtiger als Dateiname
final_score = (score_name * 0.2) + (score_content * 0.8)
# Bonus für exakte Wort-Treffer
# Bonus für exakte Wort-Treffer (jetzt stärker)
if all(w.lower() in (filename + check_content).lower() for w in words):
final_score += 10
final_score += 20
# Filter: Nur anzeigen, wenn Score halbwegs okay ist
# Bei "vertraaag" vs "vertrag" ist der Score meist > 70
@@ -166,10 +167,11 @@ class IndexerThread(QThread):
ext = os.path.splitext(filepath)[1].lower()
try:
if ext == ".pdf":
reader = PdfReader(filepath)
with pdfplumber.open(filepath) as pdf:
text = ""
for page in reader.pages:
if page_text := page.extract_text(): text += page_text + "\n"
for page in pdf.pages:
if page_text := page.extract_text():
text += page_text + "\n"
return text
elif ext in [".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml"]:
with open(filepath, "r", encoding="utf-8", errors="ignore") as f: