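Remove the outdated UFF-Search.spec file and update requirements.txt to replace pypdf with pdfplumber and pdfminer.six; switch PDF text extraction in uff_app.py from pypdf's PdfReader to pdfplumber, and rework the search scoring for better accuracy (filename score now uses fuzz.WRatio instead of fuzz.partial_ratio, the final score is a weighted average of 20% filename and 80% content instead of the maximum of the two, and the exact-word-match bonus is raised from 10 to 20).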
This commit is contained in:
@@ -1,38 +0,0 @@
|
|||||||
# -*- mode: python ; coding: utf-8 -*-
|
|
||||||
|
|
||||||
|
|
||||||
a = Analysis(
|
|
||||||
['uff_app.py'],
|
|
||||||
pathex=[],
|
|
||||||
binaries=[],
|
|
||||||
datas=[],
|
|
||||||
hiddenimports=['rapidfuzz', 'pypdf'],
|
|
||||||
hookspath=[],
|
|
||||||
hooksconfig={},
|
|
||||||
runtime_hooks=[],
|
|
||||||
excludes=[],
|
|
||||||
noarchive=False,
|
|
||||||
optimize=0,
|
|
||||||
)
|
|
||||||
pyz = PYZ(a.pure)
|
|
||||||
|
|
||||||
exe = EXE(
|
|
||||||
pyz,
|
|
||||||
a.scripts,
|
|
||||||
a.binaries,
|
|
||||||
a.datas,
|
|
||||||
[],
|
|
||||||
name='UFF-Search',
|
|
||||||
debug=False,
|
|
||||||
bootloader_ignore_signals=False,
|
|
||||||
strip=False,
|
|
||||||
upx=True,
|
|
||||||
upx_exclude=[],
|
|
||||||
runtime_tmpdir=None,
|
|
||||||
console=False,
|
|
||||||
disable_windowed_traceback=False,
|
|
||||||
argv_emulation=False,
|
|
||||||
target_arch=None,
|
|
||||||
codesign_identity=None,
|
|
||||||
entitlements_file=None,
|
|
||||||
)
|
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
pypdf
|
pdfplumber
|
||||||
|
pdfminer.six
|
||||||
rapidfuzz
|
rapidfuzz
|
||||||
PyQt6
|
PyQt6
|
||||||
24
uff_app.py
24
uff_app.py
@@ -1,7 +1,7 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from pypdf import PdfReader
|
import pdfplumber
|
||||||
|
|
||||||
# NEU: Für die Fuzzy-Logik
|
# NEU: Für die Fuzzy-Logik
|
||||||
from rapidfuzz import process, fuzz
|
from rapidfuzz import process, fuzz
|
||||||
@@ -120,19 +120,20 @@ class DatabaseHandler:
|
|||||||
scored_results = []
|
scored_results = []
|
||||||
|
|
||||||
for filename, path, snippet, content in rows:
|
for filename, path, snippet, content in rows:
|
||||||
# Wir berechnen Scores
|
# Wir berechnen Scores mit besserer Gewichtung
|
||||||
score_name = fuzz.partial_ratio(query.lower(), filename.lower())
|
score_name = fuzz.WRatio(query.lower(), filename.lower())
|
||||||
|
|
||||||
# Content-Check: Wir nehmen Content (falls snippet zu kurz ist)
|
# Content-Check: Wir nehmen Content (falls snippet zu kurz ist)
|
||||||
# Begrenzung auf die ersten 5000 Zeichen für Performance
|
# Begrenzung auf die ersten 5000 Zeichen für Performance
|
||||||
check_content = content[:5000] if content else ""
|
check_content = content[:5000] if content else ""
|
||||||
score_content = fuzz.partial_token_set_ratio(query.lower(), check_content.lower())
|
score_content = fuzz.partial_token_set_ratio(query.lower(), check_content.lower())
|
||||||
|
|
||||||
final_score = max(score_name, score_content)
|
# Gewichteter Durchschnitt: Inhalt ist wichtiger als Dateiname
|
||||||
|
final_score = (score_name * 0.2) + (score_content * 0.8)
|
||||||
|
|
||||||
# Bonus für exakte Wort-Treffer
|
# Bonus für exakte Wort-Treffer (jetzt stärker)
|
||||||
if all(w.lower() in (filename + check_content).lower() for w in words):
|
if all(w.lower() in (filename + check_content).lower() for w in words):
|
||||||
final_score += 10
|
final_score += 20
|
||||||
|
|
||||||
# Filter: Nur anzeigen, wenn Score halbwegs okay ist
|
# Filter: Nur anzeigen, wenn Score halbwegs okay ist
|
||||||
# Bei "vertraaag" vs "vertrag" ist der Score meist > 70
|
# Bei "vertraaag" vs "vertrag" ist der Score meist > 70
|
||||||
@@ -166,11 +167,12 @@ class IndexerThread(QThread):
|
|||||||
ext = os.path.splitext(filepath)[1].lower()
|
ext = os.path.splitext(filepath)[1].lower()
|
||||||
try:
|
try:
|
||||||
if ext == ".pdf":
|
if ext == ".pdf":
|
||||||
reader = PdfReader(filepath)
|
with pdfplumber.open(filepath) as pdf:
|
||||||
text = ""
|
text = ""
|
||||||
for page in reader.pages:
|
for page in pdf.pages:
|
||||||
if page_text := page.extract_text(): text += page_text + "\n"
|
if page_text := page.extract_text():
|
||||||
return text
|
text += page_text + "\n"
|
||||||
|
return text
|
||||||
elif ext in [".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml"]:
|
elif ext in [".txt", ".md", ".py", ".json", ".csv", ".html", ".log", ".ini", ".xml"]:
|
||||||
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|||||||
Reference in New Issue
Block a user