81 lines
2.7 KiB
Python
81 lines
2.7 KiB
Python
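"""
services/kb_service.py (slimmed-down version)

Keeps only the helpers that resolve the on-disk path of knowledge-base
documents, as needed by the "figure extraction" step of report generation.
Extracted from the full kb_service.py of eval_report, with the unrelated
knowledge-base CRUD / upload / worker logic removed.
"""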
|
||
|
||
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
from typing import List, Optional
|
||
|
||
from config import settings
|
||
from database.models import KbDocument as KbDocumentModel
|
||
|
||
|
||
def _normalize_rel_path(path: str) -> str:
    """Return *path* as a forward-slash relative path.

    Backslashes are turned into ``/``, surrounding whitespace is stripped,
    every leading ``./`` is dropped and finally all leading ``/`` are
    removed. A falsy *path* (``None`` or an empty string) yields ``""``.
    """
    text = str(path or "")
    text = text.replace("\\", "/").strip()
    # Peel off any number of leading "./" segments before removing slashes.
    while text[:2] == "./":
        text = text[2:]
    return text.lstrip("/")
|
||
|
||
|
||
def _kb_doc_storage_rel_path(
    file_path_dir: Optional[str],
    basename: str,
    storage_rel_path: Optional[str] = None,
) -> str:
    """Build a document's storage path (file name included), relative to its project directory.

    Args:
        file_path_dir: Directory part of the path; may be ``None`` or empty.
        basename: File name; surrounding whitespace is stripped.
        storage_rel_path: Explicit relative path. When it is non-empty after
            normalization it takes precedence and is returned on its own
            (the original note says this value is written at "confirm" time).

    Returns:
        The normalized ``storage_rel_path`` if present; otherwise
        ``"<dir>/<basename>"`` when both parts are non-empty; otherwise
        whichever single part is non-empty, or ``""`` when both are empty.
    """
    explicit = _normalize_rel_path(str(storage_rel_path or ""))
    if explicit:
        return explicit

    directory = _normalize_rel_path(str(file_path_dir or ""))
    filename = str(basename or "").strip()
    if not directory:
        return filename
    if not filename:
        return directory
    return f"{directory}/{filename}"
|
||
|
||
|
||
def _kb_doc_path_candidates_for_model(doc_root: Path, doc: KbDocumentModel) -> List[Path]:
    """Return the candidate on-disk paths for *doc*, highest priority first.

    All candidates live under ``doc_root / doc.project_id`` and are resolved.
    In priority order they are:

    1. the storage-relative path computed from ``doc.file_path``,
       ``doc.name`` and the optional ``doc.storage_rel_path`` attribute;
    2. ``<normalized doc.file_path>/<doc.name>``;
    3. ``<doc.name>`` directly inside the project directory.

    Empty components skip the corresponding candidate. If nothing could be
    built, a single ``_missing_`` placeholder path is returned, so the
    result is never empty. Duplicates (compared by string form) are removed
    while keeping the first occurrence.
    """
    rel_path = _kb_doc_storage_rel_path(
        doc.file_path,
        doc.name,
        getattr(doc, "storage_rel_path", None),
    )
    project_dir = doc_root / doc.project_id
    file_name = str(doc.name or "").strip()
    dir_part = _normalize_rel_path(str(doc.file_path or ""))

    raw: List[Path] = []
    if rel_path:
        raw.append((project_dir / rel_path).resolve())
    if dir_part and file_name:
        raw.append((project_dir / dir_part / file_name).resolve())
    if file_name:
        raw.append((project_dir / file_name).resolve())
    if not raw:
        raw.append((project_dir / "_missing_").resolve())

    # An insertion-ordered dict keyed by the string form drops duplicates
    # while preserving priority order; setdefault keeps the first path seen.
    unique = {}
    for candidate in raw:
        unique.setdefault(str(candidate), candidate)
    return list(unique.values())
|
||
|
||
|
||
def _kb_doc_absolute_file_path_for_model(doc_root: Path, doc: KbDocumentModel) -> Path:
    """Return the absolute path to use for *doc* under *doc_root*.

    The first candidate that is an existing regular file wins. When no
    candidate exists on disk, the highest-priority candidate is returned
    instead, so the result may point to a path that does not exist.
    """
    # Build the candidate list once; building it again for the fallback
    # would redo every path resolution for no benefit.
    candidates = _kb_doc_path_candidates_for_model(doc_root, doc)
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    # The candidate helper always yields at least one entry (it falls back
    # to a "_missing_" placeholder), so indexing [0] is safe.
    return candidates[0]
|
||
|
||
|
||
def _kb_doc_file_exists_for_model(doc: KbDocumentModel) -> bool:
    """Tell whether *doc* exists as a regular file at any candidate path.

    The document root is taken from ``settings`` and resolved; every
    candidate path for the document is then probed in priority order (the
    original note says the multi-path fallback is there for compatibility
    with legacy ``file_path`` / ``name`` combinations).
    """
    # NOTE(review): the setting is read as ``DOC_PAT`` - confirm this is the
    # intended attribute name and not a typo for ``DOC_PATH``.
    root_dir = Path(settings.DOC_PAT).resolve()
    for candidate in _kb_doc_path_candidates_for_model(root_dir, doc):
        if candidate.is_file():
            return True
    return False
|