report_generation/services/kb_service.py
xxy aa98ea2623 @
Initial commit

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
@
2026-06-05 18:45:29 +08:00

81 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
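services/kb_service.py (slimmed-down version)
Keeps only the knowledge-base document disk-path resolution helpers needed by the report-generation "figure extraction" step.
Extracted from the full kb_service.py of eval_report, with unrelated logic (knowledge-base CRUD / upload / worker, etc.) removed.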
"""
from __future__ import annotations
from pathlib import Path
from typing import List, Optional
from config import settings
from database.models import KbDocument as KbDocumentModel
def _normalize_rel_path(path: str) -> str:
"""'a\\b\\c' 规范为 'a/b/c',并去掉前导 '/'"""
s = str(path or "").replace("\\", "/").strip()
while s.startswith("./"):
s = s[2:]
return s.lstrip("/")
def _kb_doc_storage_rel_path(
    file_path_dir: Optional[str],
    basename: str,
    storage_rel_path: Optional[str] = None,
) -> str:
    """Return the storage path (file name included) relative to the project directory.

    A non-empty ``storage_rel_path`` takes priority (per the original note it
    is written at confirm time). Otherwise the path is assembled from the
    directory and the file name.

    Args:
        file_path_dir: Directory part of the path; may be ``None`` or empty.
        basename: File name; surrounding whitespace is stripped.
        storage_rel_path: Explicit relative storage path, if one was recorded.

    Returns:
        The normalized ``storage_rel_path`` when non-empty; else
        ``"<dir>/<name>"`` when both parts are present; else whichever single
        part is non-empty (file name first), or ``""`` when both are empty.
    """
    explicit = _normalize_rel_path(str(storage_rel_path or ""))
    if explicit:
        return explicit

    directory = _normalize_rel_path(str(file_path_dir or ""))
    filename = str(basename or "").strip()
    if not (directory and filename):
        # At most one part is available: prefer the file name, else the directory.
        return filename or directory
    return f"{directory}/{filename}"
def _kb_doc_path_candidates_for_model(doc_root: Path, doc: KbDocumentModel) -> List[Path]:
    """Build the ordered list of candidate disk paths for a document.

    Candidates, in priority order, all under ``doc_root / doc.project_id``:

    1. the relative storage path from ``_kb_doc_storage_rel_path``;
    2. ``<file_path>/<name>`` when both are non-empty;
    3. ``<name>`` alone;
    4. a ``_missing_`` placeholder, only when none of the above applies.

    Every candidate is passed through ``Path.resolve()`` and duplicates
    (compared by their string form) are dropped, keeping the first occurrence.

    Args:
        doc_root: Root directory that holds the per-project folders.
        doc: Document record; ``project_id``, ``file_path`` and ``name`` are
            read, and ``storage_rel_path`` is read when the attribute exists.

    Returns:
        A non-empty list of resolved, de-duplicated paths.

    NOTE(review): candidates are not checked to stay inside ``doc_root``, so a
    stored path containing ``..`` can resolve outside it — confirm that the
    stored values are trusted.
    """
    storage_rel = _kb_doc_storage_rel_path(
        doc.file_path,
        doc.name,
        getattr(doc, "storage_rel_path", None),
    )
    project_dir = doc_root / doc.project_id
    file_name = str(doc.name or "").strip()
    legacy_dir = _normalize_rel_path(str(doc.file_path or ""))

    # Path segments to append to the project directory, in priority order.
    part_sets = []
    if storage_rel:
        part_sets.append((storage_rel,))
    if legacy_dir and file_name:
        part_sets.append((legacy_dir, file_name))
    if file_name:
        part_sets.append((file_name,))
    if not part_sets:
        part_sets.append(("_missing_",))

    # Dicts keep insertion order, so this de-duplicates by string form while
    # preserving the priority order and the first path object for each key.
    unique: dict[str, Path] = {}
    for parts in part_sets:
        resolved = project_dir.joinpath(*parts).resolve()
        unique.setdefault(str(resolved), resolved)
    return list(unique.values())
def _kb_doc_absolute_file_path_for_model(doc_root: Path, doc: KbDocumentModel) -> Path:
    """Return the absolute disk path to use for a document.

    Args:
        doc_root: Root directory that holds the per-project folders.
        doc: Document record whose candidate paths are probed.

    Returns:
        The first candidate from ``_kb_doc_path_candidates_for_model`` that is
        an existing regular file; when none exists, the highest-priority
        (first) candidate.
    """
    # Build the candidate list once: every build re-resolves each path
    # against the filesystem, so a second build for the fallback is wasted work.
    candidates = _kb_doc_path_candidates_for_model(doc_root, doc)
    return next((p for p in candidates if p.is_file()), candidates[0])
def _kb_doc_file_exists_for_model(doc: KbDocumentModel) -> bool:
    """Report whether the document exists on disk as a regular file.

    Probes every candidate path in turn (multi-path fallback, kept for
    compatibility with historical ``file_path`` / ``name`` combinations).

    Args:
        doc: Document record to look up.

    Returns:
        ``True`` as soon as one candidate is an existing file, else ``False``.
    """
    # NOTE(review): the setting is spelled DOC_PAT here — confirm this matches
    # the attribute defined in config (it may have been meant as DOC_PATH).
    root = Path(settings.DOC_PAT).resolve()
    for candidate in _kb_doc_path_candidates_for_model(root, doc):
        if candidate.is_file():
            return True
    return False