127 lines
5.5 KiB
Python
127 lines
5.5 KiB
Python
"""
|
||
services/declaration_service.py
|
||
为每个目录(章节)生成一个"声明"。
|
||
|
||
声明:一段说明该章节应写什么、结构与约束的撰写指引,存入
|
||
report_template_sections.section_prompt。
|
||
|
||
优先用 LLM(结合章节标题 + 该章节正文)生成;未配置或失败时
|
||
回退到确定性模板,保证流程稳定可用。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import re
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
|
||
from config import settings
|
||
from services.llm_client import chat_completions_json, llm_configured
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# System prompt sent with every per-section LLM call. It instructs the model
# to read the section's sample text and summarise how the section should be
# written (content points, structure, data requirements, writing constraints),
# and to answer with nothing but a JSON object {"guide": "..."} whose value is
# a plain-text guide of at most 300 characters without markdown headings.
# When the sample is missing or too short the model is told to produce a
# generic guide from the section title instead.
_SYSTEM_PROMPT = (
    "你是报告模板专家。任务:阅读给定章节的范文(参考正文),总结这一章应该怎么写,"
    "作为后续报告撰写该章节的写作指引。需提炼:①内容要点(写哪些事项);"
    "②组织结构(应有的小节/条目顺序);③数据与口径要求(需引用的对比/指标/表格等);"
    "④写作约束(先事实后评价、缺失写「待补充」、不得编造)。"
    "严格要求:不要输出任何思考过程或解释;只输出 JSON 对象 {\"guide\": \"...\"};"
    "guide 为 300 字以内的写作指引纯文本(不含 markdown 标题);"
    "范文缺失或过短时,按章节标题给出通用写作指引。"
)
|
||
|
||
|
||
def _strip_number_prefix(title: str) -> str:
|
||
t = str(title or "").strip()
|
||
t = re.sub(r"^(?:\d+(?:\.\d+)*|[一二三四五六七八九十]+[、..])\s*", "", t).strip()
|
||
return t
|
||
|
||
|
||
def _fallback_declaration(section_title: str) -> str:
    """Build the deterministic writing guide used when no LLM guide is available.

    The section title is passed through ``_strip_number_prefix``; if nothing
    remains, the generic label "本章节" is used in its place.

    Args:
        section_title: Raw section title, possibly carrying a number prefix.

    Returns:
        A fixed three-sentence guide with the section label interpolated into
        the first sentence.
    """
    label = _strip_number_prefix(section_title) or "本章节"
    sentences = [
        f"本章节为「{label}」。撰写时应紧扣标题主题,先陈述事实与数据,再给出分析与评价;",
        "结构需与标题保持一致,条理清晰、用语规范;",
        "所有结论须有依据,缺失信息写「待补充」,禁止编造。",
    ]
    return "".join(sentences)
|
||
|
||
|
||
def _build_user_prompt(section_title: str, content: str) -> str:
|
||
body = (content or "").strip()
|
||
if len(body) > 2500:
|
||
body = body[:2500]
|
||
body_block = f"\n\n该章节范文(参考正文,节选):\n```\n{body}\n```" if body else ""
|
||
return (
|
||
f"章节标题:{section_title}{body_block}\n\n"
|
||
f"请根据上述范文,总结该章节应该怎么写,并只返回 JSON:{{\"guide\": \"300字以内的写作指引\"}}。"
|
||
)
|
||
|
||
|
||
def generate_declaration(section_title: str, content: str = "") -> str:
    """Produce the writing guide for a single section.

    The LLM is consulted only when ``settings.DECLARATION_USE_LLM`` (default
    ``True``) is truthy and ``llm_configured()`` reports a usable client.
    The guide is taken from the ``"guide"`` key of the JSON reply.  Any
    exception raised while calling the LLM or reading its reply is logged as
    a warning and swallowed, so this function always returns a guide.

    Args:
        section_title: Title of the section.
        content: Sample body text for the section (optional).

    Returns:
        The stripped LLM guide when it is non-empty, otherwise the
        deterministic text from ``_fallback_declaration``.
    """
    llm_enabled = bool(getattr(settings, "DECLARATION_USE_LLM", True))
    # Short-circuit keeps llm_configured() from being called when the
    # feature flag is off.
    if not (llm_enabled and llm_configured()):
        return _fallback_declaration(section_title)

    guide_text = ""
    try:
        reply = chat_completions_json(
            system_prompt=_SYSTEM_PROMPT,
            user_prompt=_build_user_prompt(section_title, content),
            temperature=0.2,
            max_tokens=2048,
        )
        guide_text = str((reply or {}).get("guide") or "").strip()
    except Exception as exc:  # noqa: BLE001 - safety net: the LLM must never break the main flow
        logger.warning("生成章节声明失败,使用兜底模板 | title=%s | err=%s", section_title, exc)
        guide_text = ""

    if guide_text:
        return guide_text
    return _fallback_declaration(section_title)
|
||
|
||
|
||
def _content_for_section(s: dict, content_by_key: dict[str, str]) -> str:
|
||
"""目录键可能是 canonical 形式,优先用标题中的编号前缀去匹配正文。"""
|
||
title = str(s.get("sectionTitle") or "")
|
||
m = re.match(r"^(\d+(?:\.\d+)*)", title.strip())
|
||
num = m.group(1) if m else ""
|
||
return content_by_key.get(num, "") or content_by_key.get(str(s.get("sectionKey") or ""), "")
|
||
|
||
|
||
def generate_declarations(sections: list[dict], content_by_key: dict[str, str] | None = None) -> list[str]:
    """Generate a writing guide for every section, concurrently when possible.

    One ``generate_declaration`` call is made per section.  With more than
    one section the calls are submitted to a thread pool whose size is
    ``settings.TEMPLATE_UPLOAD_LLM_MAX_WORKERS`` (default 8, minimum 1),
    capped at the number of sections.  When the LLM is disabled or not
    configured, every section gets the deterministic fallback text and no
    threads are started.

    Args:
        sections: Records shaped like ``{sectionKey, sectionTitle}``.
        content_by_key: Optional mapping from section number / key to sample
            body text, used as context for the guide.

    Returns:
        One guide per section, in the same order as ``sections``; an empty
        list when ``sections`` is empty.
    """
    lookup = content_by_key or {}
    jobs = [
        (str(section.get("sectionTitle") or ""), _content_for_section(section, lookup))
        for section in sections
    ]
    if not jobs:
        return []

    llm_enabled = bool(getattr(settings, "DECLARATION_USE_LLM", True))
    if not (llm_enabled and llm_configured()):
        return [_fallback_declaration(title) for title, _ in jobs]

    configured_workers = int(getattr(settings, "TEMPLATE_UPLOAD_LLM_MAX_WORKERS", 8) or 8)
    worker_cap = max(configured_workers, 1)

    # A single section does not justify spinning up a pool.
    if len(jobs) == 1:
        only_title, only_content = jobs[0]
        return [generate_declaration(only_title, only_content)]

    pool_size = min(worker_cap, len(jobs))
    guides: list[str] = [""] * len(jobs)
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Remember each future's position so results land in input order
        # even though they complete in arbitrary order.
        position_of = {}
        for position, (title, body) in enumerate(jobs):
            position_of[pool.submit(generate_declaration, title, body)] = position

        for finished in as_completed(position_of):
            position = position_of[finished]
            try:
                guides[position] = finished.result()
            except Exception as exc:  # noqa: BLE001
                logger.warning("章节声明并发生成失败,使用兜底 | idx=%s | err=%s", position, exc)
                guides[position] = _fallback_declaration(jobs[position][0])

    logger.info("章节声明生成 | 章节=%s | 线程=%s", len(jobs), pool_size)
    return guides
|