section_reference_block/services/declaration_service.py
xxy 43f3e0b746 Initial commit
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 18:41:06 +08:00

127 lines
5.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
services/declaration_service.py
为每个目录(章节)生成一个"声明"
声明:一段说明该章节应写什么、结构与约束的撰写指引,存入
report_template_sections.section_prompt。
优先用 LLM结合章节标题 + 该章节正文)生成;未配置或失败时
回退到确定性模板,保证流程稳定可用。
"""
from __future__ import annotations
import logging
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from config import settings
from services.llm_client import chat_completions_json, llm_configured
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# System prompt (in Chinese) passed as `system_prompt` with every declaration
# request. It instructs the model to read the section's sample text and
# summarise how the section should be written -- content points, structure,
# data/metric requirements and writing constraints -- and to answer with a JSON
# object {"guide": "..."} whose guide is plain text of at most 300 characters,
# giving a generic guide based on the title when the sample is missing or short.
# The adjacent string literals below are concatenated by the parser into one
# string; no separator is inserted between fragments.
# NOTE(review): the fragment that ends with the JSON example is joined directly
# to the fragment that starts with "guide" -- confirm whether a punctuation
# mark was lost between them.
_SYSTEM_PROMPT = (
"你是报告模板专家。任务:阅读给定章节的范文(参考正文),总结这一章应该怎么写,"
"作为后续报告撰写该章节的写作指引。需提炼:①内容要点(写哪些事项);"
"②组织结构(应有的小节/条目顺序);③数据与口径要求(需引用的对比/指标/表格等);"
"④写作约束(先事实后评价、缺失写「待补充」、不得编造)。"
"严格要求:不要输出任何思考过程或解释;只输出 JSON 对象 {\"guide\": \"...\"}"
"guide 为 300 字以内的写作指引纯文本(不含 markdown 标题);"
"范文缺失或过短时,按章节标题给出通用写作指引。"
)
def _strip_number_prefix(title: str) -> str:
t = str(title or "").strip()
t = re.sub(r"^(?:\d+(?:\.\d+)*|[一二三四五六七八九十]+[、.])\s*", "", t).strip()
return t
def _fallback_declaration(section_title: str) -> str:
    """Build the deterministic writing guide used when no LLM result is available.

    Args:
        section_title: Section title; its numbering prefix is removed via
            ``_strip_number_prefix`` before it is embedded in the text.

    Returns:
        A fixed three-sentence guide (in Chinese) that names the section. If
        nothing is left of the title after stripping, a generic label is used
        in its place.
    """
    topic = _strip_number_prefix(section_title)
    if not topic:
        topic = "本章节"
    sentences = (
        f"本章节为「{topic}」。撰写时应紧扣标题主题,先陈述事实与数据,再给出分析与评价;",
        "结构需与标题保持一致,条理清晰、用语规范;",
        "所有结论须有依据,缺失信息写「待补充」,禁止编造。",
    )
    return "".join(sentences)
def _build_user_prompt(section_title: str, content: str) -> str:
body = (content or "").strip()
if len(body) > 2500:
body = body[:2500]
body_block = f"\n\n该章节范文(参考正文,节选):\n```\n{body}\n```" if body else ""
return (
f"章节标题:{section_title}{body_block}\n\n"
f"请根据上述范文,总结该章节应该怎么写,并只返回 JSON{{\"guide\": \"300字以内的写作指引\"}}"
)
def generate_declaration(section_title: str, content: str = "") -> str:
    """Produce the writing guide for a single section.

    The LLM is used only when the ``DECLARATION_USE_LLM`` setting (default
    ``True``) is truthy and ``llm_configured()`` reports a usable client.
    Otherwise, or when the call raises or yields no ``guide`` text, the
    deterministic fallback guide is returned.

    Args:
        section_title: Title of the section.
        content: Sample body text of the section, used as prompt context.

    Returns:
        The stripped ``guide`` string from the LLM's JSON reply, or the
        fallback guide.
    """
    llm_switch = bool(getattr(settings, "DECLARATION_USE_LLM", True))
    if not llm_switch or not llm_configured():
        return _fallback_declaration(section_title)

    try:
        payload = chat_completions_json(
            system_prompt=_SYSTEM_PROMPT,
            user_prompt=_build_user_prompt(section_title, content),
            temperature=0.2,
            max_tokens=2048,
        )
        # Parsing stays inside the try so a reply of unexpected shape also
        # ends in the fallback rather than an exception.
        raw_guide = (payload or {}).get("guide")
        guide_text = str(raw_guide or "").strip()
        if guide_text:
            return guide_text
    except Exception as exc:  # noqa: BLE001 - catch-all so an LLM failure never breaks the main flow
        logger.warning("生成章节声明失败,使用兜底模板 | title=%s | err=%s", section_title, exc)
    return _fallback_declaration(section_title)
def _content_for_section(s: dict, content_by_key: dict[str, str]) -> str:
"""目录键可能是 canonical 形式,优先用标题中的编号前缀去匹配正文。"""
title = str(s.get("sectionTitle") or "")
m = re.match(r"^(\d+(?:\.\d+)*)", title.strip())
num = m.group(1) if m else ""
return content_by_key.get(num, "") or content_by_key.get(str(s.get("sectionKey") or ""), "")
def generate_declarations(sections: list[dict], content_by_key: dict[str, str] | None = None) -> list[str]:
    """Generate the writing guide for every section of a table of contents.

    One ``generate_declaration`` call is made per section. With more than one
    section the calls are submitted to a thread pool; the original note gives
    the rationale that LLM calls are network I/O, so threads overlap them and
    keep the backend busy.

    Args:
        sections: Records of the form ``{sectionKey, sectionTitle}``.
        content_by_key: Optional mapping from section number/key to sample
            text, used as context for each guide.

    Returns:
        One guide per section, in the same order as *sections*. An empty list
        is returned for an empty *sections*.
    """
    lookup = content_by_key or {}
    jobs = []
    for section in sections:
        jobs.append((str(section.get("sectionTitle") or ""), _content_for_section(section, lookup)))
    if not jobs:
        return []

    # Same switch as generate_declaration; checked once here so the pool is
    # not spun up when every result would be the fallback anyway.
    llm_switch = bool(getattr(settings, "DECLARATION_USE_LLM", True))
    if not llm_switch or not llm_configured():
        return [_fallback_declaration(job_title) for job_title, _ in jobs]

    configured = int(getattr(settings, "TEMPLATE_UPLOAD_LLM_MAX_WORKERS", 8) or 8)
    pool_limit = configured if configured > 1 else 1

    if len(jobs) == 1:
        # A single section does not need a pool.
        only_title, only_content = jobs[0]
        return [generate_declaration(only_title, only_content)]

    pool_size = min(pool_limit, len(jobs))
    declarations: list[str] = [""] * len(jobs)
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Remember each future's position so results land in input order even
        # though they complete in arbitrary order.
        positions = {}
        for position, (job_title, job_content) in enumerate(jobs):
            positions[pool.submit(generate_declaration, job_title, job_content)] = position
        for finished in as_completed(positions):
            position = positions[finished]
            try:
                declarations[position] = finished.result()
            except Exception as exc:  # noqa: BLE001
                logger.warning("章节声明并发生成失败,使用兜底 | idx=%s | err=%s", position, exc)
                declarations[position] = _fallback_declaration(jobs[position][0])
    logger.info("章节声明生成 | 章节=%s | 线程=%s", len(jobs), pool_size)
    return declarations