report_generation/services/template_service.py
xxy 43f3e0b746 Initial commit
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 18:41:06 +08:00

525 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
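services/template_service.py

Replicates the way eval_report obtains its report_template_sections data.

- DEFAULT_TEMPLATE_SECTIONS: the system default post-evaluation report
  section outline, as (key, title) pairs.
- default_section_prompt / default_section_output_contract /
  default_section_examples: look up the prompt, output contract and examples
  for a section by its title / number.
- build_default_template_catalog: the default outline plus prompts and
  contracts (used when matching an uploaded template).

Note: eval_report additionally extracts finer-grained prompts / examples from
the guideline and template Word documents. This project does not ship those
two .doc files or DocParser by default, so the related functions degrade
gracefully when they are missing and fall back to
SECTION_PROMPT_RULES / SECTION_EXAMPLE_RULES.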
"""
from __future__ import annotations
import re
import uuid
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from sqlalchemy.orm import Session
from database.models import ReportTemplate, ReportTemplateSection
from prompts.report_generation.section_output_contracts import (
DEFAULT_SECTION_OUTPUT_CONTRACT,
SECTION_OUTPUT_CONTRACTS,
)
from prompts.report_generation.template_prompt_rules import (
DEFAULT_SECTION_PROMPT,
SECTION_EXAMPLE_RULES,
SECTION_PROMPT_RULES,
)
# Name of the built-in template row that ensure_default_template() looks up
# and, when absent, creates.
SYSTEM_DEFAULT_TEMPLATE_NAME = "后评价默认模板"
# File stem (no extension) of the guideline document probed by
# _resolve_guideline_path(); both ".doc" and ".docx" are tried.
GUIDELINE_BASENAME = "炼油化工建设项目后评价报告编制细则(修订)"
# File stem (no extension) of the worked-example document probed by
# _resolve_project_example_path(); both ".doc" and ".docx" are tried.
PROJECT_EXAMPLE_BASENAME = "模版"
# Length (in characters) above which _project_example_for() truncates an
# example and appends a "truncated" marker.
MAX_SECTION_EXAMPLE_CHARS = 12000
# Ordered (section_key, section_title) pairs of the system default outline.
# Each key shown here is the title's numeric prefix with "." replaced by "-".
# ensure_default_template() uses the list position as section_order and
# compares these pairs against the stored rows, so order and spelling matter.
DEFAULT_TEMPLATE_SECTIONS: list[tuple[str, str]] = [
    # Chapter 1: project overview
    ("1", "1 项目概况"),
    ("1-1", "1.1 项目基本情况"),
    ("1-2", "1.2 项目决策要点"),
    ("1-3", "1.3 项目实施情况"),
    ("1-4", "1.4 项目运行情况"),
    # Chapter 2: evaluation of preliminary work
    ("2", "2 前期工作评价"),
    ("2-1", "2.1 项目要素评价"),
    ("2-1-1", "2.1.1 资源与原料评价"),
    ("2-1-2", "2.1.2 产品方案及市场评价"),
    ("2-1-2-1", "2.1.2.1 产品方案评价"),
    ("2-1-2-2", "2.1.2.2 产品市场评价"),
    ("2-1-3", "2.1.3 工艺方案评价"),
    ("2-1-3-1", "2.1.3.1 总加工方案评价"),
    ("2-1-3-2", "2.1.3.2 建设规模及工艺技术方案评价"),
    ("2-1-3-3", "2.1.3.3 主要设备方案评价"),
    ("2-1-4", "2.1.4 厂址选择及外部条件评价"),
    ("2-1-5", "2.1.5 总图及系统配套工程评价"),
    ("2-1-6", "2.1.6 主要技术指标评价"),
    ("2-1-7", "2.1.7 风险分析评价"),
    ("2-2", "2.2 工作程序评价"),
    ("2-2-1", "2.2.1 编制单位资质及选择方式评价"),
    ("2-2-2", "2.2.2 编制进度评价"),
    ("2-2-3", "2.2.3 与专项评价的结合情况"),
    ("2-2-4", "2.2.4 可行性研究报告的质量评价"),
    ("2-3", "2.3 前评估工作评价"),
    ("2-4", "2.4 初步设计评价"),
    ("2-4-1", "2.4.1 设计单位资质及选择方式评价"),
    ("2-4-2", "2.4.2 初步设计进度评价"),
    ("2-4-3", "2.4.3 初步设计质量评价"),
    ("2-4-4", "2.4.4 初步设计审查工作评价"),
    ("2-5", "2.5 前期决策程序评价"),
    ("2-6", "2.6 前期工作评价结论"),
    # Chapter 3: evaluation of construction and implementation
    ("3", "3 建设实施评价"),
    ("3-1", "3.1 工程建设管理模式评价"),
    ("3-2", "3.2 招投标评价"),
    ("3-3", "3.3 施工图设计评价"),
    ("3-3-1", "3.3.1 与批复后初步设计符合性评价"),
    ("3-3-2", "3.3.2 设计进度评价"),
    ("3-3-3", "3.3.3 施工图设计水平及质量评价"),
    ("3-3-4", "3.3.4 施工图设计变更管理评价"),
    ("3-4", "3.4 工程承包商或施工单位评价"),
    ("3-4-1", "3.4.1 施工准备评价"),
    ("3-4-2", "3.4.2 施工计划的执行情况"),
    ("3-5", "3.5 采购工作评价"),
    ("3-6", "3.6 工程监理评价"),
    ("3-7", "3.7 工程质量评价"),
    ("3-8", "3.8 HSE管理评价"),
    ("3-9", "3.9 三查四定及中间交接"),
    ("3-10", "3.10 工程竣工验收评价"),
    ("3-11", "3.11 建设实施评价结论"),
    # Chapter 4: evaluation of production and operation
    ("4", "4 生产运行评价"),
    ("4-1", "4.1 生产准备评价"),
    ("4-2", "4.2 联合试运与试生产情况评价"),
    ("4-3", "4.3 生产运行评价"),
    ("4-3-1", "4.3.1 原料供应评价"),
    ("4-3-2", "4.3.2 生产运行总体情况评价"),
    ("4-3-3", "4.3.3 达标评价"),
    ("4-3-4", "4.3.4 生产工艺技术评价"),
    ("4-3-5", "4.3.5 设备运行评价"),
    ("4-3-6", "4.3.6 公用工程及辅助设施合理性评价"),
    ("4-4", "4.4 生产运行评价结论"),
    # Chapter 5: evaluation of investment and economic benefit
    ("5", "5 投资与经济效益评价"),
    ("5-1", "5.1 主要经济指标实现程度评价"),
    ("5-2", "5.2 投资和执行情况评价"),
    ("5-2-1", "5.2.1 投资控制及变动原因分析"),
    ("5-2-2", "5.2.2 投资水平分析"),
    ("5-2-3", "5.2.3 资金来源及到位评价"),
    ("5-2-4", "5.2.4 投资控制的经验和教训"),
    ("5-3", "5.3 经济效益分析"),
    ("5-3-1", "5.3.1 项目投产以来生产经营及效益状况"),
    ("5-3-2", "5.3.2 项目经济效益后评价"),
    ("5-4", "5.4 不确定性分析"),
    ("5-5", "5.5 投资与经济效益评价结论"),
    # Chapter 6: evaluation of impact and sustainability
    ("6", "6 影响与持续性评价"),
    ("6-1", "6.1 影响评价"),
    ("6-1-1", "6.1.1 环境影响评价"),
    ("6-1-2", "6.1.2 安全影响评价"),
    ("6-1-3", "6.1.3 科技进步影响"),
    ("6-1-4", "6.1.4 项目社会影响评价"),
    ("6-1-5", "6.1.5 项目影响评价结论"),
    ("6-2", "6.2 持续性评价"),
    ("6-2-1", "6.2.1 资源分析"),
    ("6-2-2", "6.2.2 产品分析"),
    ("6-2-3", "6.2.3 主要技术及经济指标对比"),
    ("6-2-4", "6.2.4 项目持续性评价结论"),
    # Chapter 7: overall evaluation conclusions
    ("7", "7 综合评价结论"),
    ("7-1", "7.1 综合评价结论"),
    ("7-1-1", "7.1.1 总体评价结论"),
    ("7-1-2", "7.1.2 成功度评价"),
    ("7-2", "7.2 主要经验"),
    ("7-3", "7.3 问题与建议"),
]
def default_section_output_contract(section_title: str, section_key: str | None = None) -> str:
    """Return the output contract registered for a section.

    The section number is read from the numeric prefix of ``section_title``;
    when the title has no such prefix it is derived from ``section_key``.

    Args:
        section_title: Section heading, optionally starting with a number
            such as ``"2.1.3"``.
        section_key: Optional dash-separated key such as ``"2-1-3"``.

    Returns:
        The matching entry of ``SECTION_OUTPUT_CONTRACTS``, or
        ``DEFAULT_SECTION_OUTPUT_CONTRACT`` when no number can be determined
        or the number has no dedicated contract.
    """
    number = _extract_number_prefix(section_title)
    if not number:
        number = _section_key_to_number(section_key)
    if not number or number not in SECTION_OUTPUT_CONTRACTS:
        return DEFAULT_SECTION_OUTPUT_CONTRACT
    return SECTION_OUTPUT_CONTRACTS[number]
def default_section_prompt(section_title: str, section_key: str | None = None) -> str:
    """Return the prompt text to use for a section.

    Lookup order:

    1. A prompt extracted from the guideline document, if one is available.
    2. The first entry of ``SECTION_PROMPT_RULES`` whose pattern matches.
    3. ``DEFAULT_SECTION_PROMPT``.

    A rule matches when the normalized title starts with the lower-cased
    pattern, or, for an all-digit pattern, when the title starts with
    ``"<pattern> "`` or the key equals the pattern or starts with
    ``"<pattern>-"``.

    Args:
        section_title: Section heading.
        section_key: Optional dash-separated section key.

    Returns:
        The selected prompt string.
    """
    from_guideline = _guideline_prompt_for(section_title, section_key)
    if from_guideline:
        return from_guideline

    normalized_title = _normalize_section_identity(section_title)
    normalized_key = str(section_key or "").strip().lower()

    for rule_pattern, rule_prompt in SECTION_PROMPT_RULES:
        needle = rule_pattern.lower()
        if normalized_title.startswith(needle):
            return rule_prompt
        if not needle.isdigit():
            continue
        # All-digit patterns may also be matched through the section key.
        if (
            normalized_title.startswith(f"{needle} ")
            or normalized_key.startswith(f"{needle}-")
            or normalized_key == needle
        ):
            return rule_prompt
    return DEFAULT_SECTION_PROMPT
def build_default_template_catalog() -> list[dict[str, str]]:
    """Build the default section catalog with prompts and output contracts.

    Returns:
        One dict per entry of ``DEFAULT_TEMPLATE_SECTIONS``, in the same
        order, with the keys ``sectionKey``, ``sectionTitle``,
        ``sectionNumber``, ``sectionPrompt`` and ``sectionOutputContract``.
    """
    return [
        {
            "sectionKey": section_key,
            "sectionTitle": section_title,
            "sectionNumber": (
                _extract_number_prefix(section_title)
                or _section_key_to_number(section_key)
            ),
            "sectionPrompt": default_section_prompt(section_title, section_key),
            "sectionOutputContract": default_section_output_contract(
                section_title, section_key
            ),
        }
        for section_key, section_title in DEFAULT_TEMPLATE_SECTIONS
    ]
def default_section_examples(section_title: str, section_key: str | None = None) -> str:
    """Return example text for a section, or ``""`` when none applies.

    An example extracted from the project example document wins when one is
    found.  Otherwise the first entry of ``SECTION_EXAMPLE_RULES`` is
    returned whose prefix equals the section's chapter number, or whose
    prefix followed by a space starts the normalized title, or whose prefix
    equals the key or is followed by ``"-"`` at the start of the key.

    Args:
        section_title: Section heading.
        section_key: Optional dash-separated section key.

    Returns:
        The example text, or an empty string.
    """
    from_project = _project_example_for(section_title, section_key)
    if from_project:
        return from_project

    normalized_title = _normalize_section_identity(section_title)
    normalized_key = str(section_key or "").strip().lower()
    number = _extract_number_prefix(section_title) or _section_key_to_number(section_key)

    # Chapter = first component of the number, else first component of the key.
    if number:
        chapter = number.split(".")[0]
    elif normalized_key:
        chapter = normalized_key.split("-")[0]
    else:
        chapter = ""

    for rule_prefix, rule_examples in SECTION_EXAMPLE_RULES:
        needle = str(rule_prefix).strip().lower()
        if (
            chapter == needle
            or normalized_title.startswith(f"{needle} ")
            or normalized_key.startswith(f"{needle}-")
            or normalized_key == needle
        ):
            return rule_examples
    return ""
def _normalize_section_identity(value: str | None) -> str:
text = str(value or "").strip().lower()
text = text.replace("", ".").replace("", ".")
text = re.sub(r"\s+", " ", text)
return text
def _section_key_to_number(section_key: str | None) -> str:
key = str(section_key or "").strip()
if not key:
return ""
if re.fullmatch(r"\d+(?:-\d+)*", key):
return key.replace("-", ".")
return ""
def _extract_number_prefix(title: str) -> str:
m = re.match(r"^\s*(\d+(?:\.\d+)*)\s*", str(title or ""))
return m.group(1) if m else ""
def _normalize_heading_key(value: str) -> str:
s = str(value or "").strip().lower()
s = s.replace("", ".").replace("", ".")
s = re.sub(r"\s+", "", s)
return s
def _tuple_from_number(number_str: str) -> tuple[int, ...]:
if not number_str:
return tuple()
parts = []
for p in number_str.split("."):
if p.isdigit():
parts.append(int(p))
else:
return tuple()
return tuple(parts)
def _read_doc_text(path: str) -> str:
"""读取 .doc/.docx 文本。本项目无 DocParser 时返回空串(优雅降级)。"""
try:
from function.documents.doc_parser import DocParser # type: ignore
except Exception:
return ""
try:
return DocParser(path).read()
except Exception:
return ""
@lru_cache(maxsize=1)
def _guideline_section_prompt_map() -> dict[str, str]:
    """Return the prompt map extracted from the guideline document.

    The result is memoized with ``lru_cache``, so the document is resolved
    and parsed at most once per process and every caller receives the same
    dict object.

    Returns:
        The mapping built by ``_build_guideline_prompt_map``, or an empty
        dict when the document is not found or yields no text.
    """
    document_path = _resolve_guideline_path()
    document_text = _read_doc_text(document_path) if document_path else ""
    if not document_text:
        return {}
    return _build_guideline_prompt_map(document_text)
def _resolve_guideline_path() -> str | None:
    """Locate the guideline document on disk.

    The search directory is the parent of the directory containing this
    module.  The ``.doc`` file is preferred over the ``.docx`` file.

    Returns:
        The path of the first existing candidate as a string, or ``None``
        when neither file exists.
    """
    base_dir = Path(__file__).resolve().parents[1]
    file_names = (f"{GUIDELINE_BASENAME}.doc", f"{GUIDELINE_BASENAME}.docx")
    existing = (base_dir / file_name for file_name in file_names)
    return next((str(path) for path in existing if path.is_file()), None)
def _resolve_project_example_path() -> str | None:
    """Locate the project example document on disk.

    The search directory is the parent of the directory containing this
    module.  The ``.doc`` file is preferred over the ``.docx`` file.

    Returns:
        The path of the first existing candidate as a string, or ``None``
        when neither file exists.
    """
    base_dir = Path(__file__).resolve().parents[1]
    file_names = (
        f"{PROJECT_EXAMPLE_BASENAME}.doc",
        f"{PROJECT_EXAMPLE_BASENAME}.docx",
    )
    existing = (base_dir / file_name for file_name in file_names)
    return next((str(path) for path in existing if path.is_file()), None)
@lru_cache(maxsize=1)
def _project_example_entries() -> list[tuple[str, str]]:
    """Return the ``(heading, body)`` entries of the project example document.

    The result is memoized with ``lru_cache``, so the document is resolved
    and parsed at most once per process and every caller receives the same
    list object.

    Returns:
        The entries built by ``_build_project_example_entries``, or an empty
        list when the document is not found or yields no text.
    """
    document_path = _resolve_project_example_path()
    document_text = _read_doc_text(document_path) if document_path else ""
    if not document_text:
        return []
    return _build_project_example_entries(document_text)
def _build_project_example_entries(text: str) -> list[tuple[str, str]]:
lines = str(text or "").splitlines()
headings: list[tuple[int, int, str]] = []
for idx, raw in enumerate(lines):
line = str(raw or "").strip()
m = re.match(r"^\s*(#{1,6})\s*(.+?)\s*$", line)
if not m:
continue
level = len(m.group(1))
heading_title = m.group(2).strip()
if not heading_title:
continue
headings.append((idx, level, heading_title))
out: list[tuple[str, str]] = []
for i, (start_idx, level, title) in enumerate(headings):
end_idx = len(lines)
for j in range(i + 1, len(headings)):
next_idx, next_level, _ = headings[j]
if next_level <= level:
end_idx = next_idx
break
body = "\n".join(lines[start_idx + 1 : end_idx]).strip()
body = re.sub(r"\n{3,}", "\n\n", body)
if not body:
continue
out.append((title, body))
return out
def _project_example_for(section_title: str, section_key: str | None = None) -> str:
    """Pick the best-matching example from the project example document.

    The section title (with its number stripped), or the dotted form of the
    key when the title is empty, is reduced to a core title and scored
    against every example heading.  The first heading with the highest score
    wins; a result is produced only when that score is at least 4 and the
    body is non-empty.

    Args:
        section_title: Section heading.
        section_key: Optional dash-separated section key.

    Returns:
        ``"### <heading>"`` followed by the example body, cut to
        ``MAX_SECTION_EXAMPLE_CHARS`` characters plus a truncation marker
        when longer, or ``""`` when no acceptable match exists.
    """
    entries = _project_example_entries()
    if not entries:
        return ""

    wanted = _core_title(
        _clean_section_title(section_title) or _section_key_to_number(section_key)
    )
    if not wanted:
        return ""

    scored = []
    for heading, body in entries:
        cleaned_heading = _clean_section_title(heading)
        score = _title_match_score(wanted, _core_title(cleaned_heading))
        scored.append((score, cleaned_heading, body))
    # max() keeps the first of several equally scored entries.
    top_score, top_heading, top_body = max(scored, key=lambda item: item[0])

    if top_score < 4 or not top_body:
        return ""

    example = f"### {top_heading}\n\n{top_body}".strip()
    if len(example) <= MAX_SECTION_EXAMPLE_CHARS:
        return example
    return example[:MAX_SECTION_EXAMPLE_CHARS].rstrip() + "\n\n(示例过长,已截断)"
def _clean_section_title(value: str | None) -> str:
s = str(value or "").strip()
s = re.sub(r"^\s*\d+(?:[.\-]\d+)*\s*", "", s)
return s.strip()
def _core_title(value: str | None) -> str:
s = str(value or "").strip()
s = s.replace("", "(").replace("", ")")
s = re.sub(r"\([^)]*\)", "", s)
s = re.sub(r"[、,。;::()\-\s]", "", s)
s = s.replace("项目", "")
s = s.replace("情况", "")
s = s.replace("工作", "")
return s.strip().lower()
def _title_match_score(target: str, candidate: str) -> int:
if not target or not candidate:
return 0
if target == candidate:
return 100
score = 0
if target in candidate or candidate in target:
score += 40
tks_t = re.findall(r"[\u4e00-\u9fa5]{2,8}|[a-z]{2,12}", target)
tks_c = re.findall(r"[\u4e00-\u9fa5]{2,8}|[a-z]{2,12}", candidate)
if tks_t and tks_c:
overlap = len(set(tks_t) & set(tks_c))
score += overlap * 8
ch_overlap = len(set(target) & set(candidate))
score += min(ch_overlap, 20)
return score
def _build_guideline_prompt_map(text: str) -> dict[str, str]:
    """Index the guideline text by numbered heading.

    Only Markdown-style headings (one to six ``#``) whose title starts with a
    dotted number are considered.  A heading's body runs from the line after
    it up to the next numbered heading whose number does not start with this
    heading's number (or the end of the text), so sub-sections are included.
    Runs of three or more newlines are collapsed to two, and headings with an
    empty body are skipped.

    Args:
        text: Guideline document text; a falsy value is treated as empty.

    Returns:
        A dict in which each body is stored twice: under the normalized
        heading title and under the normalized number.  Later headings
        overwrite earlier ones that produce the same key.
    """
    all_lines = str(text or "").splitlines()

    # Pass 1: collect numbered headings as (line index, title, number, number tuple).
    numbered: list[tuple[int, str, str, tuple[int, ...]]] = []
    for line_no, raw_line in enumerate(all_lines):
        hit = re.match(r"^\s*#{1,6}\s*(.+?)\s*$", str(raw_line or "").strip())
        if hit is None:
            continue
        title = hit.group(1).strip()
        number = _extract_number_prefix(title)
        number_path = _tuple_from_number(number)
        if number_path:
            numbered.append((line_no, title, number, number_path))

    # Pass 2: cut each body at the first following heading outside this subtree.
    result: dict[str, str] = {}
    for pos, (line_no, title, number, number_path) in enumerate(numbered):
        depth = len(number_path)
        # A shorter tuple can never equal number_path after slicing, so one
        # comparison covers both "shallower" and "different branch".
        stop = next(
            (
                later_no
                for later_no, _, _, later_path in numbered[pos + 1 :]
                if later_path[:depth] != number_path
            ),
            len(all_lines),
        )
        section = "\n".join(all_lines[line_no + 1 : stop]).strip()
        section = re.sub(r"\n{3,}", "\n\n", section)
        if not section:
            continue
        result[_normalize_heading_key(title)] = section
        result[_normalize_heading_key(number)] = section
    return result
def _guideline_prompt_for(section_title: str, section_key: str | None = None) -> str:
    """Look up a section's prompt in the guideline prompt map.

    The normalized full title is tried first, then the normalized section
    number (taken from the title, or derived from ``section_key`` when the
    title has none).

    Args:
        section_title: Section heading.
        section_key: Optional dash-separated section key.

    Returns:
        The guideline text for the section, or ``""`` when the map is empty
        or holds no matching entry.
    """
    prompt_map = _guideline_section_prompt_map()
    if not prompt_map:
        return ""

    stripped_title = str(section_title or "").strip()
    section_number = (
        _extract_number_prefix(stripped_title) or _section_key_to_number(section_key)
    )
    for raw_candidate in (stripped_title, section_number):
        lookup_key = _normalize_heading_key(raw_candidate)
        if lookup_key and lookup_key in prompt_map:
            return prompt_map[lookup_key]
    return ""
def list_templates(db: Session) -> list[ReportTemplate]:
    """Return all report templates.

    Rows are ordered by ``is_default`` descending, then by ``updated_at``
    descending.

    Args:
        db: Database session used for the query.

    Returns:
        Every ``ReportTemplate`` row in that order.
    """
    ordering = (
        ReportTemplate.is_default.desc(),
        ReportTemplate.updated_at.desc(),
    )
    query = db.query(ReportTemplate).order_by(*ordering)
    return query.all()
def ensure_default_template(db: Session) -> None:
    """Create or refresh the system default template and its sections.

    Steps:

    1. Look up the template named ``SYSTEM_DEFAULT_TEMPLATE_NAME`` and create
       it when it does not exist.
    2. Clear ``is_default`` on every template, then mark the system template
       as default and active.
    3. If the stored (key, title) pairs already equal
       ``DEFAULT_TEMPLATE_SECTIONS`` in order, refresh ``examples`` where the
       computed value is non-empty and differs, and fill in
       ``section_output_contract`` where it is empty.
    4. Otherwise delete the template's sections and recreate them from
       ``DEFAULT_TEMPLATE_SECTIONS``.

    The session is committed before returning in both cases.

    Args:
        db: Database session; this function calls ``commit()`` on it.
    """
    now = datetime.now()
    system_default = (
        db.query(ReportTemplate)
        .filter(ReportTemplate.name == SYSTEM_DEFAULT_TEMPLATE_NAME)
        .first()
    )
    if not system_default:
        system_default = ReportTemplate(
            id=uuid.uuid4().hex,
            name=SYSTEM_DEFAULT_TEMPLATE_NAME,
            description="系统预置模板(细则完整章节)",
            is_default=True,
            is_active=True,
            created_at=now,
            updated_at=now,
        )
        db.add(system_default)
        # Flush so the new row exists before its sections are queried below.
        db.flush()
    current_rows = (
        db.query(ReportTemplateSection)
        .filter(ReportTemplateSection.template_id == system_default.id)
        .order_by(ReportTemplateSection.section_order.asc())
        .all()
    )
    current_pairs = [(r.section_key, r.section_title) for r in current_rows]
    expected_pairs = list(DEFAULT_TEMPLATE_SECTIONS)
    # Demote every template, then promote the system template again.
    # NOTE(review): this also clears is_default on any other template on each
    # call — confirm that is intended.
    db.query(ReportTemplate).update({ReportTemplate.is_default: False})
    system_default.is_default = True
    system_default.is_active = True
    # NOTE(review): updated_at is bumped on every call, even when nothing else
    # changes; list_templates() sorts on this column — confirm intended.
    system_default.updated_at = now
    if current_pairs == expected_pairs:
        # Outline unchanged: update rows in place instead of recreating them.
        has_changed = False
        for row in current_rows:
            current_examples = str(row.examples or "").strip()
            new_examples = default_section_examples(row.section_title, row.section_key).strip()
            # Examples are overwritten only by a non-empty, different value.
            if new_examples and current_examples != new_examples:
                row.examples = new_examples
                row.updated_at = now
                has_changed = True
            current_out = str(getattr(row, "section_output_contract", None) or "").strip()
            new_out = default_section_output_contract(row.section_title, row.section_key).strip()
            # An existing output contract is kept; only empty ones are filled in.
            if not current_out and new_out:
                row.section_output_contract = new_out
                row.updated_at = now
                has_changed = True
        if has_changed:
            # NOTE(review): updated_at was already set to `now` above, so this
            # assignment does not change the stored value.
            system_default.updated_at = now
        db.commit()
        return
    # Outline differs: drop the stored sections and rebuild them from scratch.
    db.query(ReportTemplateSection).filter(
        ReportTemplateSection.template_id == system_default.id
    ).delete()
    for i, (key, title) in enumerate(DEFAULT_TEMPLATE_SECTIONS):
        db.add(
            ReportTemplateSection(
                id=uuid.uuid4().hex,
                template_id=system_default.id,
                section_key=key,
                section_title=title,
                # Rebuilt rows are stored with an empty section_prompt.
                section_prompt="",
                section_output_contract=default_section_output_contract(title, key),
                section_order=i,
                examples=default_section_examples(title, key),
                created_at=now,
                updated_at=now,
            )
        )
    db.commit()