# section_reference_block/services/template_service.py

"""
services/template_service.py
复刻自 eval_report：report_template_sections 数据的获取方式。

- DEFAULT_TEMPLATE_SECTIONS：系统默认后评价报告章节目录（key, title）
- default_section_prompt / default_section_output_contract / default_section_examples：
  按章节标题/编号取对应提示词、输出合同、示例
- build_default_template_catalog：默认目录 + 提示词/合同（供上传模版匹配）

说明：eval_report 会额外从《编制细则》与《模版》Word 文档抽取更细的提示词/示例；
本项目默认不含这两个 .doc 文件与 DocParser，故相关函数在缺文件时优雅降级，
回退到 SECTION_PROMPT_RULES / SECTION_EXAMPLE_RULES。
"""

from __future__ import annotations

import re
import uuid
from datetime import datetime
from functools import lru_cache
from pathlib import Path

from sqlalchemy.orm import Session

from database.models import ReportTemplate, ReportTemplateSection
from prompts.report_generation.section_output_contracts import (
    DEFAULT_SECTION_OUTPUT_CONTRACT,
    SECTION_OUTPUT_CONTRACTS,
)
from prompts.report_generation.template_prompt_rules import (
    DEFAULT_SECTION_PROMPT,
    SECTION_EXAMPLE_RULES,
    SECTION_PROMPT_RULES,
)

# Name of the built-in template row; ensure_default_template() looks the row up by this name.
SYSTEM_DEFAULT_TEMPLATE_NAME = "后评价默认模板"
# File stem (no extension) of the guideline Word document, tried as .doc then .docx.
GUIDELINE_BASENAME = "炼油化工建设项目后评价报告编制细则（修订）"
# File stem (no extension) of the sample-report Word document, tried as .doc then .docx.
PROJECT_EXAMPLE_BASENAME = "模版"
# Maximum length, in characters, of an example taken from the sample report before it is truncated.
MAX_SECTION_EXAMPLE_CHARS = 12000

# Default chapter outline of the post-evaluation report, listed in display order.
# Each entry is (section_key, section_title): the key is the section number written
# with "-" separators, and the title begins with the same number written with ".".
DEFAULT_TEMPLATE_SECTIONS: list[tuple[str, str]] = [
    ("1", "1 项目概况"),
    ("1-1", "1.1 项目基本情况"),
    ("1-2", "1.2 项目决策要点"),
    ("1-3", "1.3 项目实施情况"),
    ("1-4", "1.4 项目运行情况"),
    ("2", "2 前期工作评价"),
    ("2-1", "2.1 项目要素评价"),
    ("2-1-1", "2.1.1 资源与原料评价"),
    ("2-1-2", "2.1.2 产品方案及市场评价"),
    ("2-1-2-1", "2.1.2.1 产品方案评价"),
    ("2-1-2-2", "2.1.2.2 产品市场评价"),
    ("2-1-3", "2.1.3 工艺方案评价"),
    ("2-1-3-1", "2.1.3.1 总加工方案评价"),
    ("2-1-3-2", "2.1.3.2 建设规模及工艺技术方案评价"),
    ("2-1-3-3", "2.1.3.3 主要设备方案评价"),
    ("2-1-4", "2.1.4 厂址选择及外部条件评价"),
    ("2-1-5", "2.1.5 总图及系统配套工程评价"),
    ("2-1-6", "2.1.6 主要技术指标评价"),
    ("2-1-7", "2.1.7 风险分析评价"),
    ("2-2", "2.2 工作程序评价"),
    ("2-2-1", "2.2.1 编制单位资质及选择方式评价"),
    ("2-2-2", "2.2.2 编制进度评价"),
    ("2-2-3", "2.2.3 与专项评价的结合情况"),
    ("2-2-4", "2.2.4 可行性研究报告的质量评价"),
    ("2-3", "2.3 前评估工作评价"),
    ("2-4", "2.4 初步设计评价"),
    ("2-4-1", "2.4.1 设计单位资质及选择方式评价"),
    ("2-4-2", "2.4.2 初步设计进度评价"),
    ("2-4-3", "2.4.3 初步设计质量评价"),
    ("2-4-4", "2.4.4 初步设计审查工作评价"),
    ("2-5", "2.5 前期决策程序评价"),
    ("2-6", "2.6 前期工作评价结论"),
    ("3", "3 建设实施评价"),
    ("3-1", "3.1 工程建设管理模式评价"),
    ("3-2", "3.2 招投标评价"),
    ("3-3", "3.3 施工图设计评价"),
    ("3-3-1", "3.3.1 与批复后初步设计符合性评价"),
    ("3-3-2", "3.3.2 设计进度评价"),
    ("3-3-3", "3.3.3 施工图设计水平及质量评价"),
    ("3-3-4", "3.3.4 施工图设计变更管理评价"),
    ("3-4", "3.4 工程承包商或施工单位评价"),
    ("3-4-1", "3.4.1 施工准备评价"),
    ("3-4-2", "3.4.2 施工计划的执行情况"),
    ("3-5", "3.5 采购工作评价"),
    ("3-6", "3.6 工程监理评价"),
    ("3-7", "3.7 工程质量评价"),
    ("3-8", "3.8 HSE管理评价"),
    ("3-9", "3.9 三查四定及中间交接"),
    ("3-10", "3.10 工程竣工验收评价"),
    ("3-11", "3.11 建设实施评价结论"),
    ("4", "4 生产运行评价"),
    ("4-1", "4.1 生产准备评价"),
    ("4-2", "4.2 联合试运与试生产情况评价"),
    ("4-3", "4.3 生产运行评价"),
    ("4-3-1", "4.3.1 原料供应评价"),
    ("4-3-2", "4.3.2 生产运行总体情况评价"),
    ("4-3-3", "4.3.3 达标评价"),
    ("4-3-4", "4.3.4 生产工艺技术评价"),
    ("4-3-5", "4.3.5 设备运行评价"),
    ("4-3-6", "4.3.6 公用工程及辅助设施合理性评价"),
    ("4-4", "4.4 生产运行评价结论"),
    ("5", "5 投资与经济效益评价"),
    ("5-1", "5.1 主要经济指标实现程度评价"),
    ("5-2", "5.2 投资和执行情况评价"),
    ("5-2-1", "5.2.1 投资控制及变动原因分析"),
    ("5-2-2", "5.2.2 投资水平分析"),
    ("5-2-3", "5.2.3 资金来源及到位评价"),
    ("5-2-4", "5.2.4 投资控制的经验和教训"),
    ("5-3", "5.3 经济效益分析"),
    ("5-3-1", "5.3.1 项目投产以来生产经营及效益状况"),
    ("5-3-2", "5.3.2 项目经济效益后评价"),
    ("5-4", "5.4 不确定性分析"),
    ("5-5", "5.5 投资与经济效益评价结论"),
    ("6", "6 影响与持续性评价"),
    ("6-1", "6.1 影响评价"),
    ("6-1-1", "6.1.1 环境影响评价"),
    ("6-1-2", "6.1.2 安全影响评价"),
    ("6-1-3", "6.1.3 科技进步影响"),
    ("6-1-4", "6.1.4 项目社会影响评价"),
    ("6-1-5", "6.1.5 项目影响评价结论"),
    ("6-2", "6.2 持续性评价"),
    ("6-2-1", "6.2.1 资源分析"),
    ("6-2-2", "6.2.2 产品分析"),
    ("6-2-3", "6.2.3 主要技术及经济指标对比"),
    ("6-2-4", "6.2.4 项目持续性评价结论"),
    ("7", "7 综合评价结论"),
    ("7-1", "7.1 综合评价结论"),
    ("7-1-1", "7.1.1 总体评价结论"),
    ("7-1-2", "7.1.2 成功度评价"),
    ("7-2", "7.2 主要经验"),
    ("7-3", "7.3 问题与建议"),
]


def default_section_output_contract(section_title: str, section_key: str | None = None) -> str:
    """Return the output contract registered for a section, or the default contract.

    The section number is read from the numeric prefix of ``section_title``;
    when the title carries no number it is derived from ``section_key``.

    Args:
        section_title: Section heading, e.g. ``"2.1 ..."``.
        section_key: Optional dash-separated key, e.g. ``"2-1"``.

    Returns:
        The entry of ``SECTION_OUTPUT_CONTRACTS`` for the section number, or
        ``DEFAULT_SECTION_OUTPUT_CONTRACT`` when no number can be determined or
        the number has no registered contract.
    """
    number = _extract_number_prefix(section_title)
    if not number:
        number = _section_key_to_number(section_key)
    if not number:
        return DEFAULT_SECTION_OUTPUT_CONTRACT
    if number not in SECTION_OUTPUT_CONTRACTS:
        return DEFAULT_SECTION_OUTPUT_CONTRACT
    return SECTION_OUTPUT_CONTRACTS[number]


def default_section_prompt(section_title: str, section_key: str | None = None) -> str:
    """Return the prompt text to use for a section.

    Resolution order:
      1. A prompt extracted from the guideline document, when one is found.
      2. The first entry of ``SECTION_PROMPT_RULES`` whose pattern matches the
         normalized title as a prefix, or (for all-digit patterns) matches the
         section key as a chapter number.
      3. ``DEFAULT_SECTION_PROMPT``.

    Args:
        section_title: Section heading, e.g. ``"3.1 ..."``.
        section_key: Optional dash-separated key, e.g. ``"3-1"``.

    Returns:
        The selected prompt string.
    """
    guideline_prompt = _guideline_prompt_for(section_title, section_key)
    if guideline_prompt:
        return guideline_prompt

    title = _normalize_section_identity(section_title)
    key = str(section_key or "").strip().lower()
    for pattern, prompt in SECTION_PROMPT_RULES:
        p = pattern.lower()
        if title.startswith(p):
            # A pattern ending in a digit must not match in the middle of a longer
            # number: "3.1" is a prefix of "3.10 ..." as text, but it is a different
            # section. Any other continuation (".", " ", end of title) still matches.
            next_char = title[len(p) : len(p) + 1]
            if not (p[-1:].isdigit() and next_char.isdigit()):
                return prompt
        # The former `title.startswith(f"{p} ")` test is covered by the prefix
        # check above (a following space is never a digit), so only the
        # key-based chapter match remains here.
        if p.isdigit() and (key.startswith(f"{p}-") or key == p):
            return prompt
    return DEFAULT_SECTION_PROMPT


def build_default_template_catalog() -> list[dict[str, str]]:
    """Build the default section catalog with prompt and output contract per section.

    Returns:
        One dict per entry of ``DEFAULT_TEMPLATE_SECTIONS``, in the same order,
        with the keys ``sectionKey``, ``sectionTitle``, ``sectionNumber``,
        ``sectionPrompt`` and ``sectionOutputContract``.
    """
    return [
        {
            "sectionKey": section_key,
            "sectionTitle": section_title,
            "sectionNumber": (
                _extract_number_prefix(section_title) or _section_key_to_number(section_key)
            ),
            "sectionPrompt": default_section_prompt(section_title, section_key),
            "sectionOutputContract": default_section_output_contract(section_title, section_key),
        }
        for section_key, section_title in DEFAULT_TEMPLATE_SECTIONS
    ]


def default_section_examples(section_title: str, section_key: str | None = None) -> str:
    """Return example text for a section, or an empty string when none applies.

    An example extracted from the sample-report document wins when available.
    Otherwise the first entry of ``SECTION_EXAMPLE_RULES`` is returned whose
    prefix equals the section's chapter number, prefixes the normalized title
    (followed by a space), or matches the section key as a chapter.

    Args:
        section_title: Section heading, e.g. ``"2.1 ..."``.
        section_key: Optional dash-separated key, e.g. ``"2-1"``.
    """
    extracted = _project_example_for(section_title, section_key)
    if extracted:
        return extracted

    normalized_title = _normalize_section_identity(section_title)
    normalized_key = str(section_key or "").strip().lower()
    section_no = _extract_number_prefix(section_title) or _section_key_to_number(section_key)

    # The chapter is the leading component of the section number, falling back
    # to the leading component of the raw key.
    if section_no:
        chapter = section_no.split(".")[0]
    elif normalized_key:
        chapter = normalized_key.split("-")[0]
    else:
        chapter = ""

    for rule_prefix, rule_examples in SECTION_EXAMPLE_RULES:
        wanted = str(rule_prefix).strip().lower()
        is_match = (
            chapter == wanted
            or normalized_title.startswith(f"{wanted} ")
            or normalized_key.startswith(f"{wanted}-")
            or normalized_key == wanted
        )
        if is_match:
            return rule_examples
    return ""


def _normalize_section_identity(value: str | None) -> str:
    """Normalize a section title for prefix comparisons.

    The value is stripped and lower-cased, the full-width dot and the CJK full
    stop are replaced by an ASCII ".", and every run of whitespace is collapsed
    to a single space. Falsy input yields an empty string.
    """
    lowered = str(value or "").strip().lower()
    dotted = lowered.translate(str.maketrans({"．": ".", "。": "."}))
    return re.sub(r"\s+", " ", dotted)


def _section_key_to_number(section_key: str | None) -> str:
    """Convert a dash-separated section key such as ``"2-1-3"`` to ``"2.1.3"``.

    Returns an empty string when the key is empty or is not made up solely of
    digit groups joined by single dashes.
    """
    candidate = str(section_key or "").strip()
    if candidate and re.fullmatch(r"\d+(?:-\d+)*", candidate):
        return candidate.replace("-", ".")
    return ""


def _extract_number_prefix(title: str) -> str:
    """Return the dotted section number at the start of ``title``, or ``""``.

    Leading whitespace is ignored. The full-width dot and the CJK full stop are
    treated as ".", so ``"2．1 ..."`` yields ``"2.1"`` rather than stopping at
    ``"2"``. This matches how the other normalizers in this module handle those
    characters.

    Args:
        title: Section heading; falsy values are treated as an empty string.

    Returns:
        The number with ASCII dots (e.g. ``"2.1.3"``), or an empty string when
        the title does not start with a number.
    """
    text = str(title or "").replace("．", ".").replace("。", ".")
    m = re.match(r"^\s*(\d+(?:\.\d+)*)\s*", text)
    return m.group(1) if m else ""


def _normalize_heading_key(value: str) -> str:
    """Build a whitespace-free lookup key from a heading or section number.

    The value is stripped and lower-cased, the full-width dot and the CJK full
    stop are replaced by an ASCII ".", and all whitespace is removed.
    """
    key = str(value or "").strip().lower()
    for wide_dot in ("．", "。"):
        key = key.replace(wide_dot, ".")
    return re.sub(r"\s+", "", key)


def _tuple_from_number(number_str: str) -> tuple[int, ...]:
    """Parse a dotted section number such as ``"2.1.3"`` into ``(2, 1, 3)``.

    Args:
        number_str: Dot-separated number string.

    Returns:
        A tuple of ints, or an empty tuple when the input is empty or any
        component is not a decimal number (including empty components such as
        in ``"1..2"``).
    """
    if not number_str:
        return ()
    parts = number_str.split(".")
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which would raise ValueError here.
    if not all(part.isdecimal() for part in parts):
        return ()
    return tuple(int(part) for part in parts)


def _read_doc_text(path: str) -> str:
    """Read the text of a .doc/.docx file via DocParser.

    Degrades gracefully: returns an empty string when DocParser cannot be
    imported or when reading the document raises any ``Exception``.
    """
    try:
        from function.documents.doc_parser import DocParser  # type: ignore

        content = DocParser(path).read()
    except Exception:
        # Deliberate best-effort: a missing parser or unreadable file means "no text".
        return ""
    return content


@lru_cache(maxsize=1)
def _guideline_section_prompt_map() -> dict[str, str]:
    """Return the heading-to-prompt map parsed from the guideline document.

    The result is memoized by ``lru_cache``, so the document is resolved and
    read at most once until the cache is cleared. An empty dict is returned
    (and cached) when the file is missing or yields no text.
    """
    doc_path = _resolve_guideline_path()
    document_text = _read_doc_text(doc_path) if doc_path else ""
    if not document_text:
        return {}
    return _build_guideline_prompt_map(document_text)


def _resolve_guideline_path() -> str | None:
    """Locate the guideline document one directory above this file's directory.

    ``.doc`` is tried before ``.docx``.

    Returns:
        The path of the first existing file as a string, or ``None``.
    """
    base_dir = Path(__file__).resolve().parents[1]
    for suffix in (".doc", ".docx"):
        candidate = base_dir / f"{GUIDELINE_BASENAME}{suffix}"
        if candidate.is_file():
            return str(candidate)
    return None


def _resolve_project_example_path() -> str | None:
    """Locate the sample-report document one directory above this file's directory.

    ``.doc`` is tried before ``.docx``.

    Returns:
        The path of the first existing file as a string, or ``None``.
    """
    base_dir = Path(__file__).resolve().parents[1]
    for suffix in (".doc", ".docx"):
        candidate = base_dir / f"{PROJECT_EXAMPLE_BASENAME}{suffix}"
        if candidate.is_file():
            return str(candidate)
    return None


@lru_cache(maxsize=1)
def _project_example_entries() -> list[tuple[str, str]]:
    """Return ``(heading, body)`` pairs parsed from the sample-report document.

    The result is memoized by ``lru_cache``, so the document is resolved and
    read at most once until the cache is cleared. An empty list is returned
    (and cached) when the file is missing or yields no text.
    """
    doc_path = _resolve_project_example_path()
    document_text = _read_doc_text(doc_path) if doc_path else ""
    if not document_text:
        return []
    return _build_project_example_entries(document_text)


def _build_project_example_entries(text: str) -> list[tuple[str, str]]:
    """Split Markdown-style text into ``(heading_title, body)`` pairs.

    A heading is a line that, once stripped, starts with one to six ``#``
    characters. The body of a heading runs from the line after it up to the
    next heading of the same or a shallower level (deeper headings stay inside
    the body), or to the end of the text. Runs of three or more newlines in a
    body are collapsed to two, and headings with an empty body are skipped.
    """
    all_lines = str(text or "").splitlines()

    # Pass 1: record (line index, heading level, title) for every heading line.
    found: list[tuple[int, int, str]] = []
    for line_no, raw_line in enumerate(all_lines):
        hit = re.match(r"^\s*(#{1,6})\s*(.+?)\s*$", str(raw_line or "").strip())
        if hit is None:
            continue
        name = hit.group(2).strip()
        if name:
            found.append((line_no, len(hit.group(1)), name))

    # Pass 2: cut each heading's body at the next heading that is not deeper.
    entries: list[tuple[str, str]] = []
    for pos, (line_no, depth, name) in enumerate(found):
        stop = next(
            (
                later_line
                for later_line, later_depth, _ in found[pos + 1 :]
                if later_depth <= depth
            ),
            len(all_lines),
        )
        section = "\n".join(all_lines[line_no + 1 : stop]).strip()
        section = re.sub(r"\n{3,}", "\n\n", section)
        if section:
            entries.append((name, section))
    return entries


def _project_example_for(section_title: str, section_key: str | None = None) -> str:
    """Pick the sample-report section that best matches the requested section.

    The section title (without its number) is reduced to a core form and scored
    against every heading of the sample report; the key-derived number is used
    only when the cleaned title is empty. The first heading with the highest
    score wins.

    Returns:
        ``"### <heading>"`` followed by the heading's body, truncated to
        ``MAX_SECTION_EXAMPLE_CHARS`` characters with a truncation notice
        appended. An empty string is returned when there are no entries, the
        target reduces to nothing, or the best score is below 4.
    """
    entries = _project_example_entries()
    if not entries:
        return ""

    wanted_title = _clean_section_title(section_title)
    wanted_core = _core_title(wanted_title or _section_key_to_number(section_key))
    if not wanted_core:
        return ""

    scored: list[tuple[int, str, str]] = []
    for heading, body in entries:
        cleaned_heading = _clean_section_title(heading)
        points = _title_match_score(wanted_core, _core_title(cleaned_heading))
        scored.append((points, cleaned_heading, body))

    # max() keeps the first of several equally scored candidates.
    top_score, top_title, top_body = max(scored, key=lambda item: item[0])
    if top_score < 4 or not top_body:
        return ""

    example = f"### {top_title}\n\n{top_body}".strip()
    if len(example) <= MAX_SECTION_EXAMPLE_CHARS:
        return example
    return example[:MAX_SECTION_EXAMPLE_CHARS].rstrip() + "\n\n（示例过长，已截断）"


def _clean_section_title(value: str | None) -> str:
    """Strip a leading section number (dot- or dash-separated) from a title.

    Surrounding whitespace is removed as well; falsy input yields ``""``.
    """
    raw = str(value or "").strip()
    without_number = re.sub(r"^\s*\d+(?:[.\-]\d+)*\s*", "", raw)
    return without_number.strip()


def _core_title(value: str | None) -> str:
    """Reduce a title to a compact form used for fuzzy matching.

    Steps, in order: convert full-width parentheses to ASCII, drop
    parenthesized text, remove listed punctuation and all whitespace, remove
    the filler words "项目", "情况" and "工作" one after another, then
    lower-case the result.
    """
    core = str(value or "").strip()
    core = core.translate(str.maketrans({"（": "(", "）": ")"}))
    core = re.sub(r"\([^)]*\)", "", core)
    core = re.sub(r"[、，。；：:（）()\-\s]", "", core)
    # Filler words are removed sequentially, so the order is significant.
    for filler in ("项目", "情况", "工作"):
        core = core.replace(filler, "")
    return core.strip().lower()


def _title_match_score(target: str, candidate: str) -> int:
    """Score how well two core titles match; higher is better.

    Returns 0 when either string is empty and 100 for an exact match.
    Otherwise the score is the sum of:
      * 40 when one string contains the other,
      * 8 per distinct token shared by both (tokens are runs of 2-8 CJK
        characters or 2-12 lower-case ASCII letters),
      * the number of distinct characters shared by both, capped at 20.
    """
    if not (target and candidate):
        return 0
    if target == candidate:
        return 100

    token_pattern = r"[\u4e00-\u9fa5]{2,8}|[a-z]{2,12}"
    target_tokens = set(re.findall(token_pattern, target))
    candidate_tokens = set(re.findall(token_pattern, candidate))

    containment_points = 40 if (target in candidate or candidate in target) else 0
    token_points = 8 * len(target_tokens & candidate_tokens)
    char_points = min(len(set(target) & set(candidate)), 20)
    return containment_points + token_points + char_points


def _build_guideline_prompt_map(text: str) -> dict[str, str]:
    """Map numbered guideline headings to the text that follows them.

    Only Markdown-style headings (one to six ``#``) whose title starts with a
    parseable section number are considered. A heading's body runs from the
    line after it up to the next numbered heading whose number is not prefixed
    by this heading's number (sub-sections stay inside the body), or to the end
    of the text. Runs of three or more newlines are collapsed to two, and
    headings with an empty body are skipped.

    Returns:
        A dict holding each body under two keys: the normalized full heading
        and the normalized section number. Later headings overwrite earlier
        ones that produce the same key.
    """
    all_lines = str(text or "").splitlines()

    # Pass 1: collect (line index, title, number string, number tuple) per numbered heading.
    numbered: list[tuple[int, str, str, tuple[int, ...]]] = []
    for line_no, raw_line in enumerate(all_lines):
        hit = re.match(r"^\s*#{1,6}\s*(.+?)\s*$", str(raw_line or "").strip())
        if hit is None:
            continue
        heading = hit.group(1).strip()
        number = _extract_number_prefix(heading)
        path = _tuple_from_number(number)
        if path:
            numbered.append((line_no, heading, number, path))

    # Pass 2: cut each body at the first following heading outside this subtree.
    result: dict[str, str] = {}
    for pos, (line_no, heading, number, path) in enumerate(numbered):
        depth = len(path)
        # A shorter tuple can never equal `path` after slicing, so this single
        # comparison also covers headings that are shallower than the current one.
        stop = next(
            (
                later_line
                for later_line, _, _, later_path in numbered[pos + 1 :]
                if later_path[:depth] != path
            ),
            len(all_lines),
        )
        section = "\n".join(all_lines[line_no + 1 : stop]).strip()
        section = re.sub(r"\n{3,}", "\n\n", section)
        if not section:
            continue
        result[_normalize_heading_key(heading)] = section
        result[_normalize_heading_key(number)] = section
    return result


def _guideline_prompt_for(section_title: str, section_key: str | None = None) -> str:
    """Look up the guideline-derived prompt for a section.

    The normalized full title is tried first, then the normalized section
    number (taken from the title, or from ``section_key`` when the title has
    no number).

    Returns:
        The matching prompt text, or ``""`` when the guideline map is empty or
        neither lookup key is present.
    """
    prompts = _guideline_section_prompt_map()
    if not prompts:
        return ""

    stripped_title = str(section_title or "").strip()
    section_no = _extract_number_prefix(stripped_title) or _section_key_to_number(section_key)
    lookups = (
        _normalize_heading_key(stripped_title),
        _normalize_heading_key(section_no),
    )
    for lookup in lookups:
        if lookup and lookup in prompts:
            return prompts[lookup]
    return ""


def list_templates(db: Session) -> list[ReportTemplate]:
    """Return all report templates, default ones first, then most recently updated first."""
    ordering = (ReportTemplate.is_default.desc(), ReportTemplate.updated_at.desc())
    return db.query(ReportTemplate).order_by(*ordering).all()


def ensure_default_template(db: Session) -> None:
    """Create or refresh the system default template and its sections, then commit.

    The template is looked up by ``SYSTEM_DEFAULT_TEMPLATE_NAME`` and created
    when missing. It is then made the only template flagged ``is_default``.
    If its stored (key, title) pairs already equal ``DEFAULT_TEMPLATE_SECTIONS``
    in order, only examples and blank output contracts are refreshed; otherwise
    all of its sections are deleted and re-created from the default list.

    Args:
        db: SQLAlchemy session; this function commits it on both paths.
    """
    # Naive local timestamp (no tzinfo) shared by every row touched in this call.
    now = datetime.now()
    system_default = (
        db.query(ReportTemplate)
        .filter(ReportTemplate.name == SYSTEM_DEFAULT_TEMPLATE_NAME)
        .first()
    )
    if not system_default:
        system_default = ReportTemplate(
            id=uuid.uuid4().hex,
            name=SYSTEM_DEFAULT_TEMPLATE_NAME,
            description="系统预置模板（细则完整章节）",
            is_default=True,
            is_active=True,
            created_at=now,
            updated_at=now,
        )
        db.add(system_default)
        # NOTE(review): presumably flushed so the new row exists in the database
        # before the queries and bulk update below run — confirm.
        db.flush()

    current_rows = (
        db.query(ReportTemplateSection)
        .filter(ReportTemplateSection.template_id == system_default.id)
        .order_by(ReportTemplateSection.section_order.asc())
        .all()
    )
    current_pairs = [(r.section_key, r.section_title) for r in current_rows]
    expected_pairs = list(DEFAULT_TEMPLATE_SECTIONS)

    # Clear the default flag on every template, then set it again on the system
    # template so that it is the only default.
    db.query(ReportTemplate).update({ReportTemplate.is_default: False})
    system_default.is_default = True
    system_default.is_active = True
    system_default.updated_at = now

    # Fast path: the stored outline matches the expected one in content and order,
    # so rows are updated in place instead of being re-created.
    if current_pairs == expected_pairs:
        has_changed = False
        for row in current_rows:
            # Examples are overwritten whenever a non-empty default exists and
            # differs from the stored value.
            current_examples = str(row.examples or "").strip()
            new_examples = default_section_examples(row.section_title, row.section_key).strip()
            if new_examples and current_examples != new_examples:
                row.examples = new_examples
                row.updated_at = now
                has_changed = True
            # The output contract is only filled in when the stored one is blank;
            # a non-empty stored contract is left untouched.
            current_out = str(getattr(row, "section_output_contract", None) or "").strip()
            new_out = default_section_output_contract(row.section_title, row.section_key).strip()
            if not current_out and new_out:
                row.section_output_contract = new_out
                row.updated_at = now
                has_changed = True
        # NOTE(review): updated_at was already set to `now` above, so this
        # assignment has no additional effect.
        if has_changed:
            system_default.updated_at = now
        db.commit()
        return

    # Slow path: the outline differs, so drop every section of the system
    # template and insert the default list again in order.
    db.query(ReportTemplateSection).filter(
        ReportTemplateSection.template_id == system_default.id
    ).delete()
    for i, (key, title) in enumerate(DEFAULT_TEMPLATE_SECTIONS):
        db.add(
            ReportTemplateSection(
                id=uuid.uuid4().hex,
                template_id=system_default.id,
                section_key=key,
                section_title=title,
                # The prompt is stored empty here; only the contract and examples are pre-filled.
                section_prompt="",
                section_output_contract=default_section_output_contract(title, key),
                section_order=i,
                examples=default_section_examples(title, key),
                created_at=now,
                updated_at=now,
            )
        )
    db.commit()