<?php

declare(strict_types=1);

namespace ContentReady\Util;

use ContentReady\Modules\Registry;

/**
 * Helpers for computing, sanitizing and normalizing the HTML anchor ids that are
 * stored under `cr_meta_v1.anchors` (module anchors and in-content heading anchors).
 */
final class Anchors
{
	/**
	 * Suggested v1 anchor id for a module (a stable anchor that users can copy).
	 *
	 * Note: once a value has been written to a post's `cr_meta_v1.anchors.modules`,
	 * later versions should not write over it again, so that the anchor stays stable.
	 *
	 * @param string $module_id Module identifier.
	 * @return string The mapped anchor for ids listed in the table below, otherwise
	 *                `cr-` followed by the module id.
	 */
	public static function default_module_anchor_id(string $module_id): string
	{
		$map = [
			'verdict' => 'verdict',
			'tldr' => 'tldr',
			'limitations' => 'limitations',
			'faq' => 'faq',
			'references' => 'references',
			'last_updated' => 'updated',
		];
		return $map[$module_id] ?? ('cr-' . $module_id);
	}

	/**
	 * Reduces an arbitrary string to an anchor id made of `a-z`, `0-9`, `_` and `-`.
	 *
	 * The input is lower-cased, every run of other characters becomes a single `-`,
	 * leading/trailing dashes are removed and the result is capped at 64 bytes.
	 * The function is idempotent: sanitizing an already sanitized id returns it unchanged.
	 *
	 * @param string $raw Candidate anchor id.
	 * @return string Sanitized id, or an empty string when nothing usable remains.
	 */
	public static function sanitize_anchor_id(string $raw): string
	{
		$raw = strtolower(trim($raw));
		if ($raw === '') {
			return '';
		}

		$raw = preg_replace('/[^a-z0-9_-]+/', '-', $raw) ?? '';
		$raw = trim($raw, "- \t\n\r\0\x0B");
		if ($raw === '') {
			return '';
		}

		if (strlen($raw) > 64) {
			// Cutting at 64 bytes may leave a dash at the end. Strip it here so that
			// a second sanitize pass (every reader performs one) yields the same id.
			$raw = rtrim(substr($raw, 0, 64), '-');
		}

		return $raw;
	}

	/**
	 * Normalization: guarantees one anchor for every id returned by `Registry::all_ids()`.
	 *
	 * Values that are missing, not convertible to a string, or empty after
	 * sanitizing fall back to `default_module_anchor_id()`.
	 *
	 * @param mixed $incoming Stored `module_id => anchor` map; non-arrays are treated as empty.
	 * @return array<string,string>
	 */
	public static function normalize_module_anchors($incoming): array
	{
		$incoming = is_array($incoming) ? $incoming : [];

		$out = [];
		foreach (Registry::all_ids() as $module_id) {
			$val = self::sanitize_anchor_id(self::to_string($incoming[$module_id] ?? null));
			if ($val === '') {
				$val = self::default_module_anchor_id($module_id);
			}
			$out[$module_id] = $val;
		}

		return $out;
	}

	/**
	 * Returns the anchor id to use for one module of a post.
	 *
	 * @param array  $meta      Post meta; the anchor is read from `$meta['anchors']['modules']`.
	 * @param string $module_id Module identifier.
	 * @return string The stored anchor when it is usable, otherwise the default anchor.
	 *                Ids rejected by `Registry::is_valid_id()` yield `cr-` plus the
	 *                `sanitize_key()` form of the id.
	 */
	public static function get_module_anchor(array $meta, string $module_id): string
	{
		if (!Registry::is_valid_id($module_id)) {
			return 'cr-' . sanitize_key($module_id);
		}
		$anchors = is_array($meta['anchors']['modules'] ?? null) ? $meta['anchors']['modules'] : [];
		$val = self::sanitize_anchor_id(self::to_string($anchors[$module_id] ?? null));
		return $val !== '' ? $val : self::default_module_anchor_id($module_id);
	}

	/**
	 * Normalizes the stored heading anchors (h2-h6 inside the post content). Structure:
	 * `cr_meta_v1.anchors.headings[] = [ 'hash' => string, 'id' => string ]`
	 *
	 * Entries that are not arrays, or whose hash or sanitized id is empty, are dropped.
	 * At most 200 entries are returned.
	 *
	 * @param mixed $incoming Stored list; non-arrays are treated as empty.
	 * @return array<int,array{hash:string, id:string, legacy_id:string}>
	 */
	public static function normalize_heading_anchors($incoming): array
	{
		$incoming = is_array($incoming) ? $incoming : [];
		$out = [];
		$max = 200;

		foreach ($incoming as $it) {
			if (!is_array($it)) {
				continue;
			}
			// to_string() maps nested arrays / plain objects to '' so that corrupted
			// meta is skipped instead of producing "Array" hashes or ids.
			$hash = self::to_string($it['hash'] ?? null);
			$id = self::sanitize_anchor_id(self::to_string($it['id'] ?? null));
			$legacy_id = self::sanitize_anchor_id(self::to_string($it['legacy_id'] ?? null));
			if ($hash === '' || $id === '') {
				continue;
			}
			$out[] = [
				'hash' => substr($hash, 0, 64),
				'id' => $id,
				'legacy_id' => $legacy_id,
			];
			if (count($out) >= $max) {
				break;
			}
		}

		return $out;
	}

	/**
	 * Returns the anchor recorded for the heading at position `$index`.
	 *
	 * @param array $meta  Post meta; the list is read from `$meta['anchors']['headings']`.
	 * @param int   $index Zero-based position in the normalized list.
	 * @return array{id:string, legacy_id:string} Both values are empty strings when
	 *                                             there is no record at that position.
	 */
	public static function get_heading_anchor(array $meta, int $index): array
	{
		$list = self::normalize_heading_anchors($meta['anchors']['headings'] ?? null);
		if (!isset($list[$index])) {
			return ['id' => '', 'legacy_id' => ''];
		}

		// Compatibility: older records for Chinese headings may hold a very long
		// UTF-8 hex slug. Output a short anchor instead and keep the old value as
		// legacy_id (an alias) so that existing external links keep working.
		list($id, $legacy_id) = self::split_legacy($list[$index]['id'], $list[$index]['legacy_id']);

		return ['id' => $id, 'legacy_id' => $legacy_id];
	}

	/**
	 * Shortcut for the `id` part of `get_heading_anchor()`.
	 *
	 * @param array $meta  Post meta.
	 * @param int   $index Zero-based position in the normalized list.
	 * @return string Anchor id, or an empty string when there is no record.
	 */
	public static function get_heading_anchor_id(array $meta, int $index): string
	{
		$a = self::get_heading_anchor($meta, $index);
		return (string)($a['id'] ?? '');
	}

	/**
	 * Builds the heading anchors from the current post content plus the existing
	 * records, keeping ids as stable as possible.
	 *
	 * - If the heading already carries an id, that id is used (treated as an explicit user choice).
	 * - If a record exists at the same index, its id is reused (editing the heading text does not change the anchor).
	 * - Otherwise a historical id is reused by hash; as a last resort a new id is generated from the heading text.
	 *
	 * Ids are made unique within the result by appending `-2`, `-3`, ...
	 *
	 * @param string $html     Post content.
	 * @param array  $existing Previously stored heading anchor records.
	 * @return array<int,array<string,string>> Records with `hash` and `id`, plus
	 *                                         `legacy_id` when a distinct legacy alias exists.
	 */
	public static function build_heading_anchors(string $html, array $existing): array
	{
		$existing = self::normalize_heading_anchors($existing);
		$by_hash = [];
		foreach ($existing as $it) {
			$by_hash[(string)$it['hash']] = $it;
		}

		$headings = HtmlHeadings::extract($html, 2, 6);
		$out = [];
		$used = [];
		$dup_counter = [];

		foreach ($headings as $idx => $h) {
			$legacy = '';
			$level = (int)($h['level'] ?? 2);
			$text = (string)($h['text'] ?? '');
			$existing_id = self::sanitize_anchor_id((string)($h['existing_id'] ?? ''));

			// The hash identifies a heading by level, whitespace-normalized lower-cased
			// text, and how many identical headings preceded it.
			$norm_text = strtolower(trim(preg_replace('/\\s+/', ' ', $text) ?? ''));
			$occ_key = 'h' . $level . ':' . $norm_text;
			$occ = (int)($dup_counter[$occ_key] ?? 0);
			$dup_counter[$occ_key] = $occ + 1;

			$hash = 'h' . $level . ':' . sha1($norm_text . '|' . (string)$occ);

			$id = '';
			if ($existing_id !== '') {
				$id = $existing_id;
			} elseif (isset($existing[$idx])) {
				// Normalized records always carry a non-empty id, so isset() suffices
				// (a truthiness check would wrongly reject the valid id "0").
				list($id, $legacy) = self::split_legacy($existing[$idx]['id'], $existing[$idx]['legacy_id']);
			} elseif (isset($by_hash[$hash])) {
				list($id, $legacy) = self::split_legacy($by_hash[$hash]['id'], $by_hash[$hash]['legacy_id']);
			} else {
				$slug = self::sanitize_anchor_id((string)sanitize_title($text));
				// For Chinese and other non-Latin headings, WP's sanitize_title may produce
				// a very long UTF-8 hex slug. It works but is hard to read, so use a short
				// hash anchor instead (still stable and copyable).
				if ($slug === '' || self::looks_like_utf8_hex_slug($slug)) {
					$slug = self::short_id_from_legacy($hash);
				}
				$id = $slug;
			}

			if ($id === '') {
				$id = 'section-' . ($idx + 1);
			}

			$base = $id;
			$suffix = 2;
			while (isset($used[$id])) {
				$id = self::with_suffix($base, $suffix);
				$suffix++;
			}
			$used[$id] = true;

			$record = [
				'hash' => substr($hash, 0, 64),
				'id' => $id,
			];
			if ($legacy !== '' && $legacy !== $id) {
				$record['legacy_id'] = $legacy;
			}
			$out[] = $record;
		}

		return $out;
	}

	/**
	 * Converts a stored meta value to a string without emitting conversion warnings.
	 *
	 * @param mixed $value Scalar, stringable object, or anything else.
	 * @return string The string form; an empty string for null, arrays and
	 *                objects that have no `__toString()`.
	 */
	private static function to_string($value): string
	{
		if (is_scalar($value)) {
			return (string)$value;
		}
		if (is_object($value) && method_exists($value, '__toString')) {
			return (string)$value;
		}
		return '';
	}

	/**
	 * Replaces a UTF-8 hex slug by a short id, keeping the slug as the legacy alias.
	 *
	 * @param string $id        Sanitized anchor id.
	 * @param string $legacy_id Sanitized legacy alias, possibly empty.
	 * @return array{0:string, 1:string} `[id, legacy_id]`; unchanged when a legacy
	 *                                   alias already exists or the id is not a hex slug.
	 */
	private static function split_legacy(string $id, string $legacy_id): array
	{
		if ($legacy_id === '' && self::looks_like_utf8_hex_slug($id)) {
			return [self::short_id_from_legacy($id), $id];
		}
		return [$id, $legacy_id];
	}

	/**
	 * Appends `-<suffix>` to a base id while keeping the result within 64 bytes,
	 * so that the suffix is not cut off when the id is sanitized again on read.
	 */
	private static function with_suffix(string $base, int $suffix): string
	{
		$tail = '-' . $suffix;
		$room = 64 - strlen($tail);
		if (strlen($base) > $room) {
			$base = rtrim(substr($base, 0, $room), '-');
		}
		return $base . $tail;
	}

	/**
	 * Derives a short id: `h-` plus the first 10 hex characters of `sha1('cr:' . $legacy)`.
	 */
	private static function short_id_from_legacy(string $legacy): string
	{
		return 'h-' . substr(sha1('cr:' . $legacy), 0, 10);
	}

	/**
	 * Heuristic check for slugs made of dash-separated hex byte pairs,
	 * e.g. `e5-9b-9b-e5-a4-a7-e6-81-b6-...`.
	 *
	 * @return bool True when the slug has at least 8 dash-separated parts, at least
	 *              6 of them are two-character hex strings, and those make up at
	 *              least 60% of all parts.
	 */
	private static function looks_like_utf8_hex_slug(string $slug): bool
	{
		$parts = explode('-', $slug);
		$total = count($parts);
		if ($total < 8) {
			return false;
		}

		$hex2 = 0;
		foreach ($parts as $p) {
			if (strlen($p) === 2 && ctype_xdigit($p)) {
				$hex2++;
			}
		}

		return $hex2 >= 6 && ($hex2 / $total) >= 0.6;
	}

}
