<?php

declare(strict_types=1);

namespace ContentReady\Util;

final class HtmlHeadings
{
	/**
	 * Extracts heading elements from an HTML fragment, in document order.
	 *
	 * Headings are located with a regular expression rather than a DOM parser,
	 * so only `<hN ...>...</hN>` pairs whose opening and closing tag share the
	 * same level are recognised. Headings whose text is empty once markup has
	 * been stripped are skipped.
	 *
	 * Each entry of the result holds:
	 *  - `level`:       the heading level (1-6),
	 *  - `text`:        the heading content with all tags removed and trimmed,
	 *  - `existing_id`: the raw value of the heading's `id` attribute, or '' if
	 *                   it has none.
	 *
	 * @param string $html      HTML fragment to scan.
	 * @param int    $min_level Lowest heading level to include; clamped to 1-6.
	 * @param int    $max_level Highest heading level to include; clamped to 1-6.
	 *                          When it is lower than $min_level the two bounds
	 *                          are swapped.
	 *
	 * @return array<int, array{level:int,text:string,existing_id:string}>
	 */
	public static function extract(string $html, int $min_level = 2, int $max_level = 6): array
	{
		if ($html === '') {
			return [];
		}

		// Clamp both bounds to the valid heading range and tolerate swapped arguments.
		$min_level = max(1, min(6, $min_level));
		$max_level = max(1, min(6, $max_level));
		$lowest = min($min_level, $max_level);
		$highest = max($min_level, $max_level);

		$matches = [];
		$found = preg_match_all('~<(h[1-6])\\b([^>]*)>(.*?)</\\1>~is', $html, $matches, PREG_SET_ORDER);
		if ($found === false || $found === 0) {
			// false signals a PCRE failure; it is treated the same as "no headings".
			return [];
		}

		$out = [];
		foreach ($matches as $m) {
			// $m[1] is "h" (any case) followed by a single digit.
			$level = (int)substr($m[1], 1);
			if ($level < $lowest || $level > $highest) {
				continue;
			}

			$text = self::plainText($m[3]);
			if ($text === '') {
				continue;
			}

			$out[] = [
				'level' => $level,
				'text' => $text,
				'existing_id' => self::findId($m[2]),
			];
		}

		return $out;
	}

	/**
	 * Reduces a heading's inner HTML to trimmed plain text.
	 *
	 * Uses WordPress' wp_strip_all_tags() when it is loaded. Otherwise falls
	 * back to removing <script>/<style> blocks and calling strip_tags(), which
	 * is intended to approximate the WordPress behaviour so the class stays
	 * usable outside a WordPress runtime.
	 */
	private static function plainText(string $inner): string
	{
		if (\function_exists('wp_strip_all_tags')) {
			return trim((string)\wp_strip_all_tags($inner));
		}

		$stripped = preg_replace('~<(script|style)[^>]*?>.*?</\\1>~si', '', $inner);

		// preg_replace() yields null on a PCRE error; fall back to the raw input then.
		return trim(strip_tags($stripped ?? $inner));
	}

	/**
	 * Returns the value of the `id` attribute from a tag's raw attribute string.
	 *
	 * The string is tokenised attribute by attribute so that only an attribute
	 * named exactly `id` (case-insensitive) is considered: `data-id="..."` and
	 * an `id=` occurring inside another attribute's quoted value are not
	 * mistaken for it. Double-quoted, single-quoted and unquoted values are all
	 * accepted. If the tag repeats `id`, the first occurrence wins.
	 *
	 * @param string $attrs Everything between the tag name and the closing `>`.
	 *
	 * @return string The raw attribute value, or '' when no `id` is present.
	 */
	private static function findId(string $attrs): string
	{
		if ($attrs === '') {
			return '';
		}

		// Group 1: attribute name. Group 2 (branch reset): value without quotes.
		$pattern = '~([^\\s"\'<>/=]+)(?:\\s*=\\s*(?|"([^"]*)"|\'([^\']*)\'|([^\\s"\'=<>`]+)))?~';

		$pairs = [];
		$found = preg_match_all($pattern, $attrs, $pairs, PREG_SET_ORDER);
		if ($found === false || $found === 0) {
			return '';
		}

		foreach ($pairs as $pair) {
			if (strtolower($pair[1]) === 'id') {
				// Group 2 is absent for a valueless (boolean-style) attribute.
				return $pair[2] ?? '';
			}
		}

		return '';
	}
}

