{{- /* layouts/partials/toc-json-regex.html */ -}}
{{- /*
  Convert Hugo's HTML table of contents to JSON using regex substitutions.

  Input:  page object (the partial reads .TableOfContents from its context).
  Output: a JSON object of the form
            {"sections":[{"id":"ID","title":"TITLE","children":[ ... ]}, ... ]}
          or {"sections":[]} when the TOC has no entries or the result fails
          the sanity checks at the end of this partial.

  All patterns are written as raw (backtick) strings so that neither the
  regex backslashes nor the JSON double quotes need escaping.
*/ -}}

{{- $toc := .TableOfContents -}}

{{- /* Remove the nav wrapper and all newlines/extra whitespace */ -}}
{{- $toc = $toc | replaceRE `</?nav[^>]*>` "" -}}
{{- $toc = $toc | replaceRE `\n\s*` "" -}}

{{- /* Escape backslashes so heading text containing them still yields valid JSON strings */ -}}
{{- $toc = replace $toc `\` `\\` -}}

{{- /* Strip inline HTML elements (like code, strong, em) from anchor text.
       Each pattern only matches when the element contains plain text.
       NOTE(review): the tag name in the fourth (attribute-carrying) pattern
       is assumed to be span - confirm against the TOC markup in use. */ -}}
{{- $toc = $toc | replaceRE `<code>([^<]*)</code>` "$1" -}}
{{- $toc = $toc | replaceRE `<strong>([^<]*)</strong>` "$1" -}}
{{- $toc = $toc | replaceRE `<em>([^<]*)</em>` "$1" -}}
{{- $toc = $toc | replaceRE `<span[^>]*>([^<]*)</span>` "$1" -}}

{{- /* Check if there's any content left (i.e., at least one list item) */ -}}
{{- if not (findRE `<li>` $toc) -}}
  {{- /* No TOC content - return empty sections array */ -}}
  {"sections":[]}
{{- else -}}
  {{- /* Step 1: Replace the ul open/close tags with array brackets */ -}}
  {{- $toc = $toc | replaceRE `<ul>` "[" -}}
  {{- $toc = $toc | replaceRE `</ul>` "]" -}}

  {{- /* Step 2: Replace each list item + anchor with the start of an object:
         {"id":"ID","title":"TITLE"   (left open; closed in Step 3) */ -}}
  {{- $toc = $toc | replaceRE `<li><a href="#([^"]+)">([^<]+)</a>` `{"id":"$1","title":"$2"` -}}

  {{- /* Step 2b: Remove any remaining li or a tags that weren't matched (orphaned entries) */ -}}
  {{- $toc = $toc | replaceRE `<li>` "" -}}
  {{- $toc = $toc | replaceRE `<a[^>]*>[^<]*</a>` "" -}}

  {{- /* Step 3: Replace the closing li tag with a closing brace */ -}}
  {{- $toc = $toc | replaceRE `</li>` "}" -}}

  {{- /* Step 4: Handle nested structure - adjacent arrays become sibling arrays */ -}}
  {{- $toc = $toc | replaceRE `\]\[` "],[" -}}

  {{- /* Step 4b: A bracket directly after a title's closing quote opens that item's children array */ -}}
  {{- $toc = $toc | replaceRE `"\[` `","children":[` -}}

  {{- /* Step 5: Add commas between sibling objects */ -}}
  {{- $toc = $toc | replaceRE `\}\{` "},{" -}}

  {{- /* Step 5b: Wrap everything in the top-level sections object */ -}}
  {{- $toc = print `{"sections":` $toc `}` -}}

  {{- /* Step 6: Clean up any remaining malformed patterns */ -}}
  {{- /* Fix multiple root-level elements */ -}}
  {{- $toc = $toc | replaceRE `\]\},\[` "]},{" -}}
  {{- /* Fix nested arrays at root (double opening bracket after "sections":) */ -}}
  {{- $toc = $toc | replaceRE `"sections":\[\[` `"sections":[` -}}
  {{- /* Drop the brace between a closing array and the next object.
         NOTE(review): for a well-formed TOC this same sequence is the normal
         boundary between an item with children and its next sibling; removing
         the brace leaves a sequence that the validation below rejects, so such
         TOCs appear to fall back to the empty result. Behaviour is kept as-is
         here - confirm against real .TableOfContents output before changing. */ -}}
  {{- $toc = $toc | replaceRE `\]\},\{` "],{" -}}
  {{- /* Fix doubled closing brackets */ -}}
  {{- $toc = $toc | replaceRE `\]\],\}` "]}" -}}
  {{- $toc = $toc | replaceRE `\]\]\}` "]}" -}}
  {{- /* Flatten sibling arrays into one list of objects */ -}}
  {{- $toc = $toc | replaceRE `\],\[\{` ",{" -}}
  {{- $toc = $toc | replaceRE `\}\],\[\{` "},{" -}}
  {{- /* Remove any leftover empty braces or malformed sequences */ -}}
  {{- $toc = $toc | replaceRE `\[\}` "[]" -}}
  {{- $toc = $toc | replaceRE `\{\]` "[]" -}}
  {{- $toc = $toc | replaceRE `,\}` "}" -}}
  {{- $toc = $toc | replaceRE `,\]` "]" -}}
  {{- /* Fix empty children followed by a sibling: close the parent object first */ -}}
  {{- $toc = $toc | replaceRE `"children":\[\],\{` `"children":[]},{` -}}
  {{- /* Fix a malformed array start (comma directly after the opening bracket) */ -}}
  {{- $toc = $toc | replaceRE `\[,\{` "[{" -}}

  {{- /* Step 7: Remove all newlines and extra whitespace for clean output */ -}}
  {{- $toc = $toc | replaceRE `\n\s*` "" -}}

  {{- /* Validation: fall back to the empty result if any known-bad pattern
         survived - leftover HTML, doubled brackets, or malformed array/object
         boundaries. The last pattern can no longer occur after Step 6 rewrote
         it, so that check is purely defensive. */ -}}
  {{- $invalid := false -}}
  {{- range slice `<` `\[\[` `\]\]` `\}\],\{` `\[\],\{` `\}\},\{` `\]\},\{` -}}
    {{- if findRE . $toc -}}{{- $invalid = true -}}{{- end -}}
  {{- end -}}

  {{- if $invalid -}}
    {"sections":[]}
  {{- else -}}
    {{- /* Output the JSON - use safeHTML to prevent quote escaping */ -}}
    {{- $toc | safeHTML -}}
  {{- end -}}
{{- end -}}