Skip to content

Commit 39cb300

Browse files
authored
Preserve HTML entities in headers (#2122)
We now use DocAST more consistently instead of relying on regexes, streamlining the code and fixing several bugs along the way. Closes #2114. Closes #2120.
1 parent 7500815 commit 39cb300

29 files changed

+565
-717
lines changed

assets/css/content/general.css

+2-1
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,10 @@
205205
.content-inner .section-heading i {
206206
font-size: var(--icon-size);
207207
color: var(--mainLight);
208-
margin-top: 0.1em;
208+
top: -2px;
209209
margin-left: calc(-1 * (var(--icon-size) + var(--icon-spacing)));
210210
padding-right: var(--icon-spacing);
211+
position: relative;
211212
opacity: 0;
212213
}
213214

formatters/html/dist/html-elixir-J3PIVQVA.css

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

formatters/html/dist/html-elixir-M6JNNWMH.css

-6
This file was deleted.

formatters/html/dist/html-erlang-5OIFJN4X.css

-6
This file was deleted.

formatters/html/dist/html-erlang-ZK43ZOAC.css

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/ex_doc/doc_ast.ex

+163-101
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import Kernel, except: [to_string: 1]
2+
13
defmodule ExDoc.DocAST do
24
# General helpers for dealing with the documentation AST
35
# (which is the Markdown -> HTML AST).
@@ -31,35 +33,32 @@ defmodule ExDoc.DocAST do
3133
@doc """
3234
Transform AST into string.
3335
"""
34-
def to_string(ast, fun \\ fn _ast, string -> string end)
36+
def to_string(binary) do
37+
IO.iodata_to_binary(to_iodata(binary))
38+
end
3539

36-
def to_string(binary, _fun) when is_binary(binary) do
40+
defp to_iodata(binary) when is_binary(binary) do
3741
ExDoc.Utils.h(binary)
3842
end
3943

40-
def to_string(list, fun) when is_list(list) do
41-
result = Enum.map_join(list, "", &to_string(&1, fun))
42-
fun.(list, result)
44+
defp to_iodata(list) when is_list(list) do
45+
Enum.map(list, &to_iodata/1)
4346
end
4447

45-
def to_string({:comment, _attrs, inner, _meta} = ast, fun) do
46-
fun.(ast, "<!--#{inner}-->")
48+
defp to_iodata({:comment, _attrs, inner, _meta}) do
49+
["<!--", inner, "-->"]
4750
end
4851

49-
def to_string({tag, attrs, _inner, _meta} = ast, fun) when tag in @void_elements do
50-
result = "<#{tag}#{ast_attributes_to_string(attrs)}/>"
51-
fun.(ast, result)
52+
defp to_iodata({tag, attrs, _inner, _meta}) when tag in @void_elements do
53+
"<#{tag}#{ast_attributes_to_string(attrs)}/>"
5254
end
5355

54-
def to_string({tag, attrs, inner, %{verbatim: true}} = ast, fun) do
55-
inner = Enum.join(inner, "")
56-
result = "<#{tag}#{ast_attributes_to_string(attrs)}>" <> inner <> "</#{tag}>"
57-
fun.(ast, result)
56+
defp to_iodata({tag, attrs, inner, %{verbatim: true}}) do
57+
["<#{tag}#{ast_attributes_to_string(attrs)}>", inner, "</#{tag}>"]
5858
end
5959

60-
def to_string({tag, attrs, inner, _meta} = ast, fun) do
61-
result = "<#{tag}#{ast_attributes_to_string(attrs)}>" <> to_string(inner, fun) <> "</#{tag}>"
62-
fun.(ast, result)
60+
defp to_iodata({tag, attrs, inner, _meta}) do
61+
["<#{tag}#{ast_attributes_to_string(attrs)}>", to_iodata(inner), "</#{tag}>"]
6362
end
6463

6564
defp ast_attributes_to_string(attrs) do
@@ -86,17 +85,17 @@ defmodule ExDoc.DocAST do
8685
case content do
8786
# if we already have <pre><code>...</code></pre>, carry on
8887
[{:code, _, _}] ->
89-
{:pre, attrs, parse_erl_ast(content), %{}}
88+
{:pre, attrs, parse_erl_ast(List.wrap(content)), %{}}
9089

9190
# otherwise, turn <pre>...</pre> into <pre><code>...</code></pre>
9291
_ ->
93-
content = [{:code, [], parse_erl_ast(content), %{}}]
92+
content = [{:code, [], parse_erl_ast(List.wrap(content)), %{}}]
9493
{:pre, attrs, content, %{}}
9594
end
9695
end
9796

9897
defp parse_erl_ast({tag, attrs, content}) when is_atom(tag) do
99-
{tag, attrs, parse_erl_ast(content), %{}}
98+
{tag, attrs, parse_erl_ast(List.wrap(content)), %{}}
10099
end
101100

102101
@doc """
@@ -110,6 +109,49 @@ defmodule ExDoc.DocAST do
110109
def extract_title([{:h1, _attrs, inner, _meta} | ast]), do: {:ok, inner, ast}
111110
def extract_title(_ast), do: :error
112111

112+
@doc """
113+
Extracts the headers which have anchors (aka ids) in them.
114+
"""
115+
def extract_headers_with_ids(ast, headers) do
116+
ast
117+
|> reduce_tags([], fn {tag, attrs, inner, _}, acc ->
118+
with true <- tag in headers,
119+
id = Keyword.get(attrs, :id, ""),
120+
text = ExDoc.DocAST.text(inner),
121+
true <- id != "" and text != "" do
122+
[{tag, text, id} | acc]
123+
else
124+
_ -> acc
125+
end
126+
end)
127+
|> Enum.reverse()
128+
end
129+
130+
@doc """
131+
Adds an id attribute to the given headers.
132+
133+
A prefix for the id attribute can be given,
134+
which is automatically URL encoded to avoid
135+
issues.
136+
"""
137+
def add_ids_to_headers(doc_ast, headers, prefix \\ "") do
138+
prefix = URI.encode(prefix)
139+
140+
doc_ast
141+
|> map_reduce_tags(%{}, fn {tag, attrs, inner, meta} = ast, seen ->
142+
if tag in headers and not Keyword.has_key?(attrs, :id) do
143+
possible_id = inner |> text() |> ExDoc.Utils.text_to_id()
144+
id_count = Map.get(seen, possible_id, 0)
145+
partial_id = if id_count >= 1, do: "#{possible_id}-#{id_count}", else: possible_id
146+
seen = Map.put(seen, possible_id, id_count + 1)
147+
{{tag, [id: prefix <> partial_id] ++ attrs, inner, meta}, seen}
148+
else
149+
{ast, seen}
150+
end
151+
end)
152+
|> elem(0)
153+
end
154+
113155
@doc """
114156
Compute a synopsis from a document by looking at its first paragraph.
115157
"""
@@ -132,41 +174,43 @@ defmodule ExDoc.DocAST do
132174
@doc """
133175
Remove ids from elements.
134176
"""
135-
def remove_ids({tag, attrs, inner, meta}),
136-
do: {tag, Keyword.delete(attrs, :href), remove_ids(inner), meta}
137-
138-
def remove_ids(list) when is_list(list),
139-
do: Enum.map(list, &remove_ids/1)
140-
141-
def remove_ids(other),
142-
do: other
177+
def remove_ids(ast) do
178+
map_tags(ast, fn {tag, attrs, inner, meta} ->
179+
{tag, Keyword.delete(attrs, :href), inner, meta}
180+
end)
181+
end
143182

144183
@doc """
145184
Returns text content from the given AST.
146185
"""
147-
def text(ast) do
186+
def text(ast, joiner \\ "") do
148187
ast
149-
|> do_text()
188+
|> do_text(joiner)
150189
|> IO.iodata_to_binary()
151190
|> String.trim()
152191
end
153192

154-
defp do_text(ast) when is_list(ast), do: Enum.map(ast, &do_text/1)
155-
defp do_text(ast) when is_binary(ast), do: ast
156-
defp do_text({_tag, _attr, ast, _meta}), do: text(ast)
193+
defp do_text(ast, joiner) when is_list(ast),
194+
do: Enum.map_intersperse(ast, joiner, &do_text(&1, joiner))
195+
196+
defp do_text(ast, _joiner) when is_binary(ast),
197+
do: ast
198+
199+
defp do_text({_tag, _attr, ast, _meta}, joiner),
200+
do: do_text(ast, joiner)
157201

158202
@doc """
159-
Wraps a list of HTML nodes into `<section>` tags whenever `matcher` returns true.
203+
Wraps a list of HTML nodes into `<section>` tags whenever a node's tag is listed in `headers`.
160204
"""
161-
def sectionize(list, matcher), do: sectionize(list, matcher, [])
205+
def sectionize(list, headers), do: sectionize(list, headers, [])
162206

163-
defp sectionize(list, matcher, acc) do
164-
case pivot(list, acc, matcher) do
207+
defp sectionize(list, headers, acc) do
208+
case pivot(list, acc, headers) do
165209
{acc, {header_tag, header_attrs, _, _} = header, rest} ->
166210
{inner, rest} = Enum.split_while(rest, &not_tag?(&1, header_tag))
167211
class = String.trim_trailing("#{header_tag} #{header_attrs[:class]}")
168-
section = {:section, [class: class], [header | sectionize(inner, matcher, [])], %{}}
169-
sectionize(rest, matcher, [section | acc])
212+
section = {:section, [class: class], [header | sectionize(inner, headers, [])], %{}}
213+
sectionize(rest, headers, [section | acc])
170214

171215
acc ->
172216
acc
@@ -176,53 +220,63 @@ defmodule ExDoc.DocAST do
176220
defp not_tag?({tag, _, _, _}, tag), do: false
177221
defp not_tag?(_, _tag), do: true
178222

179-
defp pivot([head | tail], acc, fun) do
180-
case fun.(head) do
181-
true -> {acc, head, tail}
182-
false -> pivot(tail, [head | acc], fun)
223+
defp pivot([{tag, _, _, _} = head | tail], acc, headers) do
224+
if tag in headers do
225+
{acc, head, tail}
226+
else
227+
pivot(tail, [head | acc], headers)
183228
end
184229
end
185230

186-
defp pivot([], acc, _fun), do: Enum.reverse(acc)
231+
defp pivot([head | tail], acc, headers), do: pivot(tail, [head | acc], headers)
232+
defp pivot([], acc, _headers), do: Enum.reverse(acc)
187233

188234
@doc """
189-
Highlights a DocAST converted to string.
235+
Highlights the code blocks in the AST.
190236
"""
191-
# TODO: Could this be done over the AST instead?
192-
def highlight(html, language, opts \\ []) do
237+
def highlight(ast, language, opts \\ []) do
193238
highlight_info = language.highlight_info()
194239

195-
## Html cannot be parsed with regex, but we try our best...
196-
Regex.replace(
197-
~r/<pre(\s[^>]*)?><code(?:\s+class="([^"\s]*)")?>([^<]*)<\/code><\/pre>/,
198-
html,
199-
&highlight_code_block(&1, &2, &3, &4, highlight_info, opts)
200-
)
201-
end
202-
203-
defp highlight_code_block(full_block, pre_attr, lang, code, highlight_info, outer_opts) do
204-
case pick_language_and_lexer(lang, highlight_info, code) do
205-
{_language, nil, _opts} ->
206-
full_block
207-
208-
{lang, lexer, opts} ->
209-
try do
210-
render_code(pre_attr, lang, lexer, opts, code, outer_opts)
211-
rescue
212-
exception ->
213-
ExDoc.Utils.warn(
214-
[
215-
"crashed while highlighting #{lang} snippet:\n\n",
216-
full_block,
217-
"\n\n",
218-
Exception.format_banner(:error, exception, __STACKTRACE__)
219-
],
220-
__STACKTRACE__
221-
)
222-
223-
full_block
240+
map_tags(ast, fn
241+
{:pre, pre_attrs, [{:code, code_attrs, [code], code_meta}], pre_meta} = ast
242+
when is_binary(code) ->
243+
{lang, code_attrs} = Keyword.pop(code_attrs, :class, "")
244+
245+
case pick_language_and_lexer(lang, highlight_info, code) do
246+
{_lang, nil, _lexer_opts} ->
247+
ast
248+
249+
{lang, lexer, lexer_opts} ->
250+
try do
251+
Makeup.highlight_inner_html(code,
252+
lexer: lexer,
253+
lexer_options: lexer_opts,
254+
formatter_options: opts
255+
)
256+
rescue
257+
exception ->
258+
ExDoc.Utils.warn(
259+
[
260+
"crashed while highlighting #{lang} snippet:\n\n",
261+
ExDoc.DocAST.to_string(ast),
262+
"\n\n",
263+
Exception.format_banner(:error, exception, __STACKTRACE__)
264+
],
265+
__STACKTRACE__
266+
)
267+
268+
ast
269+
else
270+
highlighted ->
271+
code_attrs = [class: "makeup #{lang}", translate: "no"] ++ code_attrs
272+
code_meta = Map.put(code_meta, :verbatim, true)
273+
{:pre, pre_attrs, [{:code, code_attrs, [highlighted], code_meta}], pre_meta}
274+
end
224275
end
225-
end
276+
277+
ast ->
278+
ast
279+
end)
226280
end
227281

228282
defp pick_language_and_lexer("", _highlight_info, "$ " <> _) do
@@ -244,35 +298,43 @@ defmodule ExDoc.DocAST do
244298
end
245299
end
246300

247-
defp render_code(pre_attr, lang, lexer, lexer_opts, code, opts) do
248-
highlight_tag = Keyword.get(opts, :highlight_tag, "span")
301+
## Traversal helpers
249302

250-
highlighted =
251-
code
252-
|> unescape_html()
253-
|> IO.iodata_to_binary()
254-
|> Makeup.highlight_inner_html(
255-
lexer: lexer,
256-
lexer_options: lexer_opts,
257-
formatter_options: [highlight_tag: highlight_tag]
258-
)
303+
@doc """
304+
Maps the tags in the AST, first mapping children tags, then the tag itself.
305+
"""
306+
def map_tags({tag, attrs, inner, meta}, fun),
307+
do: fun.({tag, attrs, Enum.map(inner, &map_tags(&1, fun)), meta})
259308

260-
~s(<pre#{pre_attr}><code class="makeup #{lang}" translate="no">#{highlighted}</code></pre>)
261-
end
309+
def map_tags(list, fun) when is_list(list),
310+
do: Enum.map(list, &map_tags(&1, fun))
262311

263-
entities = [{"&amp;", ?&}, {"&lt;", ?<}, {"&gt;", ?>}, {"&quot;", ?"}, {"&#39;", ?'}]
312+
def map_tags(other, _fun),
313+
do: other
264314

265-
for {encoded, decoded} <- entities do
266-
defp unescape_html(unquote(encoded) <> rest) do
267-
[unquote(decoded) | unescape_html(rest)]
268-
end
269-
end
315+
@doc """
316+
Reduces the tags in the AST, first reducing children tags, then the tag itself.
317+
"""
318+
def reduce_tags({tag, attrs, inner, meta}, acc, fun),
319+
do: fun.({tag, attrs, inner, meta}, Enum.reduce(inner, acc, &reduce_tags(&1, &2, fun)))
270320

271-
defp unescape_html(<<c, rest::binary>>) do
272-
[c | unescape_html(rest)]
273-
end
321+
def reduce_tags(list, acc, fun) when is_list(list),
322+
do: Enum.reduce(list, acc, &reduce_tags(&1, &2, fun))
323+
324+
def reduce_tags(_other, acc, _fun),
325+
do: acc
274326

275-
defp unescape_html(<<>>) do
276-
[]
327+
@doc """
328+
Map-reduces the tags in the AST, first mapping children tags, then the tag itself.
329+
"""
330+
def map_reduce_tags({tag, attrs, inner, meta}, acc, fun) do
331+
{inner, acc} = Enum.map_reduce(inner, acc, &map_reduce_tags(&1, &2, fun))
332+
fun.({tag, attrs, inner, meta}, acc)
277333
end
334+
335+
def map_reduce_tags(list, acc, fun) when is_list(list),
336+
do: Enum.map_reduce(list, acc, &map_reduce_tags(&1, &2, fun))
337+
338+
def map_reduce_tags(other, acc, _fun),
339+
do: {other, acc}
278340
end

lib/ex_doc/formatter/epub.ex

+4-6
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,10 @@ defmodule ExDoc.Formatter.EPUB do
6262

6363
defp generate_extras(config) do
6464
for {_title, extras} <- config.extras,
65-
extra_config <- extras,
66-
not is_map_key(extra_config, :url) do
67-
%{id: id, title: title, title_content: title_content, content: content} = extra_config
68-
69-
output = "#{config.output}/OEBPS/#{id}.xhtml"
70-
html = Templates.extra_template(config, title, title_content, content)
65+
node <- extras,
66+
not is_map_key(node, :url) and node.type != :cheatmd do
67+
output = "#{config.output}/OEBPS/#{node.id}.xhtml"
68+
html = Templates.extra_template(config, node)
7169

7270
if File.regular?(output) do
7371
Utils.warn("file #{Path.relative_to_cwd(output)} already exists", [])

0 commit comments

Comments
 (0)