Skip to content

Commit 91cfb6d

Browse files
committed
Add tagfilter option
1 parent 4f3cf32 commit 91cfb6d

File tree

5 files changed

+131
-3
lines changed

5 files changed

+131
-3
lines changed

index.d.ts

+22
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,26 @@ export interface Options {
1818
* be hast again and will be handled.
1919
*/
2020
passThrough?: Array<string> | null | undefined
21+
22+
/**
23+
* Whether to disallow irregular tags in `raw` nodes according to GFM
24+
* tagfilter
25+
* (default: `false`).
26+
*
27+
* This affects the following tags,
28+
* grouped by their kind:
29+
*
30+
* * `RAWTEXT`: `iframe`, `noembed`, `noframes`, `style`, `xmp`
31+
* * `RCDATA`: `textarea`, `title`
32+
* * `SCRIPT_DATA`: `script`
33+
* * `PLAINTEXT`: `plaintext`
34+
*
35+
* When you know that you do not want authors to write these tags,
36+
* you can enable this option to prevent their use from running amok.
37+
*
38+
* See:
39+
* [*Disallowed Raw HTML* in
40+
* `cmark-gfm`](https://github.github.com/gfm/#disallowed-raw-html-extension-).
41+
*/
42+
tagfilter?: boolean | null | undefined
2143
}

lib/index.js

+12-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ import {visit} from 'unist-util-visit'
3939
import {webNamespaces} from 'web-namespaces'
4040
import {zwitch} from 'zwitch'
4141

42+
// Matches the start of an opening or closing tag whose name is one of the
// tags listed for the GFM tagfilter: a `<`, an optional `/` (group 1), and the
// tag name (group 2).
// The lookahead requires a tab, line feed, form feed, carriage return, space,
// `/`, or `>` right after the name (without consuming it), so longer names that
// merely start with one of these names (such as `<titles>`) do not match.
// Flags: `g` to match every occurrence, `i` to match names case-insensitively.
const gfmTagfilterExpression =
43+
/<(\/?)(iframe|noembed|noframes|plaintext|script|style|textarea|title|xmp)(?=[\t\n\f\r />])/gi
44+
4245
// Node types associated with MDX.
4346
// <https://github.com/mdx-js/mdx/blob/8a56312/packages/mdx/lib/node-types.js>
4447
const knownMdxNames = new Set([
@@ -325,7 +328,13 @@ function handleRaw(node, state) {
325328

326329
// Now pass `node.value`.
327330
setPoint(state, pointStart(node))
328-
state.parser.tokenizer.write(node.value, false)
331+
332+
state.parser.tokenizer.write(
333+
state.options.tagfilter
334+
? node.value.replace(gfmTagfilterExpression, '&lt;$1$2')
335+
: node.value,
336+
false
337+
)
329338
// @ts-expect-error: private.
330339
state.parser.tokenizer._runParsingLoop()
331340

@@ -596,10 +605,11 @@ function endTag(node, state) {
596605
tagName === state.parser.tokenizer.lastStartTagName &&
597606
// `<textarea>` and `<title>`
598607
(state.parser.tokenizer.state === TokenizerMode.RCDATA ||
599-
// `<iframe>`, `<noembed>`, `<style>`, `<xmp>`
608+
// `<iframe>`, `<noembed>`, `<noframes>`, `<style>`, `<xmp>`
600609
state.parser.tokenizer.state === TokenizerMode.RAWTEXT ||
601610
// `<script>`
602611
state.parser.tokenizer.state === TokenizerMode.SCRIPT_DATA)
612+
// Note: handling `<plaintext>` is not needed here, as it’s the last element.
603613
) {
604614
state.parser.tokenizer.state = TokenizerMode.DATA
605615
}

package.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@
115115
}
116116
}
117117
],
118-
"prettier": true
118+
"prettier": true,
119+
"rules": {
120+
"unicorn/prefer-string-replace-all": "off"
121+
}
119122
}
120123
}

readme.md

+18
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,24 @@ Configuration.
131131

132132
If the passed through nodes have children, those children are expected to
133133
be hast again and will be handled.
134+
* `tagfilter?` (`boolean | null | undefined`)
135+
136+
Whether to disallow irregular tags in `raw` nodes according to GFM
137+
tagfilter
138+
(default: `false`).
139+
140+
This affects the following tags,
141+
grouped by their kind:
142+
* `RAWTEXT`: `iframe`, `noembed`, `noframes`, `style`, `xmp`
143+
* `RCDATA`: `textarea`, `title`
144+
* `SCRIPT_DATA`: `script`
145+
* `PLAINTEXT`: `plaintext`
146+
When you know that you do not want authors to write these tags,
147+
you can enable this option to prevent their use from running amok.
148+
149+
See:
150+
[*Disallowed Raw HTML* in
151+
`cmark-gfm`](https://github.github.com/gfm/#disallowed-raw-html-extension-).
134152

135153
### `raw(tree, options)`
136154

test.js

+75
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,81 @@ test('raw', async function (t) {
687687
}
688688
)
689689
})
690+
691+
await t.test('tagfilter', async function (t) {
692+
await t.test('should filter tags', async function () {
693+
const result = raw(
694+
{
695+
type: 'root',
696+
children: [
697+
h('p', [{type: 'raw', value: '<strong> <title> <style> <em>'}]),
698+
{type: 'text', value: '\n'},
699+
{
700+
type: 'raw',
701+
value:
702+
'<blockquote>\n <xmp> is disallowed. <XMP> is also disallowed.\n</blockquote>'
703+
}
704+
]
705+
},
706+
{tagfilter: true}
707+
)
708+
709+
assert.deepEqual(result, {
710+
type: 'root',
711+
children: [
712+
{
713+
type: 'element',
714+
tagName: 'p',
715+
properties: {},
716+
children: [
717+
{
718+
type: 'element',
719+
tagName: 'strong',
720+
properties: {},
721+
children: [
722+
{type: 'text', value: ' <title> <style> '},
723+
{
724+
type: 'element',
725+
tagName: 'em',
726+
properties: {},
727+
children: []
728+
}
729+
]
730+
}
731+
]
732+
},
733+
{
734+
type: 'element',
735+
tagName: 'strong',
736+
properties: {},
737+
children: [
738+
{
739+
type: 'element',
740+
tagName: 'em',
741+
properties: {},
742+
children: [
743+
{type: 'text', value: '\n'},
744+
{
745+
type: 'element',
746+
tagName: 'blockquote',
747+
properties: {},
748+
children: [
749+
{
750+
type: 'text',
751+
value:
752+
'\n <xmp> is disallowed. <XMP> is also disallowed.\n'
753+
}
754+
]
755+
}
756+
]
757+
}
758+
]
759+
}
760+
],
761+
data: {quirksMode: false}
762+
})
763+
})
764+
})
690765
})
691766

692767
test('integration', async function (t) {

0 commit comments

Comments
 (0)