Skip to content

Commit 5276abe

Browse files
committed
New decodeEntity() method to speed up entity decoding.
1 parent 17592ae commit 5276abe

File tree

5 files changed

+102
-2
lines changed

5 files changed

+102
-2
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2.2.0
2+
-----
3+
4+
* A fast `decodeEntity()` method to decode a single HTML entity.
5+
16
2.1.1
27
-----
38

README.md

+26
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,32 @@ Options:
8585
* `attribute` emulates behavior of browser when parsing tag attributes: entities without semicolon are replaced when not followed by equality sign `=`.
8686
* `strict` ignores entities without semicolon.
8787

88+
### decodeEntity(text, options)
89+
90+
Decodes a single HTML entity. An unknown entity is left as is.
91+
92+
```js
93+
import {decodeEntity} from 'html-entities';
94+
95+
decodeEntity('&lt;');
96+
// -> '<'
97+
98+
decodeEntity('&copy;', {level: 'html5'});
99+
// -> '©'
100+
101+
decodeEntity('&copy;', {level: 'xml'});
102+
// -> '&copy;'
103+
```
104+
105+
Options:
106+
107+
#### level
108+
109+
* `all` alias to `html5` (default).
110+
* `html5` uses `HTML5` named references.
111+
* `html4` uses `HTML4` named references.
112+
* `xml` uses `XML` named references.
113+
88114
Performance
89115
-----------
90116

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
},
4141
"repository": {
4242
"type": "git",
43-
"url": "https://github.com/mdevils/node-html-entities.git"
43+
"url": "https://github.com/mdevils/html-entities.git"
4444
},
4545
"main": "./lib/index.js",
4646
"typings": "./lib/index.d.ts",

src/index.ts

+31
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,37 @@ const decodeRegExps: Record<Level, Record<DecodeScope, RegExp>> = {
118118
const fromCharCode = String.fromCharCode;
119119
const outOfBoundsChar = fromCharCode(65533);
120120

121+
/** Options used by `decodeEntity()` when the caller passes none. */
const defaultDecodeEntityOptions: CommonOptions = {
    level: 'all'
};

/**
 * Decodes a single HTML entity such as `&amp;`, `&#34;` or `&#x22;`.
 *
 * @param entity - The entity text. `null`, `undefined` and `''` yield `''`.
 * @param options - `level` selects which table of `allNamedReferences` is
 * consulted for named references (defaults to `'all'`).
 * @returns The decoded text, or `entity` unchanged when it is neither a known
 * named reference nor a parsable numeric reference.
 */
export function decodeEntity(
    entity: string | undefined | null,
    {level = 'all'}: CommonOptions = defaultDecodeEntityOptions
): string {
    if (!entity) {
        return '';
    }

    const references = allNamedReferences[level].entities;
    const resultByReference = references[entity];
    // Only accept real string entries: if the table is a plain object, a key
    // like `constructor` would otherwise resolve to an inherited non-string member.
    if (typeof resultByReference === 'string' && resultByReference) {
        return resultByReference;
    }

    if (entity[0] === '&' && entity[1] === '#') {
        const secondChar = entity[2];
        const isHex = secondChar === 'x' || secondChar === 'X';
        // Explicit radix 10 so that input like `&#0x41;` is not silently read as hex.
        const code = isHex ? parseInt(entity.slice(3), 16) : parseInt(entity.slice(2), 10);

        // No digits (`&#;`, `&#foo;`) or a negative number is not a numeric
        // reference; leave it untouched instead of emitting `\u0000` or a wrapped char.
        if (isNaN(code) || code < 0) {
            return entity;
        }

        // NOTE(review): U+10FFFF itself is a valid code point, yet `>=` maps it to the
        // replacement character. Kept as-is; confirm whether this should be `>`.
        if (code >= 0x10ffff) {
            return outOfBoundsChar;
        }
        // Code points above the BMP need surrogate pairs, hence fromCodePoint.
        if (code > 65535) {
            return fromCodePoint(code);
        }
        // numericUnicodeMap overrides selected code points; fall back to the code itself.
        return fromCharCode(numericUnicodeMap[code] || code);
    }

    return entity;
}
151+
121152
export function decode(
122153
text: string | undefined | null,
123154
{level = 'all', scope = level === 'xml' ? 'strict' : 'body'}: DecodeOptions = defaultDecodeOptions

test/index.test.ts

+39-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import {expect} from 'chai';
22
import * as HE from '../src';
33

44
// eslint-disable-next-line @typescript-eslint/no-var-requires
5-
const {encode, decode} = require(process.env.TEST_LIB ? '../lib' : '../src') as typeof HE;
5+
const {encode, decode, decodeEntity} = require(process.env.TEST_LIB ? '../lib' : '../src') as typeof HE;
66

77
describe('encode()', () => {
88
it('should handle undefined', () => {
@@ -104,3 +104,41 @@ describe('decode()', () => {
104104
});
105105
});
106106
});
107+
108+
// Table-driven checks for decodeEntity(): each row is [test title, input, expected output].
describe('decodeEntity()', () => {
    const singleEntityCases: Array<[string, string | undefined | null, string]> = [
        ['should handle undefined', undefined, ''],
        ['should handle null', null, ''],
        ['should handle empty string', '', ''],
        ['should handle invalid numeric entities', '&#2013266066;', String.fromCharCode(65533)],
        ['should decode numeric entities without semicolon', '&#34', '"'],
        ['should decode incomplete named entities', '&uuml', 'ü'],
        ['should decode proper named entities', '&amp;', '&'],
        ['should decode emoji', '&#128514;', '😂']
    ];

    singleEntityCases.forEach(([title, input, expected]) => {
        it(title, () => {
            expect(decodeEntity(input)).to.equal(expected);
        });
    });

    describe('level', () => {
        it('should decode according to the level', () => {
            // Rows are [input, level, expected output]; order matches the original assertions.
            const levelCases = [
                ['&rx;', 'all', '℞'],
                ['&rx;', 'html5', '℞'],
                ['&rx;', 'html4', '&rx;'],
                ['&copy;', 'html4', '©'],
                ['&rx;', 'xml', '&rx;'],
                ['&copy;', 'xml', '&copy;'],
                ['&lt;', 'xml', '<']
            ] as const;

            levelCases.forEach(([input, level, expected]) => {
                expect(decodeEntity(input, {level})).to.equal(expected);
            });
        });
    });
});

0 commit comments

Comments
 (0)