Skip to content
This repository was archived by the owner on Sep 29, 2024. It is now read-only.

Commit 59c0983

Browse files
authored
Fix(ish) for unicode encoding in packet (#608)
* Fix-ish the unicode issues * removing unneeded spaces * fixing documentation * fixed linting * fixing benchmarking of decode
1 parent 497cb12 commit 59c0983

File tree

4 files changed

+84
-12
lines changed

4 files changed

+84
-12
lines changed

engineio/payload/data_test.go

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,18 @@ var tests = []struct {
2020
{frame.String, packet.OPEN, []byte{}},
2121
},
2222
},
23-
{true, []byte{0x00, 0x01, 0x03, 0xff, '4', 'h', 'e', 'l', 'l', 'o', ' ', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd},
23+
{true, []byte{0x00, 0x09, 0xff, '4', 'h', 'e', 'l', 'l', 'o', ' ', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd},
2424
[]Packet{
2525
{frame.String, packet.MESSAGE, []byte("hello 你好")},
2626
},
2727
},
28-
{true, []byte{0x01, 0x01, 0x03, 0xff, 0x04, 'h', 'e', 'l', 'l', 'o', ' ', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd}, []Packet{
28+
{true, []byte{0x01, 0x09, 0xff, 0x04, 'h', 'e', 'l', 'l', 'o', ' ', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd}, []Packet{
2929
{frame.Binary, packet.MESSAGE, []byte("hello 你好")},
3030
},
3131
},
3232
{true, []byte{
3333
0x01, 0x07, 0xff, 0x04, 'h', 'e', 'l', 'l', 'o', '\n',
34-
0x00, 0x08, 0xff, '4', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, '\n',
34+
0x00, 0x04, 0xff, '4', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, '\n',
3535
0x00, 0x06, 0xff, '2', 'p', 'r', 'o', 'b', 'e',
3636
}, []Packet{
3737
{frame.Binary, packet.MESSAGE, []byte("hello\n")},
@@ -43,18 +43,45 @@ var tests = []struct {
4343
{frame.String, packet.OPEN, []byte{}},
4444
},
4545
},
46-
{false, []byte("13:4hello 你好"), []Packet{
46+
{false, []byte("9:4hello 你好"), []Packet{
4747
{frame.String, packet.MESSAGE, []byte("hello 你好")},
4848
},
4949
},
5050
{false, []byte("18:b4aGVsbG8g5L2g5aW9"), []Packet{
5151
{frame.Binary, packet.MESSAGE, []byte("hello 你好")},
5252
},
5353
},
54-
{false, []byte("10:b4aGVsbG8K8:4你好\n6:2probe"), []Packet{
54+
{false, []byte("10:b4aGVsbG8K4:4你好\n6:2probe"), []Packet{
5555
{frame.Binary, packet.MESSAGE, []byte("hello\n")},
5656
{frame.String, packet.MESSAGE, []byte("你好\n")},
5757
{frame.String, packet.PING, []byte("probe")},
5858
},
5959
},
60+
// ↓ is 3 bytes, JavaScript `.length` 1. See https://socket.io/docs/v4/engine-io-protocol/#from-v3-to-v4
61+
{false, []byte("6:412↓453:41↓"), []Packet{
62+
{frame.String, packet.MESSAGE, []byte("12↓45")},
63+
{frame.String, packet.MESSAGE, []byte("1↓")},
64+
},
65+
},
66+
// 🇩🇪 is 8 bytes, 2 unicode chars JavaScript `.length` 4
67+
{false, []byte("6:4hello6:4🇩🇪a5:41234"), []Packet{
68+
{frame.String, packet.MESSAGE, []byte("hello")},
69+
{frame.String, packet.MESSAGE, []byte("🇩🇪a")},
70+
{frame.String, packet.MESSAGE, []byte("1234")},
71+
},
72+
},
73+
// € is 3 bytes, JavaScript `.length` 1
74+
{false, []byte("2:4h3:4€a2:41"), []Packet{
75+
{frame.String, packet.MESSAGE, []byte("h")},
76+
{frame.String, packet.MESSAGE, []byte("€a")},
77+
{frame.String, packet.MESSAGE, []byte("1")},
78+
},
79+
},
80+
//👍 is 4 bytes, JavaScript `.length` 2
81+
{false, []byte("2:4h4:4👍a2:41"), []Packet{
82+
{frame.String, packet.MESSAGE, []byte("h")},
83+
{frame.String, packet.MESSAGE, []byte("👍a")},
84+
{frame.String, packet.MESSAGE, []byte("1")},
85+
},
86+
},
6087
}

engineio/payload/decoder.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,24 @@ func (d *decoder) Read(p []byte) (int, error) {
5353
if d.b64Reader != nil {
5454
return d.b64Reader.Read(p)
5555
}
56-
return d.limitReader.Read(p)
56+
dd, err := d.limitReader.Read(p)
57+
unicodeCount := 0
58+
for i := range p[:dd] {
59+
b := p[i]
60+
if b>>3 == 30 {
61+
// starts with 11110 4 byte unicode char, probably 2 length in JS
62+
unicodeCount = unicodeCount + 2
63+
} else if b>>4 == 14 {
64+
// starts with 1110 3 byte unicode char, probably 1 length in JS (so 2 extra bytes to read)
65+
unicodeCount = unicodeCount + 2
66+
} else if b>>5 == 6 {
67+
// starts with 110 2 byte unicode char, probably 1 length in JS
68+
unicodeCount = unicodeCount + 1
69+
}
70+
}
71+
72+
d.limitReader.N = d.limitReader.N + int64(unicodeCount)
73+
return dd, err
5774
}
5875

5976
func (d *decoder) Close() error {

engineio/payload/decoder_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func TestDecoderNextReaderError(t *testing.T) {
9696

9797
func BenchmarkStringDecoder(b *testing.B) {
9898
feeder := fakeReaderFeeder{
99-
data: []byte("8:4你好\n6:2probe"),
99+
data: []byte("4:4你好\n6:2probe"),
100100
supportBinary: false,
101101
}
102102
d := decoder{
@@ -148,7 +148,7 @@ func BenchmarkBinaryDecoder(b *testing.B) {
148148
feeder := fakeReaderFeeder{
149149
data: []byte{
150150
0x01, 0x07, 0xff, 0x04, 'h', 'e', 'l', 'l', 'o', '\n',
151-
0x00, 0x08, 0xff, '4', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, '\n',
151+
0x00, 0x04, 0xff, '4', 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, '\n',
152152
0x00, 0x06, 0xff, '2', 'p', 'r', 'o', 'b', 'e',
153153
},
154154
supportBinary: true,

engineio/payload/encoder.go

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"encoding/base64"
66
"io"
7+
"unicode/utf8"
78

89
"github.com/googollee/go-socket.io/engineio/frame"
910
"github.com/googollee/go-socket.io/engineio/packet"
@@ -56,6 +57,7 @@ func (e *encoder) Write(p []byte) (int, error) {
5657
if e.b64Writer != nil {
5758
return e.b64Writer.Write(p)
5859
}
60+
// Need to scan here and add unicode chars to the list
5961
return e.frameCache.Write(p)
6062
}
6163

@@ -92,16 +94,16 @@ func (e *encoder) Close() error {
9294
}
9395

9496
func (e *encoder) writeTextHeader() error {
95-
l := int64(e.frameCache.Len() + 1) // length for packet type
96-
err := writeTextLen(l, &e.header)
97+
98+
err := writeTextLen(e.calcCodeUnitLength(), &e.header)
9799
if err == nil {
98100
err = e.header.WriteByte(e.pt.StringByte())
99101
}
100102
return err
101103
}
102104

103105
func (e *encoder) writeB64Header() error {
104-
l := int64(e.frameCache.Len() + 2) // length for 'b' and packet type
106+
l := int64(utf8.RuneCount(e.frameCache.Bytes()) + 2) // length for 'b' and packet type
105107
err := writeTextLen(l, &e.header)
106108
if err == nil {
107109
err = e.header.WriteByte('b')
@@ -112,8 +114,34 @@ func (e *encoder) writeB64Header() error {
112114
return err
113115
}
114116

117+
// calcCodeUnitLength returns the frame length to write into a packet header,
// measured the way JavaScript's `.length` measures a string (UTF-16 code
// units) rather than in bytes.
//
// The count starts at 1, presumably to cover the packet type byte (the
// length computation this replaces added 1 "for packet type"). Each byte of
// e.frameCache is then classified purely by its leading bits:
//   - 11110xxx (lead byte of a 4-byte sequence) adds 2
//   - 1110xxxx (lead byte of a 3-byte sequence) adds 1
//   - 110xxxxx (lead byte of a 2-byte sequence) adds 1
//   - 10xxxxxx (continuation byte)              adds 0
//   - anything else (e.g. ASCII)                adds 1
//
// No check is made that the bytes form well-formed UTF-8; a stray
// continuation byte contributes nothing to the result.
func (e *encoder) calcCodeUnitLength() int64 {
118+
var l int64 = 1
119+
var codeUnitSize int64
120+
bytes := e.frameCache.Bytes()
121+
for i := range bytes {
122+
b := bytes[i]
123+
if b>>3 == 30 {
124+
// top bits 11110: lead byte of a 4 byte unicode char, probably 2 length in JS
125+
codeUnitSize = 2
126+
} else if b>>4 == 14 {
127+
// top bits 1110: lead byte of a 3 byte unicode char, probably 1 length in JS
128+
codeUnitSize = 1
129+
} else if b>>5 == 6 {
130+
// top bits 110: lead byte of a 2 byte unicode char, probably 1 length in JS
131+
codeUnitSize = 1
132+
} else if b>>6 == 2 {
133+
// top bits 10: continuation byte, already counted via its lead byte
134+
codeUnitSize = 0
135+
} else {
136+
codeUnitSize = 1
137+
}
138+
l = l + codeUnitSize
139+
}
140+
141+
return int64(l)
142+
}
115143
func (e *encoder) writeBinaryHeader() error {
116-
l := int64(e.frameCache.Len() + 1) // length for packet type
144+
l := int64(e.calcCodeUnitLength()) // length for packet type
117145
b := e.pt.StringByte()
118146
if e.ft == frame.Binary {
119147
b = e.pt.BinaryByte()

0 commit comments

Comments
 (0)