Skip to content

gh-119342: Fix OOM vulnerability in plistlib #119343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions Lib/plistlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
# Enumeration of the supported plist formats; its members (FMT_XML and
# FMT_BINARY) are also published as module-level names below.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)

# Cap (1 << 20 bytes, i.e. 1 MiB) on the initial read performed by _read().
# Data larger than this will be read in chunks, to prevent extreme
# overallocation.
_SAFE_BUF_SIZE = 1 << 20

class UID:
def __init__(self, data):
Expand Down Expand Up @@ -508,12 +511,24 @@ def _get_size(self, tokenL):

return tokenL

def _read(self, size):
    """Read exactly *size* bytes from the underlying file object.

    The first read is capped at _SAFE_BUF_SIZE.  Every following read
    asks for at most as many bytes as have already been received, so the
    amount of memory requested grows only as fast as the file actually
    delivers data.

    Raises InvalidFileException as soon as a read comes back shorter
    than requested.
    """
    expected = min(size, _SAFE_BUF_SIZE)
    buf = self._fp.read(expected)
    # Invariant while looping: every byte asked for so far has arrived.
    while len(buf) == expected:
        if expected == size:
            return buf
        # Request no more than what is already held, and never more
        # than what is still missing.
        step = min(expected, size - expected)
        buf += self._fp.read(step)
        expected += step
    raise InvalidFileException()

def _read_ints(self, n, size):
data = self._fp.read(size * n)
data = self._read(size * n)
if size in _BINARY_FORMAT:
return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
else:
if not size or len(data) != size * n:
if not size:
raise InvalidFileException()
return tuple(int.from_bytes(data[i: i + size], 'big')
for i in range(0, size * n, size))
Expand Down Expand Up @@ -573,22 +588,16 @@ def _read_object(self, ref):

elif tokenH == 0x40: # data
s = self._get_size(tokenL)
result = self._fp.read(s)
if len(result) != s:
raise InvalidFileException()
result = self._read(s)

elif tokenH == 0x50: # ascii string
s = self._get_size(tokenL)
data = self._fp.read(s)
if len(data) != s:
raise InvalidFileException()
data = self._read(s)
result = data.decode('ascii')

elif tokenH == 0x60: # unicode string
s = self._get_size(tokenL) * 2
data = self._fp.read(s)
if len(data) != s:
raise InvalidFileException()
data = self._read(s)
result = data.decode('utf-16be')

elif tokenH == 0x80: # UID
Expand Down
37 changes: 34 additions & 3 deletions Lib/test/test_plistlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -904,8 +904,7 @@ def test_dump_naive_datetime_with_aware_datetime_option(self):

class TestBinaryPlistlib(unittest.TestCase):

@staticmethod
def decode(*objects, offset_size=1, ref_size=1):
def build(self, *objects, offset_size=1, ref_size=1):
data = [b'bplist00']
offset = 8
offsets = []
Expand All @@ -917,7 +916,11 @@ def decode(*objects, offset_size=1, ref_size=1):
len(objects), 0, offset)
data.extend(offsets)
data.append(tail)
return plistlib.loads(b''.join(data), fmt=plistlib.FMT_BINARY)
return b''.join(data)

def decode(self, *objects, offset_size=1, ref_size=1):
    """Serialize *objects* with build() and parse the result as a binary plist."""
    payload = self.build(
        *objects, offset_size=offset_size, ref_size=ref_size)
    return plistlib.loads(payload, fmt=plistlib.FMT_BINARY)

def test_nonstandard_refs_size(self):
# Issue #21538: Refs and offsets are 24-bit integers
Expand Down Expand Up @@ -1025,6 +1028,34 @@ def test_invalid_binary(self):
with self.assertRaises(plistlib.InvalidFileException):
plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY)

def test_truncated_large_data(self):
    """Plists declaring a huge size they do not contain must be rejected.

    For every power of two from 2**20 up to 2**63 a small plist is
    written whose data, ASCII string, Unicode string, array or dict
    object, or whose number of objects, declares that size.  Loading
    the file must raise InvalidFileException, both through a buffered
    and through an unbuffered file object.
    """
    path = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, path)

    def check(data):
        with open(path, 'wb') as f:
            f.write(data)
        # First pass: buffered file; second pass: unbuffered file.
        for open_kwargs in ({}, {'buffering': 0}):
            with open(path, 'rb', **open_kwargs) as f, \
                    self.assertRaises(plistlib.InvalidFileException):
                plistlib.load(f, fmt=plistlib.FMT_BINARY)

    # Leading byte of each object kind under test.
    markers = (
        b'\x4f',  # data
        b'\x5f',  # ascii string
        b'\x6f',  # unicode string
        b'\xaf',  # array
        b'\xdf',  # dict
    )
    for w in range(20, 64):
        s = 1 << w
        # 0x13 followed by the declared size as 8 big-endian bytes.
        size_field = b'\x13' + s.to_bytes(8, 'big')
        for marker in markers:
            check(self.build(marker + size_field))
        # number of objects
        check(b'bplist00' + struct.pack('>6xBBQQQ', 1, 1, s, 0, 8))

def test_load_aware_datetime(self):
data = (b'bplist003B\x04>\xd0d\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00'
b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix OOM vulnerability in :mod:`plistlib`: reading a specially crafted
small Plist file could consume an arbitrary amount of memory.
Loading