Skip to content

Commit f5f1ac8

Browse files
gh-112068: C API: Add support of nullable arguments in PyArg_Parse (GH-121303)
1 parent 8421b64 commit f5f1ac8

File tree

11 files changed

+319
-137
lines changed

11 files changed

+319
-137
lines changed

Doc/c-api/arg.rst

+15
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,18 @@ There are three ways strings and buffers can be converted to C:
113113
``z`` (:class:`str` or ``None``) [const char \*]
114114
Like ``s``, but the Python object may also be ``None``, in which case the C
115115
pointer is set to ``NULL``.
116+
It is the same as ``s?`` with the C pointer initialized to ``NULL``.
116117

117118
``z*`` (:class:`str`, :term:`bytes-like object` or ``None``) [Py_buffer]
118119
Like ``s*``, but the Python object may also be ``None``, in which case the
119120
``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``.
121+
It is the same as ``s*?`` with the ``buf`` member of the :c:type:`Py_buffer`
122+
structure initialized to ``NULL``.
120123

121124
``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
122125
Like ``s#``, but the Python object may also be ``None``, in which case the C
123126
pointer is set to ``NULL``.
127+
It is the same as ``s#?`` with the C pointer initialized to ``NULL``.
124128

125129
``y`` (read-only :term:`bytes-like object`) [const char \*]
126130
This format converts a bytes-like object to a C pointer to a
@@ -377,6 +381,17 @@ Other objects
377381
Non-tuple sequences are deprecated if *items* contains format units
378382
which store a borrowed buffer or a borrowed reference.
379383

384+
``unit?`` (anything or ``None``) [*matching-variable(s)*]
385+
``?`` modifies the behavior of the preceding format unit.
386+
The C variable(s) corresponding to that parameter should be initialized
387+
to their default value --- when the argument is ``None``,
388+
:c:func:`PyArg_ParseTuple` does not touch the contents of the corresponding
389+
C variable(s).
390+
If the argument is not ``None``, it is parsed according to the specified
391+
format unit.
392+
393+
.. versionadded:: next
394+
380395
A few other characters have a meaning in a format string. These may not occur
381396
inside nested parentheses. They are:
382397

Doc/whatsnew/3.14.rst

+5
Original file line numberDiff line numberDiff line change
@@ -1846,6 +1846,11 @@ New features
18461846
file.
18471847
(Contributed by Victor Stinner in :gh:`127350`.)
18481848

1849+
* Add support for nullable arguments in :c:func:`PyArg_ParseTuple` and
1850+
similar functions.
1851+
Adding ``?`` after any format unit allows ``None`` to be accepted as a value.
1852+
(Contributed by Serhiy Storchaka in :gh:`112068`.)
1853+
18491854
* Add macros :c:func:`Py_PACK_VERSION` and :c:func:`Py_PACK_FULL_VERSION` for
18501855
bit-packing Python version numbers.
18511856
(Contributed by Petr Viktorin in :gh:`128629`.)

Lib/test/test_capi/test_getargs.py

+117
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,123 @@ def test_nested_sequence(self):
13871387
"argument 1 must be sequence of length 1, not 0"):
13881388
parse(([],), {}, '(' + f + ')', ['a'])
13891389

1390+
def test_specific_type_errors(self):
1391+
parse = _testcapi.parse_tuple_and_keywords
1392+
1393+
def check(format, arg, expected, got='list'):
1394+
errmsg = f'must be {expected}, not {got}'
1395+
with self.assertRaisesRegex(TypeError, errmsg):
1396+
parse((arg,), {}, format, ['a'])
1397+
1398+
check('k', [], 'int')
1399+
check('k?', [], 'int or None')
1400+
check('K', [], 'int')
1401+
check('K?', [], 'int or None')
1402+
check('c', [], 'a byte string of length 1')
1403+
check('c?', [], 'a byte string of length 1 or None')
1404+
check('c', b'abc', 'a byte string of length 1',
1405+
'a bytes object of length 3')
1406+
check('c?', b'abc', 'a byte string of length 1 or None',
1407+
'a bytes object of length 3')
1408+
check('c', bytearray(b'abc'), 'a byte string of length 1',
1409+
'a bytearray object of length 3')
1410+
check('c?', bytearray(b'abc'), 'a byte string of length 1 or None',
1411+
'a bytearray object of length 3')
1412+
check('C', [], 'a unicode character')
1413+
check('C?', [], 'a unicode character or None')
1414+
check('C', 'abc', 'a unicode character',
1415+
'a string of length 3')
1416+
check('C?', 'abc', 'a unicode character or None',
1417+
'a string of length 3')
1418+
check('s', [], 'str')
1419+
check('s?', [], 'str or None')
1420+
check('z', [], 'str or None')
1421+
check('z?', [], 'str or None')
1422+
check('es', [], 'str')
1423+
check('es?', [], 'str or None')
1424+
check('es#', [], 'str')
1425+
check('es#?', [], 'str or None')
1426+
check('et', [], 'str, bytes or bytearray')
1427+
check('et?', [], 'str, bytes, bytearray or None')
1428+
check('et#', [], 'str, bytes or bytearray')
1429+
check('et#?', [], 'str, bytes, bytearray or None')
1430+
check('w*', [], 'read-write bytes-like object')
1431+
check('w*?', [], 'read-write bytes-like object or None')
1432+
check('S', [], 'bytes')
1433+
check('S?', [], 'bytes or None')
1434+
check('U', [], 'str')
1435+
check('U?', [], 'str or None')
1436+
check('Y', [], 'bytearray')
1437+
check('Y?', [], 'bytearray or None')
1438+
check('(OO)', 42, '2-item tuple', 'int')
1439+
check('(OO)?', 42, '2-item tuple or None', 'int')
1440+
check('(OO)', (1, 2, 3), 'tuple of length 2', '3')
1441+
1442+
def test_nullable(self):
1443+
parse = _testcapi.parse_tuple_and_keywords
1444+
1445+
def check(format, arg, allows_none=False):
1446+
# Because some format units (such as y*) require cleanup,
1447+
# we force the parsing code to perform the cleanup by adding
1448+
# an argument that always fails.
1449+
# By checking for an exception, we ensure that the parsing
1450+
# of the first argument was successful.
1451+
self.assertRaises(OverflowError, parse,
1452+
(arg, 256), {}, format + '?b', ['a', 'b'])
1453+
self.assertRaises(OverflowError, parse,
1454+
(None, 256), {}, format + '?b', ['a', 'b'])
1455+
self.assertRaises(OverflowError, parse,
1456+
(arg, 256), {}, format + 'b', ['a', 'b'])
1457+
self.assertRaises(OverflowError if allows_none else TypeError, parse,
1458+
(None, 256), {}, format + 'b', ['a', 'b'])
1459+
1460+
check('b', 42)
1461+
check('B', 42)
1462+
check('h', 42)
1463+
check('H', 42)
1464+
check('i', 42)
1465+
check('I', 42)
1466+
check('n', 42)
1467+
check('l', 42)
1468+
check('k', 42)
1469+
check('L', 42)
1470+
check('K', 42)
1471+
check('f', 2.5)
1472+
check('d', 2.5)
1473+
check('D', 2.5j)
1474+
check('c', b'a')
1475+
check('C', 'a')
1476+
check('p', True, allows_none=True)
1477+
check('y', b'buffer')
1478+
check('y*', b'buffer')
1479+
check('y#', b'buffer')
1480+
check('s', 'string')
1481+
check('s*', 'string')
1482+
check('s#', 'string')
1483+
check('z', 'string', allows_none=True)
1484+
check('z*', 'string', allows_none=True)
1485+
check('z#', 'string', allows_none=True)
1486+
check('w*', bytearray(b'buffer'))
1487+
check('U', 'string')
1488+
check('S', b'bytes')
1489+
check('Y', bytearray(b'bytearray'))
1490+
check('O', object, allows_none=True)
1491+
1492+
check('(OO)', (1, 2))
1493+
self.assertEqual(parse((((1, 2), 3),), {}, '((OO)?O)', ['a']), (1, 2, 3))
1494+
self.assertEqual(parse(((None, 3),), {}, '((OO)?O)', ['a']), (NULL, NULL, 3))
1495+
self.assertEqual(parse((((1, 2), 3),), {}, '((OO)O)', ['a']), (1, 2, 3))
1496+
self.assertRaises(TypeError, parse, ((None, 3),), {}, '((OO)O)', ['a'])
1497+
1498+
parse((None,), {}, 'es?', ['a'])
1499+
parse((None,), {}, 'es#?', ['a'])
1500+
parse((None,), {}, 'et?', ['a'])
1501+
parse((None,), {}, 'et#?', ['a'])
1502+
parse((None,), {}, 'O!?', ['a'])
1503+
parse((None,), {}, 'O&?', ['a'])
1504+
1505+
# TODO: More tests for es?, es#?, et?, et#?, O!, O&
1506+
13901507
@unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi')
13911508
def test_gh_119213(self):
13921509
rc, out, err = script_helper.assert_python_ok("-c", """if True:

Lib/test/test_mmap.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ def test_tagname(self):
732732
m2.close()
733733
m1.close()
734734

735-
with self.assertRaisesRegex(TypeError, 'tagname'):
735+
with self.assertRaisesRegex(TypeError, 'must be str or None'):
736736
mmap.mmap(-1, 8, tagname=1)
737737

738738
@cpython_only
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add support for nullable arguments in :c:func:`PyArg_Parse` and similar
2+
functions. Adding ``?`` after any format unit allows ``None`` to be accepted as
3+
a value.

Modules/_ctypes/_ctypes.c

+3-9
Original file line numberDiff line numberDiff line change
@@ -3848,9 +3848,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags)
38483848
PyObject *name = Py_None;
38493849
PyObject *defval;
38503850
PyObject *typ;
3851-
if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) ||
3852-
!(name == Py_None || PyUnicode_Check(name)))
3853-
{
3851+
if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) {
38543852
PyErr_SetString(PyExc_TypeError,
38553853
"paramflags must be a sequence of (int [,string [,value]]) tuples");
38563854
return 0;
@@ -3915,10 +3913,8 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds)
39153913
void *handle;
39163914
PyObject *paramflags = NULL;
39173915

3918-
if (!PyArg_ParseTuple(args, "O|O", &ftuple, &paramflags))
3916+
if (!PyArg_ParseTuple(args, "O|O?", &ftuple, &paramflags))
39193917
return NULL;
3920-
if (paramflags == Py_None)
3921-
paramflags = NULL;
39223918

39233919
ftuple = PySequence_Tuple(ftuple);
39243920
if (!ftuple)
@@ -4050,10 +4046,8 @@ PyCFuncPtr_FromVtblIndex(PyTypeObject *type, PyObject *args, PyObject *kwds)
40504046
GUID *iid = NULL;
40514047
Py_ssize_t iid_len = 0;
40524048

4053-
if (!PyArg_ParseTuple(args, "is|Oz#", &index, &name, &paramflags, &iid, &iid_len))
4049+
if (!PyArg_ParseTuple(args, "is|O?z#", &index, &name, &paramflags, &iid, &iid_len))
40544050
return NULL;
4055-
if (paramflags == Py_None)
4056-
paramflags = NULL;
40574051

40584052
ctypes_state *st = get_module_state_by_def(Py_TYPE(type));
40594053
if (!_validate_paramflags(st, type, paramflags)) {

Modules/_interpretersmodule.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -1252,14 +1252,11 @@ interp_get_config(PyObject *self, PyObject *args, PyObject *kwds)
12521252
PyObject *idobj = NULL;
12531253
int restricted = 0;
12541254
if (!PyArg_ParseTupleAndKeywords(args, kwds,
1255-
"O|$p:get_config", kwlist,
1255+
"O?|$p:get_config", kwlist,
12561256
&idobj, &restricted))
12571257
{
12581258
return NULL;
12591259
}
1260-
if (idobj == Py_None) {
1261-
idobj = NULL;
1262-
}
12631260

12641261
int reqready = 0;
12651262
PyInterpreterState *interp = \
@@ -1376,14 +1373,14 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds)
13761373
static char *kwlist[] = {"exc", NULL};
13771374
PyObject *exc_arg = NULL;
13781375
if (!PyArg_ParseTupleAndKeywords(args, kwds,
1379-
"|O:capture_exception", kwlist,
1376+
"|O?:capture_exception", kwlist,
13801377
&exc_arg))
13811378
{
13821379
return NULL;
13831380
}
13841381

13851382
PyObject *exc = exc_arg;
1386-
if (exc == NULL || exc == Py_None) {
1383+
if (exc == NULL) {
13871384
exc = PyErr_GetRaisedException();
13881385
if (exc == NULL) {
13891386
Py_RETURN_NONE;

Modules/_json.c

+3-10
Original file line numberDiff line numberDiff line change
@@ -1228,23 +1228,16 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12281228
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
12291229

12301230
PyEncoderObject *s;
1231-
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1231+
PyObject *markers = Py_None, *defaultfn, *encoder, *indent, *key_separator;
12321232
PyObject *item_separator;
12331233
int sort_keys, skipkeys, allow_nan;
12341234

1235-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1236-
&markers, &defaultfn, &encoder, &indent,
1235+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!?OOOUUppp:make_encoder", kwlist,
1236+
&PyDict_Type, &markers, &defaultfn, &encoder, &indent,
12371237
&key_separator, &item_separator,
12381238
&sort_keys, &skipkeys, &allow_nan))
12391239
return NULL;
12401240

1241-
if (markers != Py_None && !PyDict_Check(markers)) {
1242-
PyErr_Format(PyExc_TypeError,
1243-
"make_encoder() argument 1 must be dict or None, "
1244-
"not %.200s", Py_TYPE(markers)->tp_name);
1245-
return NULL;
1246-
}
1247-
12481241
s = (PyEncoderObject *)type->tp_alloc(type, 0);
12491242
if (s == NULL)
12501243
return NULL;

Modules/_threadmodule.c

+5-13
Original file line numberDiff line numberDiff line change
@@ -651,12 +651,12 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args)
651651
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
652652

653653
PyObject *timeout_obj = NULL;
654-
if (!PyArg_ParseTuple(args, "|O:join", &timeout_obj)) {
654+
if (!PyArg_ParseTuple(args, "|O?:join", &timeout_obj)) {
655655
return NULL;
656656
}
657657

658658
PyTime_t timeout_ns = -1;
659-
if (timeout_obj != NULL && timeout_obj != Py_None) {
659+
if (timeout_obj != NULL) {
660660
if (_PyTime_FromSecondsObject(&timeout_ns, timeout_obj,
661661
_PyTime_ROUND_TIMEOUT) < 0) {
662662
return NULL;
@@ -1919,10 +1919,10 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs,
19191919
PyObject *func = NULL;
19201920
int daemon = 1;
19211921
thread_module_state *state = get_thread_state(module);
1922-
PyObject *hobj = NULL;
1922+
PyObject *hobj = Py_None;
19231923
if (!PyArg_ParseTupleAndKeywords(fargs, fkwargs,
1924-
"O|Op:start_joinable_thread", keywords,
1925-
&func, &hobj, &daemon)) {
1924+
"O|O!?p:start_joinable_thread", keywords,
1925+
&func, state->thread_handle_type, &hobj, &daemon)) {
19261926
return NULL;
19271927
}
19281928

@@ -1932,14 +1932,6 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs,
19321932
return NULL;
19331933
}
19341934

1935-
if (hobj == NULL) {
1936-
hobj = Py_None;
1937-
}
1938-
else if (hobj != Py_None && !Py_IS_TYPE(hobj, state->thread_handle_type)) {
1939-
PyErr_SetString(PyExc_TypeError, "'handle' must be a _ThreadHandle");
1940-
return NULL;
1941-
}
1942-
19431935
if (PySys_Audit("_thread.start_joinable_thread", "OiO", func, daemon,
19441936
hobj) < 0) {
19451937
return NULL;

Modules/mmapmodule.c

+4-11
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#endif
2424

2525
#include <Python.h>
26-
#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t()
2726
#include "pycore_bytesobject.h" // _PyBytes_Find()
2827
#include "pycore_fileutils.h" // _Py_stat_struct
2928

@@ -516,7 +515,7 @@ mmap_read_method(PyObject *op, PyObject *args)
516515
mmap_object *self = mmap_object_CAST(op);
517516

518517
CHECK_VALID(NULL);
519-
if (!PyArg_ParseTuple(args, "|O&:read", _Py_convert_optional_to_ssize_t, &num_bytes))
518+
if (!PyArg_ParseTuple(args, "|n?:read", &num_bytes))
520519
return NULL;
521520
CHECK_VALID(NULL);
522521

@@ -1710,7 +1709,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
17101709
DWORD off_lo; /* lower 32 bits of offset */
17111710
DWORD size_hi; /* upper 32 bits of size */
17121711
DWORD size_lo; /* lower 32 bits of size */
1713-
PyObject *tagname = Py_None;
1712+
PyObject *tagname = NULL;
17141713
DWORD dwErr = 0;
17151714
int fileno;
17161715
HANDLE fh = 0;
@@ -1720,7 +1719,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
17201719
"tagname",
17211720
"access", "offset", NULL };
17221721

1723-
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|OiL", keywords,
1722+
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|U?iL", keywords,
17241723
&fileno, &map_size,
17251724
&tagname, &access, &offset)) {
17261725
return NULL;
@@ -1853,13 +1852,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
18531852
m_obj->weakreflist = NULL;
18541853
m_obj->exports = 0;
18551854
/* set the tag name */
1856-
if (!Py_IsNone(tagname)) {
1857-
if (!PyUnicode_Check(tagname)) {
1858-
Py_DECREF(m_obj);
1859-
return PyErr_Format(PyExc_TypeError, "expected str or None for "
1860-
"'tagname', not %.200s",
1861-
Py_TYPE(tagname)->tp_name);
1862-
}
1855+
if (tagname != NULL) {
18631856
m_obj->tagname = PyUnicode_AsWideCharString(tagname, NULL);
18641857
if (m_obj->tagname == NULL) {
18651858
Py_DECREF(m_obj);

0 commit comments

Comments
 (0)