Skip to content

Commit 74a29c8

Browse files
committed
first viable definition for (optionally structured) Numpy array protobuf
0 parents  commit 74a29c8

File tree

4 files changed

+173
-0
lines changed

4 files changed

+173
-0
lines changed

.gitignore

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# test files stuff
2+
/figure/
3+
/out/
4+
5+
# mac os stuff
6+
~$*
7+
.DS_Store
8+
9+
# python stuff
10+
*.egg-info
11+
*.pyc
12+
*.ptx
13+
__pycache__/
14+
.ipynb_checkpoints/
15+
16+
# jetbrains stuff
17+
.idea
18+
*.iml
19+
20+
# cython builds
21+
cython**/*.c
22+
cython**/*.so
23+
cython**/build
24+

npbuf/protobuf/dtype.proto

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
syntax = "proto3";
2+
3+
// Aiming for a minimal representation based on the numpy.dtype Python class.
// See https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html
message Dtype {
  // Byte order of the described type; each trailing comment gives the
  // character code associated with that order.
  // An unset proto3 enum field reads back as its zero value (LITTLE_ENDIAN).
  enum ByteOrder {
    LITTLE_ENDIAN = 0;  // '<' order
    BIG_ENDIAN = 1;     // '>' order
    NATIVE = 2;         // '=' order
    NA = 3;             // '|' order (not applicable)
  }

  // Supported element types.
  // The labels of this enum are all parsable by np.dtype().
  enum Type {
    float64 = 0;     // Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
    float32 = 1;     // Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
    float16 = 2;     // Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
    complex128 = 3;  // Complex number, represented by two 64-bit floats (real and imaginary components)
    complex64 = 4;   // Complex number, represented by two 32-bit floats (real and imaginary components)
    uint64 = 5;      // Unsigned integer (0 to 18446744073709551615)
    uint32 = 6;      // Unsigned integer (0 to 4294967295)
    uint16 = 7;      // Unsigned integer (0 to 65535)
    uint8 = 8;       // Unsigned integer (0 to 255)
    int64 = 9;       // Integer (-9223372036854775808 to 9223372036854775807)
    int32 = 10;      // Integer (-2147483648 to 2147483647)
    int16 = 11;      // Integer (-32768 to 32767)
    int8 = 12;       // Byte (-128 to 127)
    S128 = 13;       // String, 1 byte for every character, 128 bytes total
    S64 = 14;        // String, 1 byte for every character, 64 bytes total
    S32 = 15;        // String, 1 byte for every character, 32 bytes total
    S16 = 16;        // String, 1 byte for every character, 16 bytes total
    S8 = 17;         // String, 1 byte for every character, 8 bytes total
  }

  // An element type paired with a shape.
  // NOTE(review): no field in this message refers to SubDtype; `subdtype`
  // below is declared as Dtype. Confirm which of the two was intended.
  message SubDtype {
    Type type = 1;
    repeated int64 shape = 101;
  }

  Type type = 1;
  ByteOrder byte_order = 101;

  // The next two fields are only used for structured dtypes.
  repeated string names = 201;      // Included for ordering of fields
  map<string, Dtype> fields = 301;  // Order of map is not guaranteed

  // Only used if this dtype describes a sub-array.
  Dtype subdtype = 1001;
}

npbuf/protobuf/ndarray.proto

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
syntax = "proto3";
2+
3+
import "dtype.proto";
4+
5+
// Aiming for a minimal representation based on the __array_interface__.
// See https://docs.scipy.org/doc/numpy/reference/arrays.interface.html
message NDArray {
  repeated int64 shape = 1;
  Dtype dtype = 101;  // message declared in dtype.proto
  bytes data = 201;
  repeated int64 strides = 301;

  // TODO: enable compression
  // optional bool compressed = X;
}

setup.py

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python
2+
3+
from Cython.Distutils import build_ext
from distutils.errors import DistutilsExecError
from glob import glob
import os
import setuptools
import setuptools.command.develop
from setuptools import find_packages, setup
import shutil
from subprocess import check_call
11+
12+
# discover the path to this setup.py file
13+
thisScriptDir = os.path.split(os.path.realpath(__file__))[0]  # symlinks resolved
14+
15+
class ProtocCommand(setuptools.Command):
    """Setup command that compiles the protobuf definitions to Python modules.

    Every ``*.proto`` file found in ``npbuf/protobuf`` is compiled with
    ``protoc`` into a freshly recreated ``npbuf/protobuf_py`` package.
    """

    description = 'compile the protobuf definitions to Python modules'
    user_options = [
        ('protoc=', 'p', "(default: 'protoc') path to protoc compiler. The default value can be set via the PROTOC environment variable"),
        ('raises=', 'r', '(default: True) if true, raise error on protobuf compilation error'),
    ]

    # spellings accepted when the `raises` option is supplied as text
    _TRUE_WORDS = frozenset(('1', 'true', 't', 'yes', 'y', 'on'))
    _FALSE_WORDS = frozenset(('0', 'false', 'f', 'no', 'n', 'off'))

    def initialize_options(self):
        """Set the option defaults; ``PROTOC`` in the environment overrides the compiler name."""
        self.protoc = os.environ.get('PROTOC', 'protoc')
        self.raises = True

    def finalize_options(self):
        """Normalize ``raises`` to a bool and resolve ``protoc`` to an executable path.

        After this call ``self.protoc`` is either the resolved path or ``None``
        (compiler not found and ``raises`` is false).

        Raises:
            DistutilsOptionError: ``raises`` is text that is not a recognized boolean.
            DistutilsExecError: protoc cannot be found and ``raises`` is true.
        """
        # `raises=` takes a value, so it arrives as text (e.g. 'False'), and any
        # non-empty string is truthy; convert it before it is tested below.
        self.raises = self._as_bool(self.raises)

        print(f'searching for protoc executable at: {self.protoc}...')
        self.protoc = shutil.which(self.protoc)
        if self.protoc is not None:
            print(f'Found. Using protoc at: {self.protoc}')
        elif self.raises:
            raise DistutilsExecError('Could not find protoc executable.')
        else:
            print('Could not find protoc executable. Skipping protobuf compilation.')

    def run(self):
        """Recreate the output package and compile all ``.proto`` files into it.

        Does nothing when no protoc executable was found. A failing protoc
        invocation propagates the error raised by ``check_call``.
        """
        if self.protoc is None:
            # bail if self.raises is False and protoc isn't found
            return

        proto_dir = os.path.join(thisScriptDir, 'npbuf', 'protobuf')
        proto_out_dir = os.path.join(thisScriptDir, 'npbuf', 'protobuf_py')
        # sorted so the protoc command line is the same on every run
        proto_srcs = sorted(glob(os.path.join(proto_dir, '*.proto')))

        # clean up any existing compiled protobufs
        shutil.rmtree(proto_out_dir, ignore_errors=True)
        # create a new module for the compiled protobufs, including an `__init__.py`
        os.mkdir(proto_out_dir)
        with open(os.path.join(proto_out_dir, '__init__.py'), 'w'):
            pass

        # compile protobuf files to .py python modules
        check_call([
            self.protoc,
            f'--proto_path={proto_dir}',
            f'--python_out={proto_out_dir}',
            *proto_srcs,
        ])

    @classmethod
    def _as_bool(cls, value):
        """Return ``value`` as a bool, parsing the usual textual spellings."""
        if not isinstance(value, str):
            return bool(value)

        word = value.strip().lower()
        if word in cls._TRUE_WORDS:
            return True
        if word in cls._FALSE_WORDS:
            return False

        from distutils.errors import DistutilsOptionError
        raise DistutilsOptionError(f"invalid boolean value for 'raises': {value!r}")
60+
61+
class DevelopCommand(setuptools.command.develop.develop):
    """``develop`` command that compiles the protobuf definitions first."""

    def run(self):
        """Run the ``protoc`` command, then the normal develop command."""
        # `run_command` creates and finalizes the protoc command on demand, so
        # nothing has to be copied onto it beforehand.
        self.run_command('protoc')

        # run the normal develop command (`super()` already binds `self`)
        super().run()
70+
71+
setup(
    author='Max Klein',
    cmdclass={
        'build_ext': build_ext,
        'develop': DevelopCommand,
        'protoc': ProtocCommand,
    },
    description='provides Python protobuf types that can be used to serialize/deserialize Numpy arrays',
    license='Apache License, Version 2.0',
    name='numpy-protobuf',
    # `exclude` takes a sequence of patterns: the trailing comma makes this a
    # 1-tuple, whereas a bare parenthesized string is iterated per character.
    packages=find_packages(where='.', exclude=('npbuf.protobuf',)),
)

0 commit comments

Comments
 (0)