Skip to content

Partially load modules from compiler cache, handling possible race condition #440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 39 additions & 12 deletions typed_python/SerializationContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,31 @@
import types
import traceback
import logging
import numpy
import pickle


_badModuleCache = set()


def pickledByStr(module_name: str, name: str) -> object:
    """Return the object found by looking up 'name' inside module 'module_name'.

    This mimics pickle's behavior when given a string from __reduce__. The
    string is interpreted as the name of a global variable, and
    pickle.whichmodule is used to search the module namespace, generating
    module_name.

    Note that 'name' might contain '.' inside of it, since it's a 'local name'
    (for example 'Outer.Inner.method'), so it is resolved one attribute at a
    time starting from the imported module.

    Args:
        module_name: importable, possibly dotted, name of the module that
            holds the object.
        name: attribute path of the object within that module; components
            are separated by '.'.

    Returns:
        The object reached by following each component of 'name' from the
        imported module.

    Raises:
        ImportError: if 'module_name' cannot be imported.
        AttributeError: if any component of 'name' is missing.
    """
    module = importlib.import_module(module_name)

    # Walk the dotted path: each component is an attribute of the previous one.
    instance = module
    for subName in name.split('.'):
        instance = getattr(instance, subName)

    return instance


def createFunctionWithLocalsAndGlobals(code, globals):
if globals is None:
globals = {}
Expand Down Expand Up @@ -708,26 +728,30 @@ def walkCodeObject(code):
return (createFunctionWithLocalsAndGlobals, args, representation)

if not isinstance(inst, type) and hasattr(type(inst), '__reduce_ex__'):
res = inst.__reduce_ex__(4)
if isinstance(inst, numpy.ufunc):
res = inst.__name__
else:
res = inst.__reduce_ex__(4)

# pickle supports a protocol where __reduce__ can return a string
# giving a global name. We'll already find that separately, so we
# don't want to handle it here. We ought to look at this in more detail
# however
# mimic pickle's behaviour when a string is received.
if isinstance(res, str):
return None
name_tuple = (inst, res)
module_name = pickle.whichmodule(*name_tuple)
res = (pickledByStr, (module_name, res,), pickledByStr)

return res

if not isinstance(inst, type) and hasattr(type(inst), '__reduce__'):
res = inst.__reduce__()
if isinstance(inst, numpy.ufunc):
res = inst.__name__
else:
res = inst.__reduce()

# pickle supports a protocol where __reduce__ can return a string
# giving a global name. We'll already find that separately, so we
# don't want to handle it here. We ought to look at this in more detail
# however
# mimic pickle's behaviour when a string is received.
if isinstance(res, str):
return None
name_tuple = (inst, res)
module_name = pickle.whichmodule(*name_tuple)
res = (pickledByStr, (module_name, res,), pickledByStr)

return res

Expand All @@ -736,6 +760,9 @@ def walkCodeObject(code):
def setInstanceStateFromRepresentation(
self, instance, representation=None, itemIt=None, kvPairIt=None, setStateFun=None
):
if representation is pickledByStr:
return

if representation is reconstructTypeFunctionType:
return

Expand Down
25 changes: 13 additions & 12 deletions typed_python/compiler/binary_shared_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@


class LoadedBinarySharedObject(LoadedModule):
def __init__(self, binarySharedObject, diskPath, functionPointers, globalVariableDefinitions):
super().__init__(functionPointers, globalVariableDefinitions)
def __init__(self, binarySharedObject, diskPath, functionPointers, serializedGlobalVariableDefinitions):
super().__init__(functionPointers, serializedGlobalVariableDefinitions)

self.binarySharedObject = binarySharedObject
self.diskPath = diskPath
Expand All @@ -36,30 +36,32 @@ def __init__(self, binarySharedObject, diskPath, functionPointers, globalVariabl
class BinarySharedObject:
"""Models a shared object library (.so) loadable on linux systems."""

def __init__(self, binaryForm, functionTypes, globalVariableDefinitions):
def __init__(self, binaryForm, functionTypes, serializedGlobalVariableDefinitions, globalDependencies):
"""
Args:
binaryForm - a bytes object containing the actual compiled code for the module
globalVariableDefinitions - a map from name to GlobalVariableDefinition
binaryForm: a bytes object containing the actual compiled code for the module
serializedGlobalVariableDefinitions: a map from name to GlobalVariableDefinition
globalDependencies: a dict from function linkname to the list of global variables it depends on
"""
self.binaryForm = binaryForm
self.functionTypes = functionTypes
self.globalVariableDefinitions = globalVariableDefinitions
self.serializedGlobalVariableDefinitions = serializedGlobalVariableDefinitions
self.globalDependencies = globalDependencies
self.hash = sha_hash(binaryForm)

@property
def definedSymbols(self):
return self.functionTypes.keys()

@staticmethod
def fromDisk(path, globalVariableDefinitions, functionNameToType):
def fromDisk(path, serializedGlobalVariableDefinitions, functionNameToType, globalDependencies):
with open(path, "rb") as f:
binaryForm = f.read()

return BinarySharedObject(binaryForm, functionNameToType, globalVariableDefinitions)
return BinarySharedObject(binaryForm, functionNameToType, serializedGlobalVariableDefinitions, globalDependencies)

@staticmethod
def fromModule(module, globalVariableDefinitions, functionNameToType):
def fromModule(module, serializedGlobalVariableDefinitions, functionNameToType, globalDependencies):
target_triple = llvm.get_process_triple()
target = llvm.Target.from_triple(target_triple)
target_machine_shared_object = target.create_target_machine(reloc='pic', codemodel='default')
Expand All @@ -80,7 +82,7 @@ def fromModule(module, globalVariableDefinitions, functionNameToType):
)

with open(os.path.join(tf, "module.so"), "rb") as so_file:
return BinarySharedObject(so_file.read(), functionNameToType, globalVariableDefinitions)
return BinarySharedObject(so_file.read(), functionNameToType, serializedGlobalVariableDefinitions, globalDependencies)

def load(self, storageDir):
"""Instantiate this .so in temporary storage and return a dict from symbol -> integer function pointer"""
Expand Down Expand Up @@ -127,8 +129,7 @@ def loadFromPath(self, modulePath):
self,
modulePath,
functionPointers,
self.globalVariableDefinitions
self.serializedGlobalVariableDefinitions
)
loadedModule.linkGlobalVariables()

return loadedModule
Loading