import rdflib as R
import traceback
import logging as L
import hashlib
import random
from .mapper import *
from .dataUser import DataUser
from .simpleProperty import *
from .rdfUtils import *
from .graphObject import *
"""
.. autoclass:: DataObject
"""
# In general it should be possible to recover the entire object from its identifier: the object should be representable as a connected graph.
# However, this need not be a connected *RDF* graph. Indeed, graph literals may hold information that yields triples which are not
# connected by an actual node.
def _bnode_to_var(x):
return "?" + x
def get_hash_function(method_name):
    """Return a hash constructor for the algorithm called `method_name`.

    The returned callable takes the bytes to hash and returns a hashlib hash
    object (so ``.hexdigest()`` etc. are available on the result).

    Parameters
    ----------
    method_name : str
        ``"sha224"``, ``"md5"``, or any name listed in
        ``hashlib.algorithms_available``

    Raises
    ------
    ValueError
        If hashlib does not provide an algorithm called `method_name`.
        Previously this case returned ``None``, which only surfaced later as
        an opaque "'NoneType' object is not callable" error at the call site.
    """
    if method_name == "sha224":
        return hashlib.sha224
    if method_name == "md5":
        return hashlib.md5
    if method_name in hashlib.algorithms_available:
        return lambda data: hashlib.new(method_name, data)
    raise ValueError("Unsupported hash method: {!r}".format(method_name))
class DataObject(GraphObject, DataUser, metaclass=MappedClass):
    """
    An object backed by the database

    Attributes
    -----------
    rdf_type : rdflib.term.URIRef
        The RDF type URI for objects of this type
    rdf_namespace : rdflib.namespace.Namespace
        The rdflib namespace (prefix for URIs) for objects from this class
    properties : list of Property
        Properties belonging to this object
    owner_properties : list of Property
        Properties belonging to parents of this object
    """

    # Both sets are created once on DataObject, so unless a subclass rebinds
    # them they are shared by DataObject and all of its subclasses.
    _openSet = set()
    _closedSet = set()

    configuration_variables = {
        "rdf.namespace" : {
            "description" : "Namespaces for DataObject sub-classes will be based off of this. For example, a subclass named A would have a namespace '[rdf.namespace]A/'",
            "type" : R.Namespace,
            "directly_configureable" : True
        },
        "dataObject.identifier_hash" : {
            "description" : "The hash method used for object identifiers. Defaults to md5.",
            "type" : "sha224, md5, or one of the types accepted by hashlib.new()",
            "directly_configureable" : True
        },
    }

    @classmethod
    def openSet(cls):
        """ The open set contains items that must be saved directly in order for their data to be written out """
        return cls._openSet

    def __init__(self, ident=False, var=False, key=False, generate_key=False, **kwargs):
        """A subclass of DataObject cannot have any positional arguments.

        Parameters
        ----------
        ident : rdflib.term.URIRef or str
            The identifier for this DataObject
        var : str
            In lieu of `ident`, sets the variable for this object
        key : str or object
            In lieu of `ident` or `var`, sets the identifier for this DataObject using the key value.
            For a namespace `ex:` and key `a`, the identifier would be `ex:a`.
        generate_key : bool
            If true generates a random key value
        kwargs : dict
            Values to set for named properties

        Raises
        ------
        Exception
            If the base-class initializers raise `BadConf`
        """
        try:
            super().__init__()
        except BadConf as e:
            # Chain the original error so the underlying configuration problem
            # stays visible in the traceback.
            raise Exception("You may need to connect to a database before continuing.") from e

        self._id = False

        if ident:
            if isinstance(ident, R.URIRef):
                self._id = ident
            else:
                self._id = R.URIRef(ident)
        elif var:
            self._id_variable = R.Variable(var)
        elif key: # TODO: Support a key function that generates the key based on live values of the object (e.g., property values)
            self.setKey(key)
        elif generate_key:
            self.setKey(random.random())
        else:
            # Randomly generate an identifier if the derived class can't
            # come up with one from the start. Ensures we always have something
            # that functions as an identifier
            v = (random.random(), random.random())
            cname = self.__class__.__name__
            self._id_variable = self._graph_variable(cname + "_" + hashlib.md5(str(v).encode()).hexdigest())

        for x in self.__class__.dataObjectProperties:
            self.attachProperty(x)

        # Apply keyword arguments whose names match an attached property
        for x in self.properties:
            if x.linkName in kwargs:
                self.relate(x.linkName, kwargs[x.linkName])

        # Record the rdf:type statement. The order of these checks matters:
        # PropertyDataObject is itself a DataObjectType, so it is tested first.
        if isinstance(self, PropertyDataObject):
            self.relate('rdf_type_property', RDFProperty.getInstance(), RDFTypeProperty)
        elif isinstance(self, DataObjectType):
            self.relate('rdf_type_property', RDFSClass.getInstance(), RDFTypeProperty)
        elif isinstance(self, RDFProperty):
            self.relate('rdf_type_property', RDFSClass.getInstance(), RDFTypeProperty)
        elif isinstance(self, RDFSClass):
            # rdfs:Class is typed by itself
            self.relate('rdf_type_property', self, RDFTypeProperty)
        else:
            self.relate('rdf_type_property', self.rdf_type_object, RDFTypeProperty)

    @classmethod
    def identifier_hash_method(cls, o):
        """Hash `o` with the method named by the ``dataObject.identifier_hash``
        configuration value (``'md5'`` when unset) and return the hash object.
        """
        return get_hash_function(cls.conf.get('dataObject.identifier_hash', 'md5'))(o)

    def make_identifier_from_properties(self, *properties):
        """Build an identifier from the current values of the named properties.

        Parameters
        ----------
        properties : str
            Names of attributes on this object; each must expose ``values``
            whose items have an ``idl`` with an ``n3()`` method

        Raises
        ------
        Exception
            If no property names are given, or the named properties
            contribute no data
        """
        if not properties:
            raise Exception("No properties provided to make identifier")

        data = "".join(
            value.idl.n3()
            for propName in properties
            for value in getattr(self, propName).values
        )

        if not data:
            raise Exception("No properties to make identifier")

        return self.make_identifier(data)

    @property
    def defined(self):
        """Returns `True` if this object has an identifier

        To define a custom identifier, override :meth:`defined_augment` to return
        True when your custom identifier would be defined. You must also override
        :meth:`identifier_augment`
        """
        if self._id is not False:
            return True
        return self.defined_augment()

    def defined_augment(self):
        """ This function must return False if :meth:`identifier_augment` would raise an
        :exc:`IdentifierMissingException`. Override it when defining a non-standard
        identifier for subclasses of DataObjects.
        """
        return False

    def variable(self):
        """ Returns the variable to be used in queries with this DataObject

        Raises
        ------
        IdentifierMissingException
        """
        # `_id_variable` is only assigned by __init__ on the `var` and fallback
        # branches; use getattr so an object built from `ident` or `key` raises
        # the documented exception rather than AttributeError.
        id_variable = getattr(self, '_id_variable', None)
        if id_variable is None:
            raise IdentifierMissingException(self)
        return id_variable

    def setKey(self, key):
        """Set this object's identifier from `key`.

        A `str` key is used directly (URL-quoted) as the local part of the
        identifier; any other value is hashed via :meth:`make_identifier`.
        """
        if isinstance(key, str):
            self._id = self.make_identifier_direct(key)
        else:
            self._id = self.make_identifier(key)

    def relate(self, linkName, other, prop=False):
        """Set the property called `linkName` on this object to `other`.

        If no property with that name is attached yet, one is attached first:
        `prop` when given, otherwise a new multi-valued property class that is
        an ObjectProperty when `other` is a DataObject and a DatatypeProperty
        otherwise.

        Returns
        -------
        The result of calling ``set(other)`` on the property
        """
        if any(x.linkName == linkName for x in self.properties):
            p = getattr(self, linkName)
        else:
            if not prop:
                if isinstance(other, DataObject):
                    property_type = ObjectProperty
                else:
                    property_type = DatatypeProperty
                link = type(self).rdf_namespace[linkName]
                prop = MappedPropertyClass(linkName, (property_type,), dict(link=link, linkName=linkName, multiple=True))
            p = self.attachProperty(prop)

        return p.set(other)

    def attachProperty(self, prop):
        """Instantiate the property class `prop` with this object as owner,
        record it in ``self.properties`` and expose it as an attribute named
        after its ``linkName``.

        Raises
        ------
        Exception
            If this object already has an attribute with that name
        """
        p = prop(owner=self)
        if hasattr(self, prop.linkName):
            raise Exception("Cannot attach property '{}'. A property must have a different name from any attributes in DataObject".format(prop.linkName))
        self.properties.append(p)
        setattr(self, p.linkName, p)
        return p

    def get_defined_component(self):
        """Return the result of ``SV()(self)`` with this object's namespace
        manager attached."""
        g = SV()(self)
        g.namespace_manager = self.namespace_manager
        return g

    def __eq__(self, other):
        # Equal to another DataObject with the same `idl`, or to a URIRef
        # that equals this object's `idl`.
        return (isinstance(other, DataObject) and (self.idl == other.idl)) or (isinstance(other, R.URIRef) and self.idl == other)

    def __hash__(self):
        return hash(self.idl)

    def __str__(self):
        return self.namespace_manager.normalizeUri(self.idl)

    def __repr__(self):
        return self.__str__()

    def _graph_variable(self, var_name):
        """Wrap `var_name` in an rdflib Variable."""
        return R.Variable(var_name)

    @classmethod
    def addToOpenSet(cls, o):
        """Add `o` to the open set."""
        cls._openSet.add(o)

    @classmethod
    def removeFromOpenSet(cls, o):
        """Move `o` from the open set to the closed set.

        Does nothing if `o` is already in the closed set. Raises `KeyError`
        (from ``set.remove``) if `o` is in neither set.
        """
        if o not in cls._closedSet:
            cls._openSet.remove(o)
            cls._closedSet.add(o)

    @classmethod
    def extract_unique_part(cls, uri):
        """Return the part of `uri` that follows this class's namespace.

        Raises
        ------
        Exception
            If `uri` does not start with ``cls.rdf_namespace``
        """
        if uri.startswith(cls.rdf_namespace):
            # Drop the namespace prefix and keep the remainder. (The slice was
            # previously `[:len(...)]`, which returned the namespace itself.)
            return uri[len(cls.rdf_namespace):]
        else:
            raise Exception("This URI ({}) doesn't start with the appropriate namespace ({})".format(uri, cls.rdf_namespace))

    @classmethod
    def make_identifier(cls, data):
        """Return a URIRef in this class's namespace whose local part is ``"a"``
        followed by the hex digest of ``str(data)`` under the configured hash.
        """
        return R.URIRef(cls.rdf_namespace["a"+cls.identifier_hash_method(str(data).encode()).hexdigest()])

    @classmethod
    def make_identifier_direct(cls, string):
        """Return a URIRef in this class's namespace whose local part is the
        URL-quoted `string`.

        Raises
        ------
        Exception
            If `string` is not a `str`
        """
        if not isinstance(string, str):
            raise Exception("make_identifier_direct only accepts strings")
        from urllib.parse import quote
        return R.URIRef(cls.rdf_namespace[quote(string)])

    def identifier(self):
        """ The identifier for this object in the rdf graph.

        This identifier may be randomly generated, but an identifier returned from the
        graph can be used to retrieve the specific object that it refers to.

        If it is desirable to customize the identifier, a subclass of DataObject should
        override :meth:`identifier_augment` rather than this method.

        Returns
        -------
        :class:`rdflib.term.URIRef`
        """
        if self._id is not False:
            return self._id
        return self.identifier_augment()

    def identifier_augment(self):
        """ Override this method to define an identifier in lieu of one explicitly set.

        One must also override :meth:`defined_augment` to return True whenever this
        method could return a valid identifier. :exc:`IdentifierMissingException` should be
        raised if an identifier cannot be generated by this method.

        Raises
        ------
        IdentifierMissingException
        """
        raise IdentifierMissingException(self)

    def triples(self, query=False, visited_list=False):
        """ Returns 3-tuples of the connected component of the object graph
        starting from this object.

        `query` and `visited_list` are accepted but not used by this
        implementation.

        Returns
        --------
        An iterable of triples
        """
        return self.get_defined_component()

    def graph_pattern(self, query=False, shorten=False):
        """ Get the graph pattern for this object.

        It should be as simple as converting the result of triples() into a BGP

        Parameters
        ----------
        query : bool
            Indicates whether or not the graph_pattern is to be used for querying
            (as in a SPARQL query) or for storage. Not used by this implementation.
        shorten : bool
            Indicates whether to shorten the URLs with the namespace manager
            attached to the ``self``
        """
        nm = self.namespace_manager if shorten else None
        return triples_to_bgp(self.get_defined_component(), namespace_manager=nm)

    def load(self):
        """Yield an object for each identifier that `GraphObjectQuerier`
        returns for this object, built with `oid` from the identifier and the
        most specific of its rdf:type values in ``self.rdf``.
        """
        for ident in GraphObjectQuerier(self, self.rdf)():
            types = set(self.rdf.objects(ident, R.RDF['type']))
            the_type = get_most_specific_rdf_type(types)
            yield oid(ident, the_type)

    def save(self):
        """ Write in-memory data to the database. Derived classes should call this to update
        the store.

        Dual to retract.
        """
        self.add_statements(self.get_defined_component())

    def retract(self):
        """ Remove this object from the data store.

        Retract removes an object and everything it points to, transitively, and everything
        which points to it.

        Dual to save.
        """
        self.retract_statements(self.get_defined_component())

    def save_object(self):
        """ Write in-memory data to the database. Derived classes should call this to update
        the store.

        Adds the statements produced by ``DescendantTripler(self)()``.

        Dual to retract_object.
        """
        self.add_statements(DescendantTripler(self)())

    def retract_object(self):
        """ Remove this object from the data store.

        Retract removes an object and everything it points to, transitively, and everything
        which points to it.

        Retracts the statements produced by ``HeroTripler(self)()``.

        Dual to save_object.
        """
        self.retract_statements(HeroTripler(self)())

    def retract_objectG(self):
        """ Remove this object from the data store.

        Retract removes an object and everything it points to, transitively, and everything
        which points to it.

        Like :meth:`retract_object`, but passes ``self.rdf`` to `HeroTripler`.

        Dual to save_objectG.
        """
        # NOTE(review): no `save_objectG` is defined on this class -- confirm
        # whether the "dual" named above exists elsewhere.
        g = HeroTripler(self, self.rdf)()
        self.retract_statements(g)

    def __getitem__(self, x):
        try:
            return DataUser.__getitem__(self, x)
        except KeyError:
            raise Exception("You attempted to get the value `%s' from `%s'. It isn't here. Perhaps you misspelled the name of a Property?" % (x, self))

    def getOwners(self, property_name):
        """ Return the owners along a property pointing to this object """
        wanted = str(property_name)
        return [
            x.owner
            for x in self.owner_properties
            if isinstance(x, SimpleProperty) and str(x.linkName) == wanted
        ]
class DataObjectType(DataObject):
    """A DataObject subclass that adds no members of its own.

    `DataObject.__init__` relates instances of this class to the `RDFSClass`
    singleton through ``rdf_type_property``.
    """
class DataObjectSingleton(DataObject):
    """A DataObject whose instance is obtained through :meth:`getInstance`.

    Constructing the class directly raises an exception; `getInstance` creates
    the instance on first use, stores it on the class, and returns the stored
    instance afterwards.
    """

    instance = None

    # True only while getInstance() is constructing the instance. It needs a
    # class-level default: without one, direct construction before the first
    # getInstance() call failed with AttributeError instead of the explanatory
    # exception raised in __init__.
    _gettingInstance = False

    def __init__(self, *args, **kwargs):
        if not type(self)._gettingInstance:
            raise Exception("You must call getInstance to get "+type(self).__name__)
        DataObject.__init__(self, *args, **kwargs)

    @classmethod
    def getInstance(cls):
        """Return the instance stored on `cls`, creating it with ``cls()`` if
        none is stored yet."""
        if cls.instance is None:
            cls._gettingInstance = True
            try:
                cls.instance = cls()
            finally:
                # Always clear the guard, so a failed construction does not
                # leave the class open to direct instantiation.
                cls._gettingInstance = False

        return cls.instance
class RDFSClass(DataObjectSingleton): # This maybe becomes a DataObject later
    """ The DataObject corresponding to rdfs:Class """
    # XXX: This class may be changed from a singleton later to facilitate dumping
    #      and reloading the object graph
    rdf_type = R.RDFS['Class']

    def __init__(self):
        # The rdfs:Class URI is passed positionally, i.e. as the `ident`
        # argument of DataObject.__init__.
        super().__init__(R.RDFS["Class"])
class RDFProperty(DataObjectSingleton):
    """ The DataObject corresponding to rdf:Property """
    rdf_type = R.RDF['Property']

    def __init__(self):
        # The rdf:Property URI is passed positionally, i.e. as the `ident`
        # argument of DataObject.__init__.
        super().__init__(R.RDF["Property"])
class RDFTypeProperty(ObjectProperty):
    """Multi-valued object property for ``rdf:type``: owned by a DataObject,
    with RDFSClass values, attached under the name ``rdf_type_property``."""
    linkName = "rdf_type_property"
    link = R.RDF['type']
    multiple = True
    owner_type = DataObject
    value_type = RDFSClass
class RDFSSubClassOfProperty(ObjectProperty):
    """Multi-valued object property for ``rdfs:subClassOf``: relates one
    RDFSClass to another, attached under the name ``rdfs_subClassOf``."""
    linkName = "rdfs_subClassOf"
    link = R.RDFS['subClassOf']
    multiple = True
    owner_type = RDFSClass
    value_type = RDFSClass
class PropertyDataObject(DataObjectType):
    """ A PropertyDataObject represents the property-as-object.

    Try not to confuse this with the Property class
    """
class RDFSDomainProperty(ObjectProperty):
    """Multi-valued object property for ``rdfs:domain``: owned by RDFProperty,
    with RDFSClass values, attached under the name ``rdfs_domain``."""
    linkName = "rdfs_domain"
    link = R.RDFS['domain']
    multiple = True
    owner_type = RDFProperty
    value_type = RDFSClass
class RDFSRangeProperty(ObjectProperty):
    """Multi-valued object property for ``rdfs:range``: owned by RDFProperty,
    with RDFSClass values, attached under the name ``rdfs_range``."""
    linkName = "rdfs_range"
    link = R.RDFS['range']
    multiple = True
    owner_type = RDFProperty
    value_type = RDFSClass
class ObjectCollection(DataObject):
    """
    A convenience class for working with a collection of objects

    Example::

        v = ObjectCollection('unc-13 neurons and muscles')
        n = P.Neuron()
        m = P.Muscle()
        n.receptor('UNC-13')
        m.receptor('UNC-13')
        for x in n.load():
            v.member(x)
        for x in m.load():
            v.member(x)
        # Save the group for later use
        v.save()
        ...
        # get the list back
        u = ObjectCollection('unc-13 neurons and muscles')
        nm = list(u.member())

    Parameters
    ----------
    group_name : string
        A name of the group of objects

    Attributes
    ----------
    name : DatatypeProperty
        The name of the group of objects
    group_name : DataObject
        an alias for ``name``
    member : ObjectProperty
        An object in the group
    add : ObjectProperty
        an alias for ``member``
    """
    # NOTE(review): presumably this list is meant to declare `member` as an
    # object property (parallel to `datatypeProperties` below), but it is bound
    # to the name `_` -- confirm against what MappedClass reads.
    _ = ['member']
    datatypeProperties = [{'name':'name', 'multiple':False}]

    def __init__(self, group_name=False, **kwargs):
        # `group_name` doubles as the key from which DataObject derives the
        # identifier.
        DataObject.__init__(self, key=group_name, **kwargs)
        self.add = self.member
        self.group_name = self.name
        self.name(group_name)

    def identifier(self, query=False):
        """Return an identifier made by hashing ``self.group_name``.

        `query` is accepted but not used.
        """
        # NOTE(review): `self.group_name` is bound to the `name` property in
        # __init__, not to the raw string, so the hash is taken over
        # str() of that property. This also bypasses the key-derived `_id`
        # that __init__ sets -- confirm this is intended.
        return self.make_identifier(self.group_name)