##############################
# Auto-generated. Do not Edit
##############################
import re
import pandas as pd
import json
from pylipd.classes.variable import Variable
from pylipd.utils import uniqid
from pylipd.classes.variable import Variable
[docs]
class DataTable:
    """LinkedEarth `DataTable`: a file name, a missing-value marker and a list of variables."""

    def __init__(self):
        """Create an empty table and assign it a newly generated identifier."""
        # Schema-defined properties; they stay None / empty until populated.
        self.fileName: str = None
        self.missingValue: str = None
        self.variables: list[Variable] = []

        # Properties that are not part of the schema, keyed by property name.
        self.misc = {}

        # Namespaces and the ontology type used when serializing this object.
        self.ontns = "http://linked.earth/ontology#"
        self.ns = "http://linked.earth/lipd"
        self.type = "http://linked.earth/ontology#DataTable"

        # The identifier lives under `ns`, with a suffix produced by `uniqid`.
        self.id = "/".join((self.ns, uniqid("DataTable.")))
[docs]
@staticmethod
def from_data(id, data) -> 'DataTable':
    """Build a `DataTable` from an ontology-style data graph.

    Parameters
    ----------
    id : str
        Identifier of the node that describes this table.
    data : dict
        Maps node ids to a dictionary of predicate name -> list of entries,
        where each entry is a dict carrying an "@id" or an "@value".

    Returns
    -------
    DataTable
        A new instance populated from ``data[id]``.
    """
    table = DataTable()
    table.id = id
    node = data[id]
    for predicate in node:
        entries = node[predicate]
        if predicate == "type":
            # Every entry overwrites the previous one, so the last type wins.
            for entry in entries:
                table.type = entry["@id"]
        elif predicate == "hasFileName":
            literal = None
            for entry in entries:
                # An entry without "@value" re-assigns the previous literal.
                if "@value" in entry:
                    literal = entry["@value"]
                table.fileName = literal
        elif predicate == "hasMissingValue":
            literal = None
            for entry in entries:
                if "@value" in entry:
                    literal = entry["@value"]
                table.missingValue = literal
        elif predicate == "hasVariable":
            for entry in entries:
                # Node references become Variable objects; anything else is
                # stored as its raw "@value".
                if "@id" in entry:
                    column = Variable.from_data(entry["@id"], data)
                else:
                    column = entry["@value"]
                table.variables.append(column)
        else:
            # Unknown predicate: keep it as a non-standard property. The
            # setter does not replace an existing key, so later entries for
            # the same predicate are ignored.
            for entry in entries:
                if "@id" in entry:
                    extra = data[entry["@id"]]
                elif "@value" in entry:
                    extra = entry["@value"]
                else:
                    extra = None
                table.set_non_standard_property(predicate, extra)
    return table
[docs]
def to_data(self, data=None):
    """Serialize the object into a JSON-LD compatible dictionary.

    Parameters
    ----------
    data : dict, optional
        Existing data dictionary to extend in place. When omitted, a new
        dictionary is created for this call.

    Returns
    -------
    dict
        The data dictionary, holding an entry for ``self.id`` plus whatever
        each non-string item of ``self.variables`` adds through its own
        ``to_data``.
    """
    # Never default to a literal ``{}``: it is created once and shared by
    # every call that omits `data`, so unrelated tables would accumulate in
    # the same dictionary.
    if data is None:
        data = {}

    xsd = "http://www.w3.org/2001/XMLSchema#"

    node = {"type": [{"@id": self.type, "@type": "uri"}]}
    data[self.id] = node

    if self.variables:
        node["hasVariable"] = []
        for variable in self.variables:
            if isinstance(variable, str):
                # Plain strings are emitted as string literals.
                entry = {
                    "@value": variable,
                    "@type": "literal",
                    "@datatype": xsd + "string",
                }
            else:
                # Objects are referenced by id and serialized into `data`.
                entry = {"@id": variable.id, "@type": "uri"}
                data = variable.to_data(data)
            node["hasVariable"].append(entry)

    if self.fileName:
        node["hasFileName"] = [{
            "@value": self.fileName,
            "@type": "literal",
            "@datatype": xsd + "string",
        }]

    if self.missingValue:
        node["hasMissingValue"] = [{
            "@value": self.missingValue,
            "@type": "literal",
            "@datatype": xsd + "string",
        }]

    # Non-standard properties: the datatype is picked from the exact Python
    # type name; strings that start like a date or timestamp get a date type.
    for key, value in self.misc.items():
        type_name = type(value).__name__
        if type_name == "int":
            datatype = xsd + "integer"
        elif type_name in ("float", "double"):
            datatype = xsd + "float"
        elif type_name == "str":
            if re.match(r"\d{4}-\d{2}-\d{2}( |T)\d{2}:\d{2}:\d{2}", value):
                # NOTE(review): XSD spells this type `dateTime`; the IRI is
                # kept unchanged because consumers may match on it.
                datatype = xsd + "datetime"
            elif re.match(r"\d{4}-\d{2}-\d{2}", value):
                datatype = xsd + "date"
            else:
                datatype = xsd + "string"
        elif type_name == "bool":
            datatype = xsd + "boolean"
        else:
            # Any other type (list, dict, None, ...) carries no datatype.
            datatype = None
        node[key] = [{
            "@value": value,
            "@type": "literal",
            "@datatype": datatype,
        }]

    return data
[docs]
def to_json(self):
    """Build the lightweight JSON form of this table (used by LiPD).

    Returns
    -------
    dict
        Always contains "@id". "columns", "filename" and "missingValue" are
        added only when the matching attribute is non-empty, followed by all
        non-standard properties.
    """
    payload = {"@id": self.id}

    if len(self.variables) > 0:
        payload["columns"] = [column.to_json() for column in self.variables]

    if self.fileName:
        payload["filename"] = self.fileName

    if self.missingValue:
        payload["missingValue"] = self.missingValue

    # Non-standard properties are copied last, so a clashing key overrides
    # the standard entry written above.
    payload.update(self.misc)
    return payload
[docs]
@staticmethod
def from_json(data) -> 'DataTable':
    """Build a `DataTable` from its lightweight JSON representation.

    Parameters
    ----------
    data : dict
        JSON dictionary; "@id", "columns", "filename" and "missingValue"
        are recognized, every other key is kept as a non-standard property.

    Returns
    -------
    DataTable
        A new instance populated from ``data``.
    """
    table = DataTable()
    for field in data:
        payload = data[field]
        if field == "@id":
            table.id = payload
        elif field == "columns":
            # Each column description is parsed into a Variable.
            table.variables.extend(
                Variable.from_json(column_json) for column_json in payload
            )
        elif field == "filename":
            table.fileName = payload
        elif field == "missingValue":
            table.missingValue = payload
        else:
            table.set_non_standard_property(field, payload)
    return table
[docs]
def set_non_standard_property(self, key, value):
    """Record a property that is not defined in the ontology schema.

    The value is stored only when `key` is not already present; an existing
    entry is left untouched.

    Parameters
    ----------
    key : str
        The property name.
    value : any
        The property value.
    """
    # setdefault inserts only when the key is absent.
    self.misc.setdefault(key, value)
[docs]
def get_non_standard_property(self, key):
    """Look up one non-standard property.

    Parameters
    ----------
    key : str
        The property name.

    Returns
    -------
    any
        The value stored under `key`.

    Raises
    ------
    KeyError
        If no property named `key` has been stored.
    """
    extras = self.misc
    return extras[key]
[docs]
def get_all_non_standard_properties(self):
    """Give access to every non-standard property at once.

    Returns
    -------
    dict
        The internal dictionary itself (not a copy), keyed by property name.
    """
    extras = self.misc
    return extras
[docs]
def add_non_standard_property(self, key, value):
    """Append a value to a list-valued non-standard property.

    A new list is started when `key` has not been stored yet.

    Parameters
    ----------
    key : str
        The property name.
    value : any
        The value to append.
    """
    # The existing entry must support `.append`; it is not converted.
    self.misc.setdefault(key, []).append(value)
[docs]
def getFileName(self) -> str:
    """Read the `fileName` attribute.

    Returns
    -------
    str
        Whatever is currently stored in `fileName`.
    """
    current = self.fileName
    return current
[docs]
def setFileName(self, fileName:str):
    """Store a new `fileName`.

    Parameters
    ----------
    fileName : str
        The value to assign.

    Raises
    ------
    AssertionError
        If `fileName` is not a `str` (only while assertions are enabled).
    """
    # Validate before assigning so a rejected value leaves the old one intact.
    assert isinstance(fileName, str), f"Error: '{fileName}' is not of type str"
    self.fileName = fileName
[docs]
def getMissingValue(self) -> str:
    """Read the `missingValue` attribute.

    Returns
    -------
    str
        Whatever is currently stored in `missingValue`.
    """
    current = self.missingValue
    return current
[docs]
def setMissingValue(self, missingValue:str):
    """Store a new `missingValue`.

    Parameters
    ----------
    missingValue : str
        The value to assign.

    Raises
    ------
    AssertionError
        If `missingValue` is not a `str` (only while assertions are enabled).
    """
    # Validate before assigning so a rejected value leaves the old one intact.
    assert isinstance(missingValue, str), f"Error: '{missingValue}' is not of type str"
    self.missingValue = missingValue
[docs]
def getVariables(self) -> list[Variable]:
    """Read the `variables` list.

    Returns
    -------
    list[Variable]
        The internal list itself (not a copy).
    """
    current = self.variables
    return current
[docs]
def setVariables(self, variables:list[Variable]):
    """Replace the whole `variables` list.

    Parameters
    ----------
    variables : list[Variable]
        The list to assign; it is stored as-is, not copied.

    Raises
    ------
    AssertionError
        If `variables` is not a list or holds a non-`Variable` item (only
        while assertions are enabled).
    """
    # Check the container first, then every element, before assigning.
    assert isinstance(variables, list), "Error: variables is not a list"
    assert all(isinstance(item, Variable) for item in variables), f"Error: '{variables}' is not of type Variable"
    self.variables = variables
[docs]
def addVariable(self, variables:Variable):
    """Append one item to the `variables` list.

    Parameters
    ----------
    variables : Variable
        The single variable to append (the parameter name is plural, but
        one object is expected).

    Raises
    ------
    AssertionError
        If the argument is not a `Variable` (only while assertions are
        enabled).
    """
    assert isinstance(variables, Variable), f"Error: '{variables}' is not of type Variable"
    self.variables.append(variables)
# Special Functions manually added for DataTable class
[docs]
def getDataFrame(self, use_standard_names=False) -> pd.DataFrame:
    """Assemble the table's variables into a pandas DataFrame.

    Each variable becomes one column whose values come from decoding the
    variable's `getValues()` string with `json.loads`. The variable's
    `to_json()` output, with its "hasValues" entry removed, is stored in
    ``df.attrs`` under the column name.

    Parameters
    ----------
    use_standard_names : bool, optional
        When true, a variable that has a standard variable is named by that
        standard variable's label instead of its own name.

    Returns
    -------
    pd.DataFrame
        The data columns, with per-column metadata in ``attrs``.
    """
    def column_name(variable):
        # The plain name is the fallback when standard names are not
        # requested or the variable has no standard variable.
        if use_standard_names:
            standard = variable.getStandardVariable()
            if standard is not None:
                return standard.getLabel()
        return variable.getName()

    named = [(column_name(variable), variable) for variable in self.variables]

    # Declare every column first, then fill them one at a time.
    frame = pd.DataFrame(columns=[name for name, _ in named])
    for name, variable in named:
        frame[name] = json.loads(variable.getValues())

    # Create metadata as a dictionary and add to dataframe attrs; the raw
    # values are dropped because they already live in the column itself.
    frame.attrs = {}
    for name, variable in named:
        frame.attrs[name] = variable.to_json()
        del frame.attrs[name]["hasValues"]
    return frame
[docs]
def setDataFrame(self, df: pd.DataFrame):
    """Rebuild this table's variables from a DataFrame.

    The current variables are discarded. Every key of ``df.attrs`` is then
    used as a column name: its metadata is parsed with `Variable.from_json`
    and the matching column's values are attached as a JSON string.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose ``attrs`` hold one metadata dictionary per column.
    """
    # Create new set of variable objects using the metadata
    self.variables = []
    for colname, metadata in df.attrs.items():
        variable = Variable.from_json(metadata)
        column_values = df[colname].to_list()
        variable.setValues(json.dumps(column_values))
        self.addVariable(variable)