Source code for datareactor.atoms.base
import logging
from datareactor.dataset import Dataset, DerivedColumn
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class Atom:
    """Generate derived columns for a dataset.

    Each `Atom` is responsible for generating one or more derived columns for
    the target table. Subclasses provide the per-table logic by overriding
    `derive`; `transform` calls it once for every table in the dataset and
    collects the results.
    """

    def transform(self, dataset):
        """Generate derived columns for the dataset.

        The `transform` function takes in a dataset and returns a sequence of
        derived columns. It calls `derive` for each table name reported by
        ``dataset.metadata.get_table_names()`` and gathers everything `derive`
        yields into a single flat list.

        Args:
            dataset (Dataset): The dataset.

        Returns:
            (:obj:`list` of :obj:`DerivedColumn`): The derived columns.

        Raises:
            AssertionError: If `dataset` is not a `Dataset`, or if `derive`
                produces an item that is not a `DerivedColumn`.
            NotImplementedError: If the subclass does not override `derive`
                and the dataset has at least one table.
        """
        assert isinstance(dataset, Dataset), (
            "dataset must be a Dataset, got %s" % type(dataset).__name__
        )

        derived_columns = []
        for table_name in dataset.metadata.get_table_names():
            # Pass the values as logging arguments so the message is only
            # formatted when the INFO level is actually enabled.
            logger.info(
                "Generating columns in table %s using %s.",
                table_name,
                self.__class__.__name__,
            )
            for derived_column in self.derive(dataset, table_name):
                # Check each item as it arrives so a misbehaving `derive`
                # implementation fails at the offending column.
                assert isinstance(derived_column, DerivedColumn), (
                    "derive() must produce DerivedColumn objects, got %s"
                    % type(derived_column).__name__
                )
                derived_columns.append(derived_column)
        return derived_columns

    def derive(self, dataset, table_name):
        """Generate derived columns for the specified table.

        The `derive` function takes in a dataset and the name of the target
        table. It returns a list of derived columns which can be concatenated
        to the target table. This base implementation is abstract and must be
        overridden by subclasses.

        Args:
            dataset (Dataset): The dataset.
            table_name (str): The name of the target table.

        Returns:
            (:obj:`list` of :obj:`DerivedColumn`): The derived columns.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()