import numpy as np
from datareactor.atoms.base import Atom
from datareactor.dataset import DerivedColumn
class AddNumericalAtom(Atom):
    """Derive a column by summing a random set of numerical columns.

    The `AddNumericalAtom` generates a derived column which contains the sum
    of a random set of numerical columns in the same table.
    """

    def derive(self, dataset, table_name):
        """Yield the row-wise sum of randomly chosen numerical columns.

        Five column names are drawn (with replacement) from the numeric
        columns of the table; duplicates are dropped, so between one and
        five distinct columns end up being summed. Missing values are
        treated as ``0.0``.

        Args:
            dataset: Object exposing a ``tables`` mapping from table name to
                table. NOTE(review): tables are presumably pandas
                DataFrames (only ``select_dtypes``, ``fillna`` and ``sum``
                are used here) -- confirm against the dataset class.
            table_name: Key of the table in ``dataset.tables`` to derive
                the new column from.

        Yields:
            DerivedColumn: A single column named ``add_numerical`` together
            with a ``lineage`` constraint listing the summed columns.
            Nothing is yielded when the table has fewer than two numeric
            columns.
        """
        numeric = dataset.tables[table_name].select_dtypes("number")
        if len(numeric.columns) <= 1:
            # A sum over fewer than two candidate columns is not worth
            # deriving; emit nothing for this table.
            return

        # np.random.choice samples with replacement by default, so the five
        # draws may repeat; the number of distinct columns therefore varies.
        drawn = np.random.choice(numeric.columns, size=5)

        # De-duplicate in draw order. Iterating a ``set`` of strings depends
        # on per-process hash randomization, which made the column order (and
        # the order of ``related_fields``) differ between runs even when the
        # NumPy RNG was seeded.
        chosen = list(dict.fromkeys(drawn))

        # Column selection with a list already returns a new frame and
        # ``fillna`` returns another one, so no explicit copy is needed.
        summands = numeric[chosen].fillna(0.0)
        total = summands.sum(axis=1)

        derived_column = DerivedColumn()
        derived_column.table_name = table_name
        derived_column.values = total.values
        derived_column.field = {
            "name": "add_numerical",
            "data_type": "numerical",
        }
        derived_column.constraint = {
            "constraint_type": "lineage",
            "related_fields": [
                {"table": table_name, "field": col_name}
                for col_name in summands.columns
            ],
            "fields_under_consideration": [
                {"table": table_name, "field": "add_numerical"},
            ],
        }
        yield derived_column