Source code for datareactor.atoms.addnumerical

import numpy as np

from datareactor.atoms.base import Atom
from datareactor.dataset import DerivedColumn


[docs]class AddNumericalAtom(Atom): """Add a random set of columns. The `AddNumericalAtom` generates a derived column which contains the sum of a random set of numerical columns in the same table. """
[docs] def derive(self, dataset, table_name): """Add a column containing random values. """ df = dataset.tables[table_name].select_dtypes("number") if len(df.columns) > 1: cols = np.random.choice(df.columns, size=5) df = df[list(set(cols))].copy().fillna(0.0) new_col = df.sum(axis=1) derived_column = DerivedColumn() derived_column.table_name = table_name derived_column.values = new_col.values derived_column.field = { "name": "add_numerical", "data_type": "numerical" } derived_column.constraint = { "constraint_type": "lineage", "related_fields": [ {"table": table_name, "field": col_name} for col_name in df.columns ], "fields_under_consideration": [ {"table": table_name, "field": "add_numerical"} ] } yield derived_column