Source code for xplainable.preprocessing.transformers.dataset

""" Copyright Xplainable Pty Ltd, 2023"""

from .base import XBaseTransformer

import pandas.api.types as pdtypes
import pandas as pd
import numpy as np
import pandas as pd
import scipy.signal as ss


class DropCols(XBaseTransformer):
    """ Drops specified columns from a dataset.

    Args:
        columns (list or str, optional): The column(s) to be dropped. A
            single column name may be given as a string. Columns that are
            not present in the dataset are ignored.
    """
    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, columns=None):
        super().__init__()
        self.columns = columns

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to choose the columns to drop.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change of the
            selection is written to ``self.columns``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets

        def _set_params(columns=xwidgets.SelectMultiple(options=df.columns)):

            self.columns = list(columns)

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Drops specified columns from a dataset.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset without the specified columns. The
                input dataset is left untouched.
        """

        if self.columns is None:
            # Nothing configured yet (constructor default): drop nothing.
            requested = []
        elif isinstance(self.columns, str):
            # A bare string is one column name, not an iterable of characters.
            requested = [self.columns]
        else:
            requested = list(self.columns)

        # De-duplicate (order preserved) and ignore columns missing from df.
        present = [c for c in dict.fromkeys(requested) if c in df.columns]

        # DataFrame.drop returns a new frame, so no explicit copy is needed.
        return df.drop(columns=present)

class DropNaNs(XBaseTransformer):
    """ Drops nan rows from a dataset.

    Args:
        subset (list, optional): A subset of columns to apply the
            transformer to. ``None`` (the default) considers every column.
    """
    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, subset=None):
        super().__init__()
        self.subset = subset

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to choose the column subset.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change of the
            selection is written to ``self.subset``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets

        def _set_params(
            subset=xwidgets.SelectMultiple(options=[None]+list(df.columns))):

            picked = list(subset)
            # The `None` entry (or no selection at all) means "no subset",
            # i.e. the constructor default. Storing [] or [None] would make
            # transform() drop nothing at all.
            self.subset = None if (not picked or None in picked) else picked

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Drops nan rows from a dataset.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset without the rows that contain a
                missing value in any of the considered columns.
        """

        candidates = list(df.columns) if self.subset is None else self.subset

        # Ignore configured columns that this dataset does not have.
        subset = [c for c in candidates if c in df.columns]

        # dropna already returns a new frame; an extra copy is unnecessary.
        return df.dropna(subset=subset)

class Operation(XBaseTransformer):
    """Applies operation to multiple columns (in order) into new feature.

    Args:
        columns (list): Column names to combine, in order.
        operation (str): One of 'add', 'multiply', 'average' or
            'concatenate'.
        alias (str): Name of newly created column. Defaults to the column
            names joined by ' + '.
        drop (bool): Drops original columns if True
    """
    
    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    # Row-wise DataFrame reducer used by each numeric operation.
    _NUMERIC_REDUCERS = {'add': 'sum', 'multiply': 'prod', 'average': 'mean'}

    def __init__(self, columns=None, operation=None, alias: str = None, drop: bool = False):
        super().__init__()
        # A fresh list per instance instead of a shared mutable default.
        self.columns = [] if columns is None else columns
        self.operation = operation
        self.alias = alias if alias else self._default_alias(self.columns)
        self.drop = drop

    @staticmethod
    def _default_alias(columns):
        """Returns the fallback name of the new column for `columns`."""
        return " + ".join(str(c) for c in columns)

    def __call__(self, dataset, *args, **kwargs):
        """ Builds the interactive widget used to configure the operation.

        Args:
            dataset (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to ``columns``, ``operation``, ``alias`` and ``drop``.
        """
        from ipywidgets import interactive
        import ipywidgets as widgets
        from ...utils import xwidgets
        
        cols = list(dataset.columns)
        
        def _set_params(
            columns_to_apply=xwidgets.SelectMultiple(
                description='Columns: ',
                value=[cols[0]],
                options=cols,
                allow_duplicates=False),
            tags = xwidgets.TagsInput(
                value=[cols[0]],
                allowed_tags=cols
            ),
            operation=['add', 'multiply', 'average', 'concatenate'],
            alias=xwidgets.Text(''),
            drop_columns=xwidgets.Checkbox(value=True)):

            self.columns = list(columns_to_apply)
            self.operation = operation
            # An empty text box falls back to the default alias rather than
            # creating a column named ''.
            self.alias = alias if alias else self._default_alias(self.columns)
            self.drop = drop_columns
        
        widget = interactive(_set_params)
        dd = widget.children[0]
        tags = widget.children[1]
        
        widget.children[3].layout = widgets.Layout(margin='10px 0 20px 0')
        # Keep the multi-select and the tag list in sync.
        widgets.link((dd, 'value'), (tags, 'value'))
        label = widgets.HTML('Drag to reorder')
        # Only the label's layout is set here. The previous chained
        # assignment also rebound the `ipywidgets.layout` module attribute.
        label.layout = widgets.Layout(margin='10px 0 0 0')

        tags_display = widgets.VBox([dd, label, tags])
        tags_display.layout = widgets.Layout(
            margin='0 0 0 30px',
            width='280px'
            )
        widget.children = (tags_display,) + widget.children[2:]
        
        return widget

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Applies operation to multiple columns (in order) into new feature.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: The transformed dataset.

        Raises:
            TypeError: If a numeric operation is requested on a non-numeric
                column.
            ValueError: If ``operation`` is set to an unsupported value.
        """

        df = df.copy()
        cols = list(self.columns)

        if self.operation in self._NUMERIC_REDUCERS:
            if not all(pdtypes.is_numeric_dtype(df[col]) for col in cols):
                raise TypeError(
                    f"Cannot {self.operation} string and numeric columns")

            # Vectorised row-wise reduction; missing values are skipped.
            reducer = getattr(df[cols], self._NUMERIC_REDUCERS[self.operation])
            df[self.alias] = reducer(axis=1)

        elif self.operation == 'concatenate':
            # Stringify on a scratch frame so the source columns keep their
            # dtype when they are not dropped.
            parts = df[cols].copy()
            for col in cols:
                if not pdtypes.is_string_dtype(parts[col]):
                    parts[col] = parts[col].astype(str)
            df[self.alias] = parts.agg('-'.join, axis=1)

        elif self.operation is not None:
            # Fail loudly instead of dropping columns without creating the
            # feature that was asked for.
            raise ValueError(
                "operation must be one of 'add', 'multiply', 'average' "
                f"or 'concatenate', got {self.operation!r}")

        if self.drop:
            df = df.drop(columns=cols)

        return df


class TextTrimMulti(XBaseTransformer):
    """ Drops or keeps first/last n characters of a categorical column.

    Args:
        column (str): The column to trim.
        selector (str): [first, last].
        n (int): Number of characters to identify.
        action (str): [keep, drop] the identified characters.
        drop_col (bool): Drops the original column if True.
        alias (str): Name of the column that receives the trimmed text.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(
        self, column='', selector=None, n=0,
        action='keep', drop_col=False, alias=''):

        # Initialise the base class like the sibling transformers do.
        super().__init__()
        self.column = column
        self.selector = selector
        self.n = n
        self.action = action
        self.drop_col = drop_col
        self.alias = alias

    def __call__(self, dataset, *args, **kwargs):
        """ Builds the interactive widget used to configure the trimming.

        Args:
            dataset (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to the transformer's attributes.
        """
        from ipywidgets import interactive
        import ipywidgets as widgets
        from ...utils import xwidgets

        cols = list(dataset.columns)
        
        def _set_params(
            column=xwidgets.Dropdown(
                description='Column: ',
                value=cols[0],
                options=cols),
            selector = widgets.Dropdown(options=["first", "last"]),
            # `value` is the trait holding the number (`n` is not a trait).
            n = widgets.IntText(value=1),
            action = widgets.Dropdown(options=['keep', 'drop']),
            alias = widgets.Text(''),
            drop_col = widgets.Checkbox(description="drop original")
        ):
            self.selector = selector
            self.n = n
            self.action = action
            self.alias = alias
            self.drop_col = drop_col
            self.column = column

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Drops or keeps first/last n characters of a categorical column.

        Any selector other than "first" is treated as "last", and any action
        other than 'keep' is treated as 'drop'.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: The transformed dataset.
        """
        df = df.copy()
        ser = df[self.column]
        n = self.n
        from_start = self.selector == "first"

        # Work out the slice bounds. For the "last" selector n == 0 needs
        # special care because -0 == 0: `[-0:]` would keep everything and
        # `[:-0]` would drop everything.
        if self.action == 'keep':
            if from_start:
                start, stop = None, n
            elif n:
                start, stop = -n, None
            else:
                start, stop = 0, 0
        else:
            if from_start:
                start, stop = n, None
            elif n:
                start, stop = None, -n
            else:
                start, stop = None, None

        df[self.alias] = ser.str.slice(start, stop)

        if self.drop_col:
            df = df.drop(columns=[self.column])

        return df


class ChangeNames(XBaseTransformer):
    """ Changes names of columns in a dataset
    
    Args:
        col_names (dict, optional): Dictionary of old and new column names.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, col_names=None):
        super().__init__()
        # A fresh dict per instance instead of a shared mutable default.
        self.col_names = {} if col_names is None else col_names

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to edit the column names.

        Args:
            df (pd.DataFrame): The dataset whose columns are listed.

        Returns:
            The ipywidgets ``interactive`` container with one text box per
            column; every edit is written to ``self.col_names``.
        """
        from ipywidgets import interactive
        import ipywidgets as widgets
        from ...utils import xwidgets
      
        def _set_params(*args, **kwargs):
            # The keyword arguments are the {old name: new name} mapping.
            self.col_names = kwargs
        
        col_names = {col: xwidgets.Text(col) for col in df.columns}
        
        w = interactive(_set_params, **col_names)
        
        # Blank the description of every child except the last one.
        for c in w.children[:-1]:
            c.description = ''
        
        return w

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Changes names of columns in a dataset

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset with the columns renamed. Entries
                for columns that are not in the dataset are ignored.
        """
        mapping = {
            old: new for old, new in (self.col_names or {}).items()
            if old in df.columns
        }

        # rename returns a new frame, so the input is never modified.
        return df.rename(columns=mapping)


class OrderBy(XBaseTransformer):
    """ Orders the dataset by the values of a given series.

    Args:
        order_by (str or list): The series (or list of series) to order by.
        ascending (bool): Orders in ascending order if True.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, order_by=None, ascending=True):
        super().__init__()
        self.order_by = order_by
        self.ascending = ascending

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the ordering.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to ``self.order_by`` and ``self.ascending``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
      
        def _set_params(
            order_by = xwidgets.SelectMultiple(
                description='Order by: ', options=[None]+list(df.columns)),
            direction = xwidgets.ToggleButtons(
                description='Direction: ',
                options=['ascending', 'descending'])):

            # `None` is only the widget's "nothing" placeholder, not a key.
            self.order_by = [key for key in order_by if key is not None]
            self.ascending = direction == 'ascending'

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Orders the dataset by the values of a given series.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new, sorted dataset. When no sort key is
                configured the rows keep their order.
        """

        if self.order_by is None:
            keys = []
        elif pdtypes.is_list_like(self.order_by):
            keys = [key for key in self.order_by if key is not None]
        else:
            keys = [self.order_by]

        if not keys:
            # Nothing to sort by (constructor default or the GUI's `None`
            # entry): return an unsorted copy instead of raising.
            return df.copy()

        return df.sort_values(keys, ascending=self.ascending)


class GroupbyShift(XBaseTransformer):
    """ Shifts a series up or down n steps within specified group.

    Args:
        columns (list): The target features to shift.
        step (int): The number of steps to shift.
        as_new (bool): Creates new columns if True, otherwise the shifted
            values overwrite the target columns.
        col_names (list, optional): Names of the output columns, one per
            target. Generated from the configuration when omitted.
        group_by (str or list): The column(s) to group by.
        order_by (str or list): The column(s) to order by.
        descending (bool): Orders the value descending if True.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, columns=None, step=0, as_new=True, col_names=None, \
        group_by=None, order_by=None, descending=None):

        super().__init__()
        self.columns = columns
        self.step = step
        self.as_new = as_new
        # A private list per instance: a shared default list would collect
        # the names generated by every instance.
        self.col_names = [] if col_names is None else list(col_names)

        self.group_by = group_by
        self.order_by = order_by
        self.descending = descending

    @staticmethod
    def _without_none(values):
        """Returns `values` as a list without the GUI's `None` placeholder."""
        if values is None:
            return []
        if isinstance(values, str):
            return [values]
        return [v for v in values if v is not None]

    def _default_col_name(self, target):
        """Returns the generated output column name for `target`."""
        col_name = f'{target}_shift_{self.step}'

        group_by = self._without_none(self.group_by)
        if group_by:
            col_name += "_gb_" + '_'.join(str(g) for g in group_by)

        order_by = self._without_none(self.order_by)
        if order_by:
            col_name += "_ob_" + '_'.join(str(o) for o in order_by)

        return col_name

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the shift.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to the transformer's attributes.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
        
        def _set_params(
            group_by = xwidgets.SelectMultiple(
                description='Group by: ', options=[None]+list(df.columns)),
            order_by = xwidgets.SelectMultiple(
                description='Order by: ', options=[None]+list(df.columns)),
            descending = xwidgets.Checkbox(value=False),
            columns=xwidgets.SelectMultiple(
                description='Columns: ', options=[None]+list(df.columns)),
            step = xwidgets.IntText(value=0, min=-1000, max=1000),
            as_new = xwidgets.Checkbox(value=False)
            ):

            self.columns = self._without_none(columns)
            self.group_by = self._without_none(group_by)
            self.order_by = self._without_none(order_by)
            self.descending = descending
            self.step = step

            self.as_new = as_new

            # Rebuild the names from scratch on every widget update.
            # Appending would accumulate stale names from earlier selections.
            if as_new:
                self.col_names = [
                    self._default_col_name(t) for t in self.columns]
            else:
                self.col_names = []

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Shifts a series up or down n steps within specified group.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset (ordered by ``order_by`` when set)
                that contains the shifted columns.
        """

        columns = self._without_none(self.columns)
        group_by = self._without_none(self.group_by)
        order_by = self._without_none(self.order_by)

        # Order values if requested. Either way continue on a new frame so
        # the caller's dataset is never modified.
        if order_by:
            df = df.sort_values(order_by, ascending=not self.descending)
        else:
            df = df.copy()

        col_names = list(self.col_names or [])
        if col_names and len(col_names) == len(columns):
            # Explicit names that match the targets one-to-one win.
            names = col_names
        elif self.as_new:
            names = [self._default_col_name(t) for t in columns]
        else:
            # Not as new: the shifted values replace the target columns.
            names = columns

        for col_name, target in zip(names, columns):
            if group_by:
                df[col_name] = df.groupby(group_by)[target].shift(self.step)
            else:
                df[col_name] = df[target].shift(self.step)

        return df


class FillMissing(XBaseTransformer):
    """Fills missing values of all columns with a specified value/strategy.

    Args:
        fill_with (dict, optional): Maps a column name to 'mean', 'median',
            'mode' or to the literal value to fill with.
        fill_values (dict, optional): Maps a column name to the value that
            replaces its missing entries. Populated by ``fit``.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, fill_with=None, fill_values=None):
        super().__init__()
        # Fresh dicts per instance: fit() writes into fill_values, so a
        # shared default dict would leak fitted values between instances.
        self.fill_with = {} if fill_with is None else fill_with
        self.fill_values = {} if fill_values is None else fill_values

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to choose the fill strategies.

        Args:
            df (pd.DataFrame): The dataset whose columns are listed.

        Returns:
            The ipywidgets ``interactive`` container with one control per
            column; every change is written to ``self.fill_with``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
      
        def _set_params(*args, **kwargs):
            # The keyword arguments are the {column: strategy} mapping.
            self.fill_with = kwargs

        def get_widget(col):
            # Numeric columns pick a statistic; others take free text.
            if pdtypes.is_numeric_dtype(df[col]):
                return xwidgets.Dropdown(options=["mean", "median", "mode"])
            else:
                return xwidgets.Text(value='missing')

        col_xwidgets = {col: get_widget(col) for col in df.columns}  

        return interactive(_set_params, **col_xwidgets)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """  Fills missing values of all columns with a specified value/strategy.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset with the missing values filled. The
                input dataset is left untouched.
        """

        df = df.copy()

        for col, value in self.fill_values.items():
            if col not in df.columns:
                continue
            df[col] = df[col].fillna(value)

        return df

    def fit(self, df: pd.DataFrame) -> 'FillMissing':
        """ Calculates the fill_value for all columns in the dataset.

        The fill values are based on a specified strategy for each column.
        Columns that are not in the dataset are skipped, as in ``transform``.

        Args:
            df (pd.DataFrame): The dataset to fit

        Returns:
            FillMissing: The fitted transformer.
        """

        for col, strategy in self.fill_with.items():
            if col not in df.columns:
                continue

            series = df[col]

            # Calculate fill_value if mean, median or mode
            if strategy == 'mean':
                value = round(np.nanmean(series), 4)

            elif strategy == 'median':
                value = np.nanmedian(series)

            elif strategy == 'mode':
                modes = series.mode()
                if modes.empty:
                    # Every value is missing: there is nothing to learn.
                    continue
                # mode() returns a Series (ties are possible); fillna needs
                # a scalar, so take the first mode.
                value = modes.iloc[0]

            else:
                value = strategy

            # Maintain fill value type (if int)
            if pdtypes.is_integer_dtype(series):
                value = int(value)

            self.fill_values[col] = value

        return self


class SetDTypes(XBaseTransformer):
    """Sets the data type of all columns in the dataset.
    
    Args:
        types (dict, optional): Dictionary of column names and data types
            ('string', 'float', 'integer', 'date' or 'boolean').
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, types=None):
        super().__init__()
        # A fresh dict per instance instead of a shared mutable default.
        self.types = {} if types is None else types

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to choose the column types.

        Args:
            df (pd.DataFrame): The dataset whose columns are listed.

        Returns:
            The ipywidgets ``interactive`` container with one dropdown per
            column; every change is written to ``self.types``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
      
        def _set_params(*args, **kwargs):
            # The keyword arguments are the {column: type} mapping.
            self.types = kwargs

        def get_widget(col):
            numeric_options = ["float", "integer", "string"]

            if pdtypes.is_float_dtype(df[col]):
                options, value = numeric_options, 'float'

            elif pdtypes.is_integer_dtype(df[col]):
                options, value = numeric_options, 'integer'

            elif pdtypes.is_string_dtype(df[col]):
                options, value = numeric_options, 'string'

            elif pdtypes.is_datetime64_dtype(df[col]):
                options, value = ["date", "string"], "date"

            elif pdtypes.is_bool_dtype(df[col]):
                options = ["boolean", "string", "integer", "float"]
                value = "boolean"

            else:
                # Any dtype not matched above is offered the string options,
                # so `options`/`value` are always bound.
                options, value = numeric_options, 'string'

            w = xwidgets.Dropdown(
                options=options,
                value=value,
                style={'description_width': 'initial'}
                )

            return w

        col_xwidgets = {col: get_widget(col) for col in df.columns}  

        return interactive(_set_params, **col_xwidgets)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Sets the data type of all columns in the dataset.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset with the requested types. Columns
                named in ``types`` but missing from the dataset are skipped.
        """

        df = df.copy()

        for col, kind in self.types.items():

            if col not in df.columns:
                continue

            if kind == 'string':
                df[col] = df[col].astype(str)
                continue

            if kind == 'date':
                # Dates must not reach pd.to_numeric below, which would turn
                # them into integers.
                df[col] = pd.to_datetime(df[col], errors='coerce')
                continue

            # Every other label is coerced to a numeric dtype. Values that
            # cannot be parsed become NaN.
            df[col] = pd.to_numeric(df[col], errors='coerce')

            if kind == 'integer':
                # If missing value are present, cannot cast to int: the
                # column is then left as float.
                try:
                    df[col] = df[col].astype(int)
                except (ValueError, TypeError):
                    continue

            elif kind == 'float':
                # to_numeric keeps integer columns as integers.
                df[col] = df[col].astype(float)

        return df


class TextSplit(XBaseTransformer):
    """ Splits a string column into multiple columns on a specified separator.

    Args:
        target (str): The column to split.
        separator (str): The separator to split on.
        max_splits (int): The maximum number of splits to make.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, target=None, separator=None, max_splits=0):

        # Initialise the base class like the sibling transformers do.
        super().__init__()
        self.target = target
        self.separator = separator
        self.max_splits = max_splits

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the split.

        Args:
            df (pd.DataFrame): The dataset whose string columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to ``target``, ``separator`` and ``max_splits``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
        
        def _set_params(
            target=xwidgets.Dropdown(
                options=[None]+[i for i in df.columns if \
                    pdtypes.is_string_dtype(df[i])]),
                separator = xwidgets.Text(value=""),
                max_splits = xwidgets.IntText(range=[0,10])):

            self.target = target
            self.separator = separator
            # Negative input is clamped to 0.
            self.max_splits = max([max_splits, 0])

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Splits a string column into multiple columns on a specified separator.

        The new columns are named ``<target>_0``, ``<target>_1``, ... and the
        target column itself is removed.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset with the split columns. When no
                target is set, or the target is not in the dataset, an
                unchanged copy is returned.
        """

        # Work on a copy: adding the new columns below must not alter the
        # caller's dataset.
        df = df.copy()

        if self.target not in df.columns:
            # Covers the default `None` target as well.
            return df

        new_cols = df[self.target].astype(str).str.split(
            self.separator, expand=True, n=self.max_splits)
        new_cols.columns = [f'{self.target}_{i}' for i in new_cols]
        df[new_cols.columns] = new_cols

        return df.drop(columns=[self.target])


class ChangeCases(XBaseTransformer):
    """ Changes the case of all specified categorical columns.

    Args:
        columns (list, optional): To apply the case change to.
        case (str): 'upper' or 'lower'.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    def __init__(self, columns=None, case='lower'):
        super().__init__()
        # A fresh list per instance instead of a shared mutable default.
        self.columns = [] if columns is None else columns
        self.case = case

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the case change.

        Args:
            df (pd.DataFrame): The dataset whose string columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to ``self.columns`` and ``self.case``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
        
        def _set_params(
            columns = xwidgets.SelectMultiple(
                description='Columns: ',
                options=[None]+[i for i in df.columns if \
                    pdtypes.is_string_dtype(df[i])]),

            case = xwidgets.Dropdown(
                description='Case: ',
                options=["lower", "upper"])):
            
            self.columns = list(columns)
            self.case = case
        
        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Changes the case of all specified categorical columns.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset with the case changed. The input
                dataset is left untouched.

        Raises:
            ValueError: If there is a column to change and ``case`` is
                neither 'lower' nor 'upper'.
        """

        # Work on a copy so the caller's dataset is never modified.
        df = df.copy()

        # Configured columns that are missing from the dataset are skipped.
        present = [col for col in self.columns if col in df.columns]

        if present and self.case not in ('lower', 'upper'):
            raise ValueError("case change must be either 'lower' or 'upper'")

        for col in present:
            # Resolves to Series.str.lower() or Series.str.upper().
            df[col] = getattr(df[col].str, self.case)()

        return df


class GroupedSignalSmoothing(XBaseTransformer):
    """ Smooths signal data within specified group.

    Every target is passed through a Savitzky-Golay filter
    (``scipy.signal.savgol_filter``) with a window of 19 samples and a
    polynomial of order 1.

    Args:
        target (str or list): The target feature(s) to smooth.
        group_by (str or list): The column(s) to group by.
        order_by (str or list): The column(s) to order by.
        descending (bool): Orders the value descending if True.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    # Savitzky-Golay filter settings.
    _WINDOW_LENGTH = 19
    _POLYORDER = 1

    def __init__(self, target=None, group_by=None,\
        order_by=None, descending=None):

        super().__init__()
        self.target = target

        self.group_by = group_by
        self.order_by = order_by
        self.descending = descending

    @staticmethod
    def _without_none(values):
        """Returns `values` as a list without the GUI's `None` placeholder."""
        if values is None:
            return []
        if isinstance(values, str):
            return [values]
        return [v for v in values if v is not None]

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the smoothing.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to ``targets``, ``group_by``, ``order_by`` and
            ``descending``.
        """
        from ipywidgets import interactive
        from ...utils import xwidgets
        
        def _set_params(
            group_by = xwidgets.SelectMultiple(
                description='Group by: ', options=[None]+list(df.columns)),
            order_by = xwidgets.SelectMultiple(
                description='Order by: ', options=[None]+list(df.columns)),
            descending = xwidgets.Checkbox(value=False),
            targets=xwidgets.SelectMultiple(options=[None]+list(df.columns))
            ):

            self.targets = targets
            self.group_by = list(group_by)
            self.order_by = list(order_by)
            self.descending = descending

        return interactive(_set_params)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Smooths signal data within specified group.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: A new dataset (ordered by ``order_by`` when set)
                in which the target columns hold the smoothed signal.

        Raises:
            ValueError: Raised by scipy when a series (or group) is shorter
                than the filter window.
        """

        # The GUI stores its selection in `targets`, the constructor in
        # `target`; accept whichever has been set.
        selected = getattr(self, 'targets', None)
        if selected is None:
            selected = self.target
        targets = self._without_none(selected)

        group_by = self._without_none(self.group_by)
        order_by = self._without_none(self.order_by)

        # Order values if requested. Either way continue on a new frame so
        # the caller's dataset is never modified.
        if order_by:
            df = df.sort_values(order_by, ascending=not self.descending)
        else:
            df = df.copy()

        def _smooth(values):
            return ss.savgol_filter(
                values, window_length=self._WINDOW_LENGTH,
                polyorder=self._POLYORDER, deriv=0, mode='interp')

        # The loop covers both branches. Previously the ungrouped branch
        # referenced the loop variable without ever looping.
        for t in targets:
            if group_by:
                df[t] = df.groupby(group_by)[t].transform(_smooth)
            else:
                df[t] = _smooth(df[t])

        return df


class DateTimeExtract(XBaseTransformer):
    """ Extracts Datetime values from datetime object.
    
    Args:
        target (str): The datetime column to extract from.
        year (bool): Extracts year if True.
        month (bool): Extracts month if True.
        day (bool): Extracts day if True.
        weekday (bool): Extracts weekday if True.
        day_name (bool): Extracts day name if True.
        hour (bool): Extracts hour if True.
        minute (bool): Extracts minute if True.
        second (bool): Extracts second if True.
        drop (bool): Drops original datetime column if True.
    """

    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    # Extractable components, in output-column order. Each name is both the
    # flag attribute on this transformer and the member of the pandas `.dt`
    # accessor that produces the value.
    _PARTS = (
        'year', 'month', 'day', 'weekday',
        'day_name', 'hour', 'minute', 'second',
    )

    def __init__(self, target=None, year=False, month=False, day=False, \
        weekday=False, day_name=False, hour=False, minute=False, \
            second=False, drop=False):

        # Initialise the base class like the sibling transformers do.
        super().__init__()
        self.target = target
        self.year = year
        self.month = month
        self.day = day
        self.weekday = weekday
        self.day_name = day_name
        self.hour = hour
        self.minute = minute
        self.second = second
        self.drop = drop


    def __call__(self, df, *args, **kwargs):
        """ Builds the widget used to choose the target and the components.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            An ipywidgets ``VBox`` holding the target dropdown, two columns
            of toggle buttons and the drop checkbox; every change is written
            to the transformer's attributes.
        """
        from ipywidgets import interactive
        import ipywidgets as widgets
        
        def _set_params(
            target=widgets.Dropdown(options=df.columns),
            year=widgets.ToggleButton(value=False),
            month=widgets.ToggleButton(value=False),
            day=widgets.ToggleButton(value=False),
            weekday=widgets.ToggleButton(value=False),
            day_name=widgets.ToggleButton(value=False),
            hour=widgets.ToggleButton(value=False),
            minute=widgets.ToggleButton(value=False),
            second=widgets.ToggleButton(value=False),
            drop = widgets.Checkbox(value=False)
        ):

            self.target = target
            self.year = year
            self.month = month
            self.day = day
            self.weekday = weekday
            self.day_name = day_name
            self.hour = hour
            self.minute = minute
            self.second = second
            self.drop = drop

        w = interactive(_set_params)
        _target = w.children[0]
        # Lay the eight toggles out as two columns of four.
        left = widgets.VBox(w.children[1:5])
        right = widgets.VBox(w.children[5:9])
        buttons = widgets.HBox([left, right])
        _drop = w.children[9]
        elements = widgets.VBox([_target, buttons, _drop])
                
        return elements

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Extracts Datetime values from datetime object.

        Each enabled component is written to a new ``<target>_<component>``
        column. The target column itself is converted to datetime.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: The transformed dataset.

        Raises:
            TypeError: If the target column cannot be converted to datetime.
        """

        df = df.copy()

        try:
            df[self.target] = pd.to_datetime(df[self.target])
        except Exception as err:
            # Same exception type as before, but the cause is chained and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            raise TypeError(
                f"{self.target} can not be coerced to datetime") from err

        accessor = df[self.target].dt

        for part in self._PARTS:
            if not getattr(self, part):
                continue

            value = getattr(accessor, part)
            # `day_name` is a method on the accessor; the rest are attributes.
            df[f'{self.target}_{part}'] = value() if callable(value) else value

        if self.drop:
            df = df.drop(columns=[self.target])

        return df


class RollingOperation(XBaseTransformer):
    """Applies a rolling-window operation to columns, optionally per group.

    For every column a new ``<column>_<operation>_<window>`` feature is
    created.

    Args:
        groupby (str or list): The column(s) to group by before rolling.
        orderby (str or list): The column(s) that define the row order of
            the rolling window.
        direction (str): 'ascending' orders ascending; any other value
            orders descending.
        columns (list): Column names to apply the operation to.
        window (int): Size of the rolling window.
        operation (str): One of 'mean', 'sum', 'max' or 'min'.
        drop (bool): Drops original columns if True
    """
    
    # Informs the embedded GUI which data types this transformer supports.
    supported_types = ['dataset']

    # Supported rolling aggregations (names of pandas Rolling methods).
    _OPERATIONS = ('mean', 'sum', 'max', 'min')

    def __init__(
        self,
        groupby=None,
        orderby=None,
        direction=None,
        columns=None,
        window=None,
        operation=None,
        drop: bool = False
        ):

        super().__init__()
        self.groupby = groupby
        self.orderby = orderby
        self.direction = direction
        # A fresh list per instance instead of a shared mutable default.
        self.columns = [] if columns is None else columns
        self.window = window
        self.operation = operation
        self.drop = drop

    @staticmethod
    def _without_none(values):
        """Returns `values` as a list without the GUI's `None` placeholder."""
        if values is None:
            return []
        if isinstance(values, str):
            return [values]
        return [v for v in values if v is not None]

    def __call__(self, df, *args, **kwargs):
        """ Builds the interactive widget used to configure the operation.

        Args:
            df (pd.DataFrame): The dataset whose columns are offered.

        Returns:
            The ipywidgets ``interactive`` container; every change is
            written to the transformer's attributes.
        """
        from ipywidgets import interactive
        import ipywidgets as widgets
        from ...utils import xwidgets
        
        cols = list(df.columns)
        
        def _set_params(
            group_by = xwidgets.SelectMultiple(
                description='Group by: ', options=[None]+list(df.columns)),
            order_by = xwidgets.SelectMultiple(
                description='Order by: ', options=[None]+list(df.columns)),
            direction = xwidgets.ToggleButtons(
                description='Direction: ',
                options=['ascending', 'descending']),
            columns_to_apply=xwidgets.SelectMultiple(
                description='Columns: ',
                value=[cols[0]],
                options=cols,
                allow_duplicates=False),
            window=xwidgets.IntText(min=2, value=3),
            operation=['mean', 'sum', 'max', 'min'],
            drop_columns=xwidgets.Checkbox(value=True)):

            self.groupby = list(group_by)
            self.orderby = list(order_by)
            self.direction = direction
            self.columns = list(columns_to_apply)
            self.operation = operation
            self.window = window
            self.drop = drop_columns
        
        widget = interactive(_set_params)

        widget.children[6].layout = widgets.Layout(margin='10px 0 20px 0')

        widget.layout = widgets.Layout(
            margin='0 0 0 30px',
            width='280px'
            )

        return widget

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Applies the rolling operation to every configured column.

        The window runs over the rows in ``orderby`` order (the existing row
        order when unset), inside each ``groupby`` group when grouping is
        configured. Results are written back to the row they were computed
        for, and the row order of the dataset is preserved.

        Args:
            df (pd.DataFrame): The dataset to transform.

        Returns:
            pd.DataFrame: The transformed dataset.

        Raises:
            TypeError: If a selected column is not numeric.
            ValueError: If ``operation`` is not supported.
        """

        df = df.copy()
        columns = list(self.columns)

        # Raise explicitly: `assert` is stripped when Python runs with -O.
        if not all(pdtypes.is_numeric_dtype(df[col]) for col in columns):
            raise TypeError("Selected columns must be numeric")

        if columns and self.operation not in self._OPERATIONS:
            raise ValueError(
                f"operation must be one of {list(self._OPERATIONS)}, "
                f"got {self.operation!r}")

        group_keys = self._without_none(self.groupby)
        order_keys = self._without_none(self.orderby)
        asc = self.direction == 'ascending'

        # Compute on a positionally indexed frame so every result can be
        # mapped back to its source row, even with a non-unique index.
        work = df.reset_index(drop=True)
        positions = work.index
        if order_keys:
            work = work.sort_values(order_keys, ascending=asc)

        for col in columns:
            alias = f'{col}_{self.operation}_{self.window}'

            if group_keys:
                roller = work.groupby(group_keys)[col].rolling(self.window)
                rolled = getattr(roller, self.operation)()
                # The result is indexed by (group keys..., row position);
                # keep only the row position.
                rolled = rolled.droplevel(list(range(len(group_keys))))
            else:
                roller = work[col].rolling(self.window)
                rolled = getattr(roller, self.operation)()

            # Back to the original row order. Rows without a result (e.g. a
            # missing group key) become NaN.
            df[alias] = rolled.reindex(positions).to_numpy()

        # Drop only after every column is processed. Dropping inside the
        # loop removed columns that later iterations still needed.
        if self.drop:
            df = df.drop(columns=columns)

        return df