Skip to content

init_predict_dataset function doesn't scale the temporal variable. #14

@Pluto-Nemo

Description

@Pluto-Nemo

The init_predict_dataset function doesn't scale the temporal variable, which causes the prediction results to fall in the wrong range when using the GTNNWR model.

Here is my modified version of the function.

def init_predict_dataset(data, train_dataset, x_column, spatial_column=None, temp_column=None,
                         process_fn="minmax_scale", scale_sync=True, use_class=predictDataset,
                         spatial_fun=BasicDistance, temporal_fun=Manhattan_distance, max_size=-1, is_need_STNN=False):
    """
    Initialize a predict dataset whose attributes and distance matrices are
    scaled with the *train* dataset's scale parameters, so predictions are
    made in the same feature space the model was trained in.

    :param data: input dataframe to predict on
    :param train_dataset: fitted train dataset; supplies the reference points
        and the attribute/distance/temporal scale parameters
    :param x_column: attribute column name(s)
    :param spatial_column: spatial coordinate column name(s); required
    :param temp_column: temporal coordinate column name(s); optional — when
        None, no temporal matrix is built or scaled
    :param process_fn: data process function ("minmax_scale" or anything else
        is treated as standard scaling, mirroring the train-time behavior)
    :param scale_sync: if True, scale attributes with the train dataset's
        scale parameters instead of refitting on the predict data
    :param use_class: dataset class used to build the predict dataset
    :param spatial_fun: spatial distance calculate function; required
    :param temporal_fun: temporal distance calculate function
    :param max_size: dataloader batch size; a negative value means one batch
        containing the whole predict dataset
    :param is_need_STNN: if True, build spatial/temporal point matrices for
        STNN instead of precomputed distance matrices
    :return: the initialized predict dataset (with a ``dataloader`` attached)
    :raises ValueError: if ``spatial_fun``/``spatial_column`` is missing or
        the train dataset used an unknown scale function
    """
    if spatial_fun is None:
        # a spatial distance function is mandatory
        raise ValueError(
            "dist_fun must be a function that can process the data")

    if spatial_column is None:
        # a spatial coordinate column is mandatory
        raise ValueError(
            "dist_column must be a column name in data")

    # Reuse the train dataset's attribute scale parameters so predict-time
    # attributes live in the same scaled space as the training attributes.
    if train_dataset.scale_fn == "minmax_scale":
        process_params = [[train_dataset.x_scale_info['min'], train_dataset.x_scale_info['max']]]
    elif train_dataset.scale_fn == "standard_scale":
        process_params = [[train_dataset.x_scale_info['mean'], train_dataset.x_scale_info['std']]]
    else:
        raise ValueError("scale_fn must be minmax_scale or standard_scale")

    if scale_sync:
        predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, scale_info=process_params,
                                    is_need_STNN=is_need_STNN)
    else:
        predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, is_need_STNN=is_need_STNN)

    reference_data = train_dataset.reference

    if not is_need_STNN:
        # Not using STNN: build spatial (and optionally temporal) distance
        # matrices between the predict points and the train reference points.
        if train_dataset.simple_distance:
            predict_dataset.distances = spatial_fun(
                data[spatial_column].values, reference_data[spatial_column].values)

            if temp_column is not None:
                predict_dataset.temporal = temporal_fun(
                    data[temp_column].values, reference_data[temp_column].values)

                # stack spatial and temporal distances along a new last axis
                predict_dataset.distances = np.concatenate(
                    (predict_dataset.distances[:, :, np.newaxis], predict_dataset.temporal[:, :, np.newaxis]),
                    axis=2)
        else:
            # "non-simple" distance: keep raw coordinate pairs
            # (predict point, reference point) instead of scalar distances
            predict_dataset.distances = np.repeat(data[spatial_column].values[:, np.newaxis, :],
                                                  len(reference_data),
                                                  axis=1)
            predict_temp_distance = np.repeat(reference_data[spatial_column].values[:, np.newaxis, :],
                                              predict_dataset.datasize,
                                              axis=1)
            predict_dataset.distances = np.concatenate(
                (predict_dataset.distances, np.transpose(predict_temp_distance, (1, 0, 2))), axis=2)

            if temp_column is not None:
                predict_dataset.temporal = np.repeat(data[temp_column].values[:, np.newaxis, :],
                                                     len(reference_data),
                                                     axis=1)
                predict_temp_temporal = np.repeat(reference_data[temp_column].values[:, np.newaxis, :],
                                                  predict_dataset.datasize,
                                                  axis=1)
                predict_dataset.temporal = np.concatenate(
                    (predict_dataset.temporal, np.transpose(predict_temp_temporal, (1, 0, 2))), axis=2)
                # NOTE(review): the original concatenated temporal into
                # distances unconditionally here, which raises AttributeError
                # when temp_column is None — guarded now.
                predict_dataset.distances = np.concatenate(
                    (predict_dataset.distances, predict_dataset.temporal), axis=2)

    else:
        # Using STNN: build spatial/temporal *point* matrices (coordinate
        # pairs); STNN computes the distances itself.
        predict_dataset.distances = np.repeat(data[spatial_column].values[:, np.newaxis, :], len(reference_data),
                                              axis=1)
        predict_temp_distance = np.repeat(reference_data[spatial_column].values[:, np.newaxis, :],
                                          predict_dataset.datasize,
                                          axis=1)
        predict_dataset.distances = np.concatenate(
            (predict_dataset.distances, np.transpose(predict_temp_distance, (1, 0, 2))), axis=2)

        if temp_column is not None:
            predict_dataset.temporal = np.repeat(data[temp_column].values[:, np.newaxis, :], len(reference_data),
                                                 axis=1)
            predict_temp_temporal = np.repeat(reference_data[temp_column].values[:, np.newaxis, :],
                                              predict_dataset.datasize,
                                              axis=1)
            predict_dataset.temporal = np.concatenate(
                (predict_dataset.temporal, np.transpose(predict_temp_temporal, (1, 0, 2))), axis=2)

    # Scale the spatial distance matrix with the TRAIN dataset's parameters so
    # the network sees distances on the same footing as during training.
    if process_fn == "minmax_scale":
        predict_dataset.distances = predict_dataset.minmax_scaler(predict_dataset.distances,
                                                                  train_dataset.distances_scale_param['min'],
                                                                  train_dataset.distances_scale_param['max'])
    else:
        predict_dataset.distances = predict_dataset.standard_scaler(predict_dataset.distances,
                                                                    train_dataset.distances_scale_param['mean'],
                                                                    train_dataset.distances_scale_param['var'])

    # Scale the temporal matrix only when one was built; the original scaled
    # unconditionally and crashed with AttributeError for purely spatial data.
    if temp_column is not None:
        if process_fn == "minmax_scale":
            predict_dataset.temporal = predict_dataset.minmax_scaler(predict_dataset.temporal,
                                                                     train_dataset.temporal_scale_param['min'],
                                                                     train_dataset.temporal_scale_param['max'])
        else:
            # BUGFIX: the original read temporal_scale_param['min']/['max'] in
            # this standard-scale branch; use 'mean'/'var' to match the
            # distances branch above (assumes temporal_scale_param mirrors
            # distances_scale_param under standard scaling — TODO confirm
            # against gnnwr's train dataset implementation).
            predict_dataset.temporal = predict_dataset.standard_scaler(predict_dataset.temporal,
                                                                       train_dataset.temporal_scale_param['mean'],
                                                                       train_dataset.temporal_scale_param['var'])

    # One-shot dataloader: no shuffling, and by default the whole predict set
    # in a single batch.
    if max_size < 0:
        max_size = len(predict_dataset)
    predict_dataset.dataloader = DataLoader(
        predict_dataset, batch_size=max_size, shuffle=False)

    return predict_dataset

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions