-
Notifications
You must be signed in to change notification settings - Fork 27
Open
Description
The init_predict_dataset
function doesn't scale the temporal variable, which makes the prediction result fall outside the correct boundary when using the GTNNWR model.
Here is my modified version of the function.
def init_predict_dataset(data, train_dataset, x_column, spatial_column=None, temp_column=None,
                         process_fn="minmax_scale", scale_sync=True, use_class=predictDataset,
                         spatial_fun=BasicDistance, temporal_fun=Manhattan_distance, max_size=-1, is_need_STNN=False):
    """
    Initialize a predict dataset, reusing the scaling parameters fitted on the
    training dataset so predictions stay in the same value range as training.

    :param data: input data (indexable by column name, e.g. a pandas DataFrame)
    :param train_dataset: train dataset whose scaling parameters are reused
    :param x_column: attribute column name(s)
    :param spatial_column: spatial distance column name(s)
    :param temp_column: temporal distance column name(s)
    :param process_fn: data process function ("minmax_scale" or anything else for standard scaling)
    :param scale_sync: if True, pass the train dataset's attribute scale info to the dataset class
    :param use_class: dataset class to instantiate
    :param spatial_fun: spatial distance calculate function
    :param temporal_fun: temporal distance calculate function
    :param max_size: batch size of the predict dataloader (-1 means the whole dataset in one batch)
    :param is_need_STNN: whether STNN point-pair matrices are needed
    :return: predict_dataset
    :raises ValueError: if spatial_fun or spatial_column is None, or the train
        dataset's scale_fn is not a recognized value
    """
    if spatial_fun is None:
        # if dist_fun is None, raise error
        raise ValueError(
            "dist_fun must be a function that can process the data")
    if spatial_column is None:
        # if dist_column is None, raise error
        raise ValueError(
            "dist_column must be a column name in data")
    # Reuse the attribute scaling parameters fitted on the training data.
    if train_dataset.scale_fn == "minmax_scale":
        process_params = [[train_dataset.x_scale_info['min'], train_dataset.x_scale_info['max']]]
    elif train_dataset.scale_fn == "standard_scale":
        process_params = [[train_dataset.x_scale_info['mean'], train_dataset.x_scale_info['std']]]
    else:
        raise ValueError("scale_fn must be minmax_scale or standard_scale")
    if scale_sync:
        predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, scale_info=process_params,
                                    is_need_STNN=is_need_STNN)
    else:
        predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, is_need_STNN=is_need_STNN)
    reference_data = train_dataset.reference
    if not is_need_STNN:
        # if not use STNN, calculate spatial/temporal distance matrix and concatenate them
        if train_dataset.simple_distance:
            predict_dataset.distances = spatial_fun(
                data[spatial_column].values, reference_data[spatial_column].values)
            if temp_column is not None:
                # if temp_column is not None, calculate temporal distance matrix
                predict_dataset.temporal = temporal_fun(
                    data[temp_column].values, reference_data[temp_column].values)
                # concatenate spatial and temporal distance matrix along a new last axis
                predict_dataset.distances = np.concatenate(
                    (predict_dataset.distances[:, :, np.newaxis], predict_dataset.temporal[:, :, np.newaxis]),
                    axis=2)
        else:
            # Pair every predict point with every reference point by tiling
            # coordinates of both sides and concatenating them feature-wise.
            predict_dataset.distances = np.repeat(data[spatial_column].values[:, np.newaxis, :],
                                                  len(reference_data),
                                                  axis=1)
            predict_temp_distance = np.repeat(reference_data[spatial_column].values[:, np.newaxis, :],
                                              predict_dataset.datasize,
                                              axis=1)
            predict_dataset.distances = np.concatenate(
                (predict_dataset.distances, np.transpose(predict_temp_distance, (1, 0, 2))), axis=2)
            if temp_column is not None:
                predict_dataset.temporal = np.repeat(data[temp_column].values[:, np.newaxis, :],
                                                     len(reference_data),
                                                     axis=1)
                predict_temp_temporal = np.repeat(reference_data[temp_column].values[:, np.newaxis, :],
                                                  predict_dataset.datasize,
                                                  axis=1)
                predict_dataset.temporal = np.concatenate(
                    (predict_dataset.temporal, np.transpose(predict_temp_temporal, (1, 0, 2))), axis=2)
                predict_dataset.distances = np.concatenate(
                    (predict_dataset.distances, predict_dataset.temporal), axis=2)
    else:
        # if use STNN, calculate spatial/temporal point matrix
        # spatial distances matrix
        predict_dataset.distances = np.repeat(data[spatial_column].values[:, np.newaxis, :], len(reference_data),
                                              axis=1)
        predict_temp_distance = np.repeat(reference_data[spatial_column].values[:, np.newaxis, :],
                                          predict_dataset.datasize,
                                          axis=1)
        predict_dataset.distances = np.concatenate(
            (predict_dataset.distances, np.transpose(predict_temp_distance, (1, 0, 2))), axis=2)
        # temporal distances matrix
        if temp_column is not None:
            predict_dataset.temporal = np.repeat(data[temp_column].values[:, np.newaxis, :], len(reference_data),
                                                 axis=1)
            predict_temp_temporal = np.repeat(reference_data[temp_column].values[:, np.newaxis, :],
                                              predict_dataset.datasize,
                                              axis=1)
            predict_dataset.temporal = np.concatenate(
                (predict_dataset.temporal, np.transpose(predict_temp_temporal, (1, 0, 2))), axis=2)
        # Scale the point matrices with the parameters fitted on the training data.
        if process_fn == "minmax_scale":
            predict_dataset.distances = predict_dataset.minmax_scaler(predict_dataset.distances,
                                                                      train_dataset.distances_scale_param['min'],
                                                                      train_dataset.distances_scale_param['max'])
        else:
            predict_dataset.distances = predict_dataset.standard_scaler(predict_dataset.distances,
                                                                        train_dataset.distances_scale_param['mean'],
                                                                        train_dataset.distances_scale_param['var'])
        if temp_column is not None:
            # FIX: scale temporal only when it was built; previously this ran
            # unconditionally and crashed with AttributeError when temp_column
            # was None, and its standard-scale branch wrongly read 'min'/'max'
            # instead of 'mean'/'var'.
            if process_fn == "minmax_scale":
                predict_dataset.temporal = predict_dataset.minmax_scaler(predict_dataset.temporal,
                                                                         train_dataset.temporal_scale_param['min'],
                                                                         train_dataset.temporal_scale_param['max'])
            else:
                # NOTE(review): assumes temporal_scale_param uses the same
                # 'mean'/'var' keys as distances_scale_param — confirm against
                # the train dataset implementation.
                predict_dataset.temporal = predict_dataset.standard_scaler(predict_dataset.temporal,
                                                                           train_dataset.temporal_scale_param['mean'],
                                                                           train_dataset.temporal_scale_param['var'])
    # initialize dataloader for the predict dataset; one big batch by default
    if max_size < 0:
        max_size = len(predict_dataset)
    predict_dataset.dataloader = DataLoader(
        predict_dataset, batch_size=max_size, shuffle=False)
    return predict_dataset
Metadata
Metadata
Assignees
Labels
No labels