Skip to content

Commit e011e67

Browse files
authored
FAI-901: Increased input/output conversion flexibility (#127)
* Allowed for native Python types, single values, and lists as valid conversions for inputs/outputs * black and linting
1 parent fde2f46 commit e011e67

File tree

2 files changed

+154
-51
lines changed

2 files changed

+154
-51
lines changed

src/trustyai/utils/data_conversions.py

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,26 @@
2020
# UNION TYPES FOR INPUTS AND OUTPUTS
2121
# if a TrustyAI function wants AN input/output, it should accept this union type:
2222
OneInputUnionType = Union[
23-
np.ndarray, pd.DataFrame, pd.Series, List[Feature], PredictionInput
23+
int,
24+
float,
25+
np.integer,
26+
np.inexact,
27+
np.ndarray,
28+
pd.DataFrame,
29+
pd.Series,
30+
List[Feature],
31+
PredictionInput,
2432
]
2533
OneOutputUnionType = Union[
26-
np.ndarray, pd.DataFrame, pd.Series, List[Output], PredictionOutput
34+
int,
35+
float,
36+
np.integer,
37+
np.inexact,
38+
np.ndarray,
39+
pd.DataFrame,
40+
pd.Series,
41+
List[Output],
42+
PredictionOutput,
2743
]
2844

2945
# if a TrustyAI function wants a LIST of inputs/outputs, it should accept this union type:
@@ -64,9 +80,16 @@ def dec(obj):
6480

6581
_conversion_docstrings = {
6682
"one_input": [
67-
":class:`numpy.ndarray`, :class:`pandas.DataFrame`, :class:`pandas.Series`, List[:class:`Feature`], or :class:`PredictionInput`",
83+
"int, float, :class:`numpy.number`, List[Union[int, float, :class:`numpy.number`]], "
84+
":class:`numpy.ndarray`, :class:`pandas.DataFrame`, :class:`pandas.Series`, "
85+
"List[:class:`Feature`], or :class:`PredictionInput`",
6886
"""
6987
88+
* If there's only a single input feature, an ``int``, ``float``, or any of the
89+
`Numpy equivalents <https://numpy.org/doc/stable/user/basics.types.html>`_
90+
can be used.
91+
* A list of ``int``, ``float``, or any of the
92+
`Numpy equivalents <https://numpy.org/doc/stable/user/basics.types.html>`_.
7093
* Numpy array of shape ``[1, n_features]`` or ``[n_features]``
7194
* Pandas DataFrame with 1 row and ``n_features`` columns
7295
* Pandas Series with ``n_features`` rows
@@ -76,9 +99,16 @@ def dec(obj):
7699
""",
77100
],
78101
"one_output": [
79-
":class:`numpy.ndarray`, :class:`pandas.DataFrame`, List[:class:`Output`], or :class:`PredictionOutput`",
102+
"int, float, :class:`numpy.number`, List[Union[int, float, :class:`numpy.number`]], "
103+
":class:`numpy.ndarray`, :class:`pandas.DataFrame`, :class:`pandas.Series`, "
104+
"List[:class:`Output`], or :class:`PredictionOutput`",
80105
"""
81106
107+
* If there's only a single output, an ``int``, ``float``, or any of the
108+
`Numpy equivalents <https://numpy.org/doc/stable/user/basics.types.html>`_
109+
can be used.
110+
* A list of ``int``, ``float``, or any of the
111+
`Numpy equivalents <https://numpy.org/doc/stable/user/basics.types.html>`_.
82112
* Numpy array of shape ``[1, n_outputs]`` or ``[n_outputs]``
83113
* Pandas DataFrame with 1 row and ``n_outputs`` columns
84114
* Pandas Series with ``n_outputs`` rows
@@ -140,7 +170,15 @@ def one_input_convert(
140170
python_inputs: OneInputUnionType, feature_domains: FeatureDomain = None
141171
) -> PredictionInput:
142172
"""Convert an object of OneInputUnionType into a PredictionInput."""
143-
if isinstance(python_inputs, np.ndarray):
173+
if isinstance(python_inputs, (int, float, np.number)):
174+
python_inputs = np.array([[python_inputs]])
175+
pi = numpy_to_prediction_object(python_inputs, trustyai.model.feature)[0]
176+
elif isinstance(python_inputs, list) and all(
177+
(isinstance(x, (int, float, np.number)) for x in python_inputs)
178+
):
179+
python_inputs = np.array(python_inputs).reshape(1, -1)
180+
pi = numpy_to_prediction_object(python_inputs, trustyai.model.feature)[0]
181+
elif isinstance(python_inputs, np.ndarray):
144182
if len(python_inputs.shape) == 1:
145183
python_inputs = python_inputs.reshape(1, -1)
146184
pi = numpy_to_prediction_object(python_inputs, trustyai.model.feature)[0]
@@ -183,20 +221,30 @@ def many_inputs_convert(
183221
# === output functions =============================================================================
184222
def one_output_convert(python_outputs: OneOutputUnionType) -> PredictionOutput:
185223
"""Convert an object of OneOutputUnionType into a PredictionOutput"""
186-
if isinstance(python_outputs, np.ndarray):
224+
if isinstance(python_outputs, (int, np.integer, float, np.inexact)):
225+
python_outputs = np.array([[python_outputs]])
226+
po = numpy_to_prediction_object(python_outputs, trustyai.model.output)[0]
227+
elif isinstance(python_outputs, list) and all(
228+
(isinstance(x, (int, float, np.number)) for x in python_outputs)
229+
):
230+
python_outputs = np.array(python_outputs).reshape(1, -1)
231+
po = numpy_to_prediction_object(python_outputs, trustyai.model.output)[0]
232+
elif isinstance(python_outputs, np.ndarray):
187233
if len(python_outputs.shape) == 1:
188234
python_outputs = python_outputs.reshape(1, -1)
189-
return numpy_to_prediction_object(python_outputs, trustyai.model.output)[0]
190-
if isinstance(python_outputs, pd.DataFrame):
191-
return df_to_prediction_object(python_outputs, trustyai.model.output)[0]
192-
if isinstance(python_outputs, pd.Series):
193-
return df_to_prediction_object(
235+
po = numpy_to_prediction_object(python_outputs, trustyai.model.output)[0]
236+
elif isinstance(python_outputs, pd.DataFrame):
237+
po = df_to_prediction_object(python_outputs, trustyai.model.output)[0]
238+
elif isinstance(python_outputs, pd.Series):
239+
po = df_to_prediction_object(
194240
pd.DataFrame([python_outputs]), trustyai.model.output
195241
)[0]
196-
if isinstance(python_outputs, PredictionOutput):
197-
return python_outputs
198-
# fallback is List[Output]
199-
return PredictionOutput(python_outputs)
242+
elif isinstance(python_outputs, PredictionOutput):
243+
po = python_outputs
244+
else:
245+
# fallback is List[Output]
246+
po = PredictionOutput(python_outputs)
247+
return po
200248

201249

202250
def many_outputs_convert(

tests/general/test_conversions.py

Lines changed: 91 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -153,64 +153,114 @@ def test_feature_domains():
153153

154154

155155
def test_one_input_conversion():
156+
"""Test one input conversions to one PredInput"""
156157
numpy1 = np.arange(0, 10)
157-
numpy2 = np.arange(0, 10).reshape(1, 10)
158-
series = pd.Series(numpy1, index=["input-{}".format(i) for i in range(10)])
159-
df = pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(10)])
158+
numpy2 = numpy1.reshape(1, 10)
159+
160+
to_convert = [
161+
numpy1,
162+
numpy2,
163+
pd.Series(numpy1, index=["input-{}".format(i) for i in range(10)]),
164+
pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(10)]),
165+
numpy1.tolist()
166+
]
160167

161-
ta_numpy1 = one_input_convert(numpy1)
162-
ta_numpy2 = one_input_convert(numpy2)
163-
ta_series = one_input_convert(series)
164-
ta_df = one_input_convert(df)
168+
converted = [one_input_convert(x) for x in to_convert]
165169

166-
assert ta_numpy1.equals(ta_numpy2)
167-
assert ta_numpy2.equals(ta_series)
168-
assert ta_series.equals(ta_df)
170+
for i in range(len(converted)-1):
171+
assert converted[i].equals(converted[i+1])
169172

170173

171174
def test_one_input_conversion_domained():
175+
"""Test one input conversions with domains to one PredInput"""
172176
n_feats = 5
173177
np.random.seed(0)
174-
178+
numpy1 = np.arange(0, n_feats)
179+
numpy2 = numpy1.reshape(1, n_feats)
175180
domain_bounds = [[np.random.rand(), np.random.rand()] for _ in range(n_feats)]
176181
domains = [feature_domain((lb, ub)) for lb, ub in domain_bounds]
177-
numpy1 = np.arange(0, n_feats)
178-
numpy2 = np.arange(0, n_feats).reshape(1, n_feats)
179-
series = pd.Series(numpy1, index=["input-{}".format(i) for i in range(n_feats)])
180-
df = pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(n_feats)])
181182

182-
ta_numpy1 = one_input_convert(numpy1, feature_domains=domains)
183-
ta_numpy2 = one_input_convert(numpy2, feature_domains=domains)
184-
ta_series = one_input_convert(series, feature_domains=domains)
185-
ta_df = one_input_convert(df, feature_domains=domains)
183+
to_convert = [
184+
numpy1,
185+
numpy2,
186+
pd.Series(numpy1, index=["input-{}".format(i) for i in range(n_feats)]),
187+
pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(n_feats)]),
188+
numpy1.tolist()
189+
]
190+
converted = [one_input_convert(x, feature_domains=domains) for x in to_convert]
191+
192+
for i in range(len(converted) - 1):
193+
for j in range(n_feats):
194+
assert converted[i].getFeatures().get(j).getDomain().getLowerBound()\
195+
== domain_bounds[j][0]
196+
assert converted[i].getFeatures().get(j).getDomain().getUpperBound()\
197+
== domain_bounds[j][1]
198+
199+
assert converted[i].equals(converted[i + 1])
200+
201+
202+
def test_one_input_one_feature_conversion():
203+
"""Test one input, one feature conversions to one PredInput"""
204+
numpy1 = np.arange(0, 1)
205+
numpy2 = numpy1.reshape(1, 1)
186206

187-
for converted in [ta_numpy1, ta_numpy2, ta_df, ta_series]:
188-
for i in range(n_feats):
189-
assert converted.getFeatures().get(i).getDomain().getLowerBound() == domain_bounds[i][0]
190-
assert converted.getFeatures().get(i).getDomain().getUpperBound() == domain_bounds[i][1]
207+
to_convert = [
208+
numpy1,
209+
numpy2,
210+
pd.Series(numpy1, index=["input-{}".format(i) for i in range(1)]),
211+
pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(1)]),
212+
numpy1.tolist(),
213+
numpy1.tolist()[0]
214+
]
191215

192-
assert ta_numpy1.equals(ta_numpy2)
193-
assert ta_numpy2.equals(ta_series)
194-
assert ta_series.equals(ta_df)
216+
converted = [one_input_convert(x) for x in to_convert]
217+
218+
for i in range(len(converted) - 1):
219+
assert converted[i].equals(converted[i + 1])
195220

196221

197222
def test_one_output_conversion():
223+
"""Test one output conversions to one PredOutput"""
198224
numpy1 = np.arange(0, 10)
199-
numpy2 = np.arange(0, 10).reshape(1, 10)
200-
series = pd.Series(numpy1, index=["output-{}".format(i) for i in range(10)])
201-
df = pd.DataFrame(numpy2, columns=["output-{}".format(i) for i in range(10)])
225+
numpy2 = numpy1.reshape(1, 10)
226+
227+
to_convert = [
228+
numpy1,
229+
numpy2,
230+
pd.Series(numpy1, index=["output-{}".format(i) for i in range(10)]),
231+
pd.DataFrame(numpy2, columns=["output-{}".format(i) for i in range(10)]),
232+
numpy1.tolist()
233+
]
234+
235+
converted = [one_output_convert(x) for x in to_convert]
236+
237+
for i in range(len(converted) - 1):
238+
assert converted[i].equals(converted[i + 1])
239+
240+
241+
def test_one_output_one_value_conversion():
242+
"""Test one output, one value conversions to one PredOutput"""
243+
numpy1 = np.arange(0, 1)
244+
numpy2 = numpy1.reshape(1, 1)
245+
246+
to_convert = [
247+
numpy1,
248+
numpy2,
249+
pd.Series(numpy1, index=["output-{}".format(i) for i in range(1)]),
250+
pd.DataFrame(numpy2, columns=["output-{}".format(i) for i in range(1)]),
251+
numpy1.tolist(),
252+
numpy1.tolist()[0]
253+
]
202254

203-
ta_numpy1 = one_output_convert(numpy1)
204-
ta_numpy2 = one_output_convert(numpy2)
205-
ta_series = one_output_convert(series)
206-
ta_df = one_output_convert(df)
255+
converted = [one_output_convert(x) for x in to_convert]
207256

208-
assert ta_numpy1.equals(ta_numpy2)
209-
assert ta_numpy2.equals(ta_series)
210-
assert ta_series.equals(ta_df)
257+
for i in range(len(converted) - 1):
258+
assert converted[i].equals(converted[i + 1])
211259

212260

213261
def test_many_outputs_conversion():
262+
"""Test many output conversions to PredOutputs, using one row to produce
263+
List[PredOutputs] with one item"""
214264
numpy1 = np.arange(0, 10)
215265
numpy2 = np.arange(0, 10).reshape(1, 10)
216266
df = pd.DataFrame(numpy2, columns=["output-{}".format(i) for i in range(10)])
@@ -225,6 +275,7 @@ def test_many_outputs_conversion():
225275

226276

227277
def test_many_outputs_conversion2():
278+
"""Test many output conversions to many PredOutputs"""
228279
numpy1 = np.arange(0, 100).reshape(10, 10)
229280
df = pd.DataFrame(numpy1, columns=["output-{}".format(i) for i in range(10)])
230281

@@ -236,6 +287,8 @@ def test_many_outputs_conversion2():
236287

237288

238289
def test_many_inputs_conversion():
290+
"""Test many input conversions to PredInputs, using one row to produce
291+
List[PredInputs] with one item"""
239292
numpy1 = np.arange(0, 10)
240293
numpy2 = np.arange(0, 10).reshape(1, 10)
241294
df = pd.DataFrame(numpy2, columns=["input-{}".format(i) for i in range(10)])
@@ -250,6 +303,7 @@ def test_many_inputs_conversion():
250303

251304

252305
def test_many_inputs_conversion2():
306+
"""Test many input conversions to many PredInputs"""
253307
numpy1 = np.arange(0, 100).reshape(10, 10)
254308
df = pd.DataFrame(numpy1, columns=["input-{}".format(i) for i in range(10)])
255309

@@ -261,6 +315,7 @@ def test_many_inputs_conversion2():
261315

262316

263317
def test_many_inputs_conversion_domained():
318+
"""Test many input conversions to many PredInputs with domains"""
264319
n_feats = 5
265320
n_datapoints = 100
266321
np.random.seed(0)

0 commit comments

Comments
 (0)