Hey, if I separated a `pandas_image_spec` and a `sklearn_image_spec`, and a task requires both image specs, is it possible to give a list of image specs in `container_image`, or how should I reconcile them?
e.g. in `pima_diabetes`:
# Image spec that adds scikit-learn and xgboost on top of the flytekit base image.
# The base image is a plain registry reference; the chat-client link markup
# ("<http://...|...>") that had leaked into the string is not a valid image name.
sklearn_image_spec = ImageSpec(
    base_image="ghcr.io/flyteorg/flytekit:py3.8-1.8.2",
    registry="localhost:30000",
    packages=["scikit-learn", "xgboost"],
)
# Image spec that adds pandas and numpy on top of the flytekit base image.
# The base image is a plain registry reference; the chat-client link markup
# ("<http://...|...>") that had leaked into the string is not a valid image name.
pandas_image_spec = ImageSpec(
    base_image="ghcr.io/flyteorg/flytekit:py3.8-1.8.2",
    registry="localhost:30000",
    packages=["pandas", "numpy"],
)
@task(container_image=sklearn_image_spec, cache_version="1.0", cache=True, limits=Resources(mem="200Mi"))
def split_traintest_dataset(
    dataset: FlyteFile[typing.TypeVar("csv")], seed: int, test_split_ratio: float
) -> Tuple[StructuredDataset, StructuredDataset, StructuredDataset, StructuredDataset]:
    """Load the CSV dataset and split it into train and test partitions.

    Args:
        dataset: CSV file without a header row; the keys of ``DATASET_COLUMNS``
            are used as the column names.
        seed: Forwarded to ``train_test_split`` as ``random_state``.
        test_split_ratio: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        Whatever ``train_test_split(x, y, ...)`` returns for the two inputs --
        presumably (x_train, x_test, y_train, y_test) if this is scikit-learn's
        ``train_test_split``; confirm against the import.
    """
    # NOTE(review): this body needs pandas (pd.read_csv) as well as
    # train_test_split at runtime, while sklearn_image_spec only lists
    # scikit-learn and xgboost as extra packages. Confirm the base image
    # already ships pandas; otherwise use one spec listing every package the
    # task needs -- container_image presumably takes a single image, not a list.
    column_names = list(DATASET_COLUMNS.keys())
    df = pd.read_csv(dataset, names=column_names)
    # First eight columns form x; the last column forms y. The double brackets
    # keep y as a one-column DataFrame rather than a Series.
    x = df[column_names[:8]]
    y = df[[column_names[-1]]]
    return train_test_split(x, y, test_size=test_split_ratio, random_state=seed)