Hi all, we are getting the following error in some...
# flyte-support
c
Hi all, we are getting the following error in some of our workflows when passing
pandas.DataFrame
between tasks (Flyte 1.14 for both backend and flytekit). We have verified that the parquet file being pointed to is valid and can be opened using
pandas.read_parquet
when it's local. The file is stored on Oracle Cloud Storage, but is being accessed using the S3-compatible API. Any clues?
Copy code
Trace:

    Traceback (most recent call last):
      File "/home/.local/lib/python3.11/site-packages/s3fs/core.py", line 114, in _error_wrapper
        return await func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/aiobotocore/client.py", line 383, in _make_api_call
        raise error_class(parsed_response, operation_name)
    botocore.exceptions.ClientError: An error occurred (400) when calling the HeadObject operation: Bad Request

    The above exception was the direct cause of the following exception:

    Traceback (most recent call last):
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/base_task.py", line 741, in dispatch_execute
        native_inputs = self._literal_map_to_python_input(input_literal_map, exec_ctx)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/base_task.py", line 610, in _literal_map_to_python_input
        return TypeEngine.literal_map_to_kwargs(ctx, literal_map, self.python_interface.inputs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/utils.py", line 312, in wrapper
        return func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/type_engine.py", line 1479, in literal_map_to_kwargs
        return synced(ctx, lm, python_types, literal_types)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/utils/asyn.py", line 100, in wrapped
        return self.run_sync(coro_func, *args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/utils/asyn.py", line 93, in run_sync
        return self._runner_map[name].run(coro)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/utils/asyn.py", line 72, in run
        res = fut.result(None)
              ^^^^^^^^^^^^^^^^
      File "/usr/local/lib/python3.11/concurrent/futures/_base.py", line 456, in result
        return self.__get_result()
               ^^^^^^^^^^^^^^^^^^^
      File "/usr/local/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
        raise self._exception
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/type_engine.py", line 1517, in _literal_map_to_kwargs
        await asyncio.gather(*kwargs.values())
      File "/home/.local/lib/python3.11/site-packages/flytekit/core/type_engine.py", line 1441, in async_to_python_value
        pv = await transformer.async_to_python_value(ctx, lv, expected_python_type)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/types/structured/structured_dataset.py", line 1020, in async_to_python_value
        return self.open_as(ctx, lv.scalar.structured_dataset, df_type=expected_python_type, updated_metadata=metad)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/types/structured/structured_dataset.py", line 1055, in open_as
        result = decoder.decode(ctx, sd, updated_metadata)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/flytekit/types/structured/basic_dfs.py", line 137, in decode
        return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/pandas/io/parquet.py", line 503, in read_parquet
        return impl.read(
               ^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/pandas/io/parquet.py", line 251, in read
        result = self.api.parquet.read_table(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/pyarrow/parquet/core.py", line 1793, in read_table
        dataset = ParquetDataset(
                  ^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/pyarrow/parquet/core.py", line 1348, in __init__
        finfo = filesystem.get_file_info(path_or_paths)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "pyarrow/_fs.pyx", line 590, in pyarrow._fs.FileSystem.get_file_info
      File "pyarrow/error.pxi", line 155, in pyarrow.lib.pyarrow_internal_check_status
      File "pyarrow/error.pxi", line 89, in pyarrow.lib.check_status
      File "pyarrow/_fs.pyx", line 1498, in pyarrow._fs._cb_get_file_info
      File "/home/.local/lib/python3.11/site-packages/pyarrow/fs.py", line 322, in get_file_info
        info = self.fs.info(path)
               ^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/fsspec/asyn.py", line 118, in wrapper
        return sync(self.loop, func, *args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/fsspec/asyn.py", line 103, in sync
        raise return_result
      File "/home/.local/lib/python3.11/site-packages/fsspec/asyn.py", line 56, in _runner
        result[0] = await coro
                    ^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/s3fs/core.py", line 1426, in _info
        out = await self._call_s3(
              ^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/s3fs/core.py", line 371, in _call_s3
        return await _error_wrapper(
               ^^^^^^^^^^^^^^^^^^^^^
      File "/home/.local/lib/python3.11/site-packages/s3fs/core.py", line 146, in _error_wrapper
        raise err
    OSError: [Errno 22] Bad Request

Message:

    OSError: [Errno 22] Bad Request
a
is this particular to dataframes?
c
It seems like it. We didn't see it happening on other types, but did see it on different, unrelated workflows (probably with the same package versions installed).