Frank Shen
01/06/2023, 6:51 PM
Kevin Su
01/06/2023, 7:08 PM
sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", accessKeyId)
sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", secretAccessKey)
Frank Shen
01/06/2023, 7:11 PM@task(
task_config=Spark(
spark_conf={...
}
),
)
def create_spark_df() -> Annotated[StructuredDataset, columns]:
sess = flytekit.current_context().spark_session
return StructuredDataset(
dataframe=sess.createDataFrame(
[
("Alice", 5),
("Bob", 10),
("Charlie", 15),
],
["name", "age"],
)
)
Kevin Su
01/06/2023, 7:27 PM
Frank Shen
01/06/2023, 7:30 PM
/env/lib/python3.8/site-packages/py4j/protocol.py\", line 326, in get_return_value\n raise Py4JJavaError(\npy4j.protocol.Py4JJavaError: An error occurred while calling o39.parquet.\n: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found