salmon-refrigerator-32115
12/13/2023, 10:22 PM
pip list | grep flyte
flyteidl 1.10.6
flytekit 1.10.3b2
flytekitplugins-spark 1.10.3b2
glamorous-carpet-83516
12/14/2023, 7:53 PM
import datetime
import os
import random
from operator import add

from click.testing import CliRunner

import flytekit
from flytekit import Resources, Secret, task, workflow, ImageSpec
from flytekit.clis.sdk_in_container import pyflyte
from flytekitplugins.spark import Databricks

SECRET_GROUP = "token-info"
SECRET_NAME = "token_secret"

image = ImageSpec(base_image="pingsutw/databricks:v4", registry="pingsutw")


@task(
    task_config=Databricks(
        # this configuration is applied to the spark cluster
        spark_conf={
            "spark.driver.memory": "600M",
            "spark.executor.memory": "600M",
            "spark.executor.cores": "1",
            "spark.executor.instances": "1",
            "spark.driver.cores": "1",
        },
        executor_path="/databricks/python3/bin/python",
        applications_path="dbfs:///FileStore/tables/entrypoint.py",
        databricks_conf={
            "run_name": "flytekit databricks plugin example",
            "new_cluster": {
                "spark_version": "13.3.x-scala2.12",
                "node_type_id": "m6i.large",  # TODO: test m6i.large, i3.xlarge
                "num_workers": 3,
                "aws_attributes": {
                    "availability": "ON_DEMAND",
                    "instance_profile_arn": "arn:aws:iam::xxxxxx:instance-profile/databricks-agent",
                    "ebs_volume_type": "GENERAL_PURPOSE_SSD",
                    "ebs_volume_count": 1,
                    "ebs_volume_size": 100,
                },
            },
            "timeout_seconds": 3600,
            "max_retries": 1,
        },
        databricks_instance="xxxxxxx.cloud.databricks.com",
    ),
    limits=Resources(mem="2000M"),
    # container_image=image,
    container_image="pingsutw/databricks:v7",
)
def hello_spark(partitions: int) -> float:
    print("Starting Spark with Partitions: {}".format(partitions))
    n = 100000 * partitions
    sess = flytekit.current_context().spark_session
    count = (
        sess.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
    )
    pi_val = 4.0 * count / n
    print("Pi val is: {}".format(pi_val))
    return pi_val


def f(_):
    x = random.random() * 2 - 1
    y = random.random() * 2 - 1
    return 1 if x**2 + y**2 <= 1 else 0


@task(cache_version="1")
def print_every_time(value_to_print: float, date_triggered: datetime.datetime) -> int:
    print("My printed value: {} @ {}".format(value_to_print, date_triggered))
    return 1


@workflow
def wf(
    triggered_date: datetime.datetime = datetime.datetime.now(),
) -> float:
    """
    Use this workflow like any other workflow. Since the image is a property
    of the task, the workflow does not care how the image is configured.
    """
    pi = hello_spark(partitions=50)
    print_every_time(value_to_print=pi, date_triggered=triggered_date)
    return pi


if __name__ == "__main__":
    runner = CliRunner()
    result = runner.invoke(
        pyflyte.main,
        [
            "run",
            "--raw-output-data-prefix",
            "s3://flyte-batch/spark/",
            "/Users/kevin/git/flytekit/flyte-example/databricks_wf",
            "wf",
        ],
    )
    print(result.output)
salmon-refrigerator-32115
12/15/2023, 12:37 AM
pyflyte run --raw-output-data-prefix s3://mlp-flyte-workflow/test databricks.py wf
Running Execution on local.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/14 16:36:55 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2023-12-14 16:36:56,557422 ERROR {"asctime": "2023-12-14 16:36:56,557", "name": "flytekit", "levelname": "ERROR", "message": "Agent failed to run the task with error: Please make sure to add secret_requests=[Secret(group=FLYTE_DATABRICKS_ACCESS_TOKEN, key=None)] in @task. Unable to find secret for key None in group FLYTE_DATABRICKS_ACCESS_TOKEN in Env Var:_FSEC_FLYTE_DATABRICKS_ACCESS_TOKEN and FilePath: /etc/secrets/flyte_databricks_access_token"} task.py:189
salmon-refrigerator-32115
12/15/2023, 12:38 AM
@task(
    secret_requests=[
        Secret(
            group=SECRET_GROUP,
            key=SECRET_NAME,
            mount_requirement=Secret.MountType.ENV_VAR,
        )
    ],
What should be in the secret_requests?
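Going by the agent error above, the lookup is for a secret group named FLYTE_DATABRICKS_ACCESS_TOKEN with no key, so the request should name that group rather than SECRET_GROUP/SECRET_NAME. A minimal sketch, with the group name taken from the error text:

from flytekit import Secret, task

@task(
    # Group name copied verbatim from the agent's error message; no key needed.
    secret_requests=[Secret(group="FLYTE_DATABRICKS_ACCESS_TOKEN")],
)
def hello_spark(partitions: int) -> float:
    ...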
glamorous-carpet-83516
12/15/2023, 12:39 AM
pyflyte run --remote databricks.py wf
glamorous-carpet-83516
12/15/2023, 12:40 AM
secret_requests
salmon-refrigerator-32115
12/15/2023, 12:40 AM
tar: Removing leading `/' from member names
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /databricks/python3/bin/pyflyte-execute:8 in <module> │
│ │
│ ❱ 8 │ sys.exit(execute_task_cmd()) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/click/core.py:1157 in │
│ __call__ │
│ │
│ ❱ 1157 │ │ return self.main(*args, **kwargs) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/click/core.py:1078 in main │
│ │
│ ❱ 1078 │ │ │ │ │ rv = self.invoke(ctx) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/click/core.py:1434 in invoke │
│ │
│ ❱ 1434 │ │ │ return ctx.invoke(self.callback, **ctx.params) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/click/core.py:783 in invoke │
│ │
│ ❱ 783 │ │ │ │ return __callback(*args, **kwargs) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/bin/entrypoint.py:4 │
│ 93 in execute_task_cmd │
│ │
│ ❱ 493 │ _execute_task( │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/exceptions/scopes.p │
│ y:143 in f │
│ │
│ ❱ 143 │ │ │ return outer_f(inner_f, args, kwargs) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/exceptions/scopes.p │
│ y:173 in system_entry_point │
│ │
│ ❱ 173 │ │ │ │ return wrapped(*args, **kwargs) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/bin/entrypoint.py:3 │
│ 65 in _execute_task │
│ │
│ ❱ 365 │ │ _task_def = resolver_obj.load_task(loader_args=resolver_args) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/core/utils.py:319 │
│ in wrapper │
│ │
│ ❱ 319 │ │ │ │ return func(*args, **kwargs) │
│ │
│ /databricks/python3/lib/python3.9/site-packages/flytekit/core/python_auto_co │
│ ntainer.py:251 in load_task │
│ │
│ ❱ 251 │ │ task_def = getattr(task_module, task_name) │
╰──────────────────────────────────────────────────────────────────────────────╯
AttributeError: module 'databricks' has no attribute 'hello_spark'
Getting s3://mlp-flyte-artifact/flytesnacks/development/6GNCBGLZ5JD6QPBXXBNY7V632A======/script_mode.tar.gz to /root/
Stream closed EOF for flytesnacks-development/f1362fb6223764094a5d-n0-0 (f1362fb6223764094a5d-n0-0)
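One plausible reading of the AttributeError: the entrypoint imports a module named databricks and looks up hello_spark on it, and an installed databricks package (or a stale copy of the script on DBFS) can win the import over the user's databricks.py. A quick diagnostic sketch, assuming access to the same Python environment as the cluster:

import importlib

# Check which module the name "databricks" actually resolves to; if it is a
# site-packages SDK rather than the uploaded script, the task attribute will
# be missing -- reproducing the AttributeError above.
mod = importlib.import_module("databricks")
print(mod.__file__)
print(hasattr(mod, "hello_spark"))  # False reproduces the failure

Renaming the workflow file to something that does not collide (e.g. databricks_wf.py) is the usual workaround for this kind of shadowing.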
salmon-refrigerator-32115
12/15/2023, 12:42 AM
pyflyte run --raw-output-data-prefix s3://mlp-flyte-workflow/test databricks.py wf
Running Execution on local.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/14 16:34:43 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2023-12-14 16:34:45,425583 ERROR {"asctime": "2023-12-14 16:34:45,425", "name": "flytekit", "levelname": "ERROR", "message": "Agent failed to run the task with error: Please make sure to add secret_requests=[Secret(group=FLYTE_DATABRICKS_ACCESS_TOKEN, key=None)] in @task. Unable to find secret for key None in group FLYTE_DATABRICKS_ACCESS_TOKEN in Env Var:_FSEC_FLYTE_DATABRICKS_ACCESS_TOKEN and FilePath: /etc/secrets/flyte_databricks_access_token"} task.py:189
glamorous-carpet-83516
12/15/2023, 12:43 AM
/etc/secrets/FLYTE_DATABRICKS_ACCESS_TOKEN locally
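For local agent runs, either of the two lookup locations from the earlier error message can be provisioned by hand. A minimal sketch; the token value is a placeholder:

import os
from pathlib import Path

token = "<your-databricks-personal-access-token>"  # placeholder

# Option 1: the env var named in the error message.
os.environ["_FSEC_FLYTE_DATABRICKS_ACCESS_TOKEN"] = token

# Option 2: the file path named in the error message (writing under /etc
# requires elevated privileges).
secret_file = Path("/etc/secrets/flyte_databricks_access_token")
secret_file.parent.mkdir(parents=True, exist_ok=True)
secret_file.write_text(token)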
salmon-refrigerator-32115
12/15/2023, 1:08 AM
pyflyte run --raw-output-data-prefix s3://mlp-flyte-workflow/test databricks.py wf
Running Execution on local.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/14 17:03:08 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2023-12-14 17:03:10,995716 ERROR {"asctime": "2023-12-14 17:03:10,995", "name": "flytekit", "levelname": "ERROR", "message": "Agent failed to run the task with error: Cannot connect to host wbd-dcp-mlp-dev.cloud.databricks.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1131)')]"} task.py:189
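The "self signed certificate in certificate chain" failure usually points at a corporate proxy or a missing CA bundle in the local Python environment rather than at flytekit itself. A small sketch to inspect what Python sees for the host named in the error:

import socket
import ssl

host = "wbd-dcp-mlp-dev.cloud.databricks.com"
context = ssl.create_default_context()
# If this raises SSLCertVerificationError, the local trust store (or an
# intercepting proxy) is the problem; otherwise it prints the cert issuer.
with socket.create_connection((host, 443), timeout=10) as sock:
    with context.wrap_socket(sock, server_hostname=host) as tls:
        print(tls.getpeercert()["issuer"])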
glamorous-carpet-83516
12/15/2023, 1:11 AM
Can you call the jobs/runs/get REST API from local to make sure you can connect to the Databricks platform?
curl --netrc --request GET --header "Authorization: Bearer $DATABRICKS_TOKEN" \
'https://dbc-2889d011-7c0b.cloud.databricks.com/api/2.0/jobs/runs/get?run_id=236244201527146'
salmon-refrigerator-32115
12/15/2023, 1:19 AM
curl --netrc --request GET --header "Authorization: Bearer $DATABRICKS_TOKEN" \
'https://wbd-dcp-mlp-dev.cloud.databricks.com/api/2.0/jobs/runs/get?run_id=33469631515226'
curl: (3) URL rejected: Malformed input to a URL function
zsh: no such file or directory: https://wbd-dcp-mlp-dev.cloud.databricks.com/api/2.0/jobs/runs/get?run_id=33469631515226
salmon-refrigerator-32115
12/15/2023, 1:20 AM
$DATABRICKS_TOKEN
salmon-refrigerator-32115
12/15/2023, 1:21 AM
curl: (3) URL rejected: Malformed input to a URL function
zsh: no such file or directory: https://wbd-dcp-mlp-dev.cloud.databricks.com/api/2.0/jobs/runs/get?run_id=33469631515226
glamorous-carpet-83516
12/15/2023, 1:26 AM
odd
curl: (3) URL rejected: Malformed input to a URL function
salmon-refrigerator-32115
12/15/2023, 1:28 AM
curl --netrc --request GET --header "Authorization: Bearer xxx" 'https://wbd-dcp-mlp-dev.cloud.databricks.com/api/2.0/jobs/runs/get?run_id=33469631515226'
{"job_id":884271762736911,"run_id":33469631515226,"creator_user_name":"frank.shen@wbd.com","number_in_job":33469631515226,"original_attempt_run_id":33469631515226,"state":{"life_cycle_state":"TERMINATED","result_state":"SUCCESS","state_message":"","user_cancelled_or_timedout":false},"task":{"sql_task":{"query":{"query_id":"bea7b086-e308-4317-be79-55d4ac006f73"},"warehouse_id":"e414e987f3e25b85"}},"cluster_spec":{},"start_time":1702602985495,"setup_duration":0,"execution_duration":132000,"cleanup_duration":0,"end_time":1702603118520,"run_duration":133025,"trigger":"ONE_TIME","run_name":"frank_test","run_page_url":"https://wbd-dcp-mlp-dev.cloud.databricks.com/?o=7539614086893660#job/884271762736911/run/33469631515226","run_type":"JOB_RUN","attempt_number":0,"format":"SINGLE_TASK"}
salmon-refrigerator-32115
12/15/2023, 1:49 AM
ERROR {"asctime": "2023-12-14 17:45:53,424", "name": "flytekit", "levelname": "ERROR", "message": "Agent failed to run the task with error: Failed to create databricks job with error: {'error_code': 'INVALID_PARAMETER_VALUE', 'message': 'The instance profile arn (arn:aws:iam::245085526351:role/dcp-mlp-mlp-instance-profile) is ill-formed.'}"} task.py:189
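The rejected value is an IAM role ARN, while Databricks expects aws_attributes.instance_profile_arn to reference an instance profile (arn:aws:iam::<account>:instance-profile/<name>). A sketch of the corrected field; the profile name is a placeholder:

# Only the ARN format changes; everything else stays as configured below.
aws_attributes = {
    "availability": "ON_DEMAND",
    # instance-profile, not role -- <profile-name> is a placeholder:
    "instance_profile_arn": "arn:aws:iam::245085526351:instance-profile/<profile-name>",
    "ebs_volume_type": "GENERAL_PURPOSE_SSD",
    "ebs_volume_count": 1,
    "ebs_volume_size": 100,
}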
salmon-refrigerator-32115
12/15/2023, 1:49 AM
@task(
    task_config=Databricks(
        # this configuration is applied to the spark cluster
        spark_conf={
            "spark.driver.memory": "600M",
            "spark.executor.memory": "600M",
            "spark.executor.cores": "1",
            "spark.executor.instances": "1",
            "spark.driver.cores": "1",
        },
        executor_path="/databricks/python3/bin/python",
        applications_path="dbfs:///FileStore/tables/entrypoint.py",
        databricks_conf={
            "run_name": "flytekit databricks plugin example",
            "new_cluster": {
                "spark_version": "13.3.x-scala2.12",
                "node_type_id": "m6i.large",  # TODO: test m6i.large, i3.xlarge
                "num_workers": 3,
                "aws_attributes": {
                    "availability": "ON_DEMAND",
                    "instance_profile_arn": "arn:aws:iam::245085526351:role/dcp-mlp-mlp-instance-profile",
                    "ebs_volume_type": "GENERAL_PURPOSE_SSD",
                    "ebs_volume_count": 1,
                    "ebs_volume_size": 100,
                },
            },
            "timeout_seconds": 3600,
            "max_retries": 1,
        },
        databricks_instance="wbd-dcp-mlp-dev.cloud.databricks.com",
    ),
    limits=Resources(mem="2000M"),
    # container_image=image,
    container_image="876262748715.dkr.ecr.us-east-1.amazonaws.com/mlforge/flyte:0.2.0-pr-59-9bcc9043",
)
salmon-refrigerator-32115
12/15/2023, 1:54 AM
From the Databricks Workflows > Runs page, the "flytekit databricks plugin example" run failed with error message:
Unexpected user error while preparing the cluster for the job. Cause: INVALID_PARAMETER_VALUE: Custom containers is turned off for your deployment. Please contact your workspace administrator to use this feature.
glamorous-carpet-83516
12/15/2023, 1:55 AM
curl -X PATCH -n -H "Authorization: Bearer <your-personal-access-token>" \
  https://<databricks-instance>/api/2.0/workspace-conf \
  -d '{"enableDcs": "true"}'
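A successful PATCH returns an empty body; the setting can then be confirmed with a GET against the same endpoint (api/2.0/workspace-conf?keys=enableDcs), which should report {"enableDcs": "true"}. This is standard Databricks workspace-conf API behavior, noted here as a usage aside rather than taken from the thread.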