Leonardo Almeida
05/06/2022, 8:10 PM
import typing
import pandas as pd
import numpy as np
from flytekit import task, workflow
@task
def generate_normal_df(n: int, mean: float, sigma: float) -> pd.DataFrame:
    """Build a single-column DataFrame of normally distributed samples.

    Args:
        n: Number of samples to draw.
        mean: Centre of the normal distribution.
        sigma: Standard deviation of the normal distribution.

    Returns:
        A DataFrame with one column, ``"numbers"``, holding the ``n`` values
        drawn with ``np.random.normal``.
    """
    samples = np.random.normal(loc=mean, scale=sigma, size=n)
    return pd.DataFrame({"numbers": samples})
@task
def compute_stats(df: pd.DataFrame) -> typing.Tuple[float, float]:
    """Summarise the ``"numbers"`` column of ``df``.

    Args:
        df: DataFrame that contains a ``"numbers"`` column.

    Returns:
        A ``(mean, std)`` tuple of plain Python floats, computed with the
        column's ``mean()`` and ``std()`` methods using their default settings.
    """
    numbers = df["numbers"]
    mean_value = float(numbers.mean())
    std_value = float(numbers.std())
    return mean_value, std_value
@workflow
def wf(n: int = 200, mean: float = 0.0, sigma: float = 1.0) -> typing.Tuple[float, float]:
    """Generate normal samples and return their mean and standard deviation.

    Args:
        n: Number of samples to generate (defaults to 200).
        mean: Mean passed to ``generate_normal_df`` (defaults to 0.0).
        sigma: Standard deviation passed to ``generate_normal_df``
            (defaults to 1.0).

    Returns:
        The ``(mean, std)`` tuple produced by ``compute_stats``.
    """
    # First task produces the samples; second task summarises them.
    samples_df = generate_normal_df(n=n, mean=mean, sigma=sigma)
    return compute_stats(df=samples_df)
When I run the code against the local cluster started with flytectl demo start (k3s), it works. But in the production environment, which uses EKS and an ALB, it does not work.
pyflyte run --remote example.py wf --n 500 --mean 42 --sigma 2 SIGINT(2) ↵ 10055 17:06:22
{"asctime": "2022-05-06 17:06:31,287", "name": "flytekit.cli", "levelname": "ERROR", "message": "Non-auth RPC error <_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.UNAVAILABLE\n\tdetails = \"failed to connect to all addresses\"\n\tdebug_error_string = \"{\"created\":\"@1651867591.287595583\",\"description\":\"Failed to pick subchannel\",\"file\":\"src/core/ext/filters/client_channel/client_channel.cc\",\"file_line\":3128,\"referenced_errors\":[{\"created\":\"@1651867591.287594125\",\"description\":\"failed to connect to all addresses\",\"file\":\"src/core/lib/transport/error_utils.cc\",\"file_line\":163,\"grpc_status\":14}]}\"\n>, sleeping 200ms and retrying"}
{"asctime": "2022-05-06 17:06:32,274", "name": "flytekit.cli", "levelname": "ERROR", "message": "Non-auth RPC error <_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.UNAVAILABLE\n\tdetails = \"failed to connect to all addresses\"\n\tdebug_error_string = \"{\"created\":\"@1651867592.274550785\",\"description\":\"Failed to pick subchannel\",\"file\":\"src/core/ext/filters/client_channel/client_channel.cc\",\"file_line\":3128,\"referenced_errors\":[{\"created\":\"@1651867592.274550285\",\"description\":\"failed to connect to all addresses\",\"file\":\"src/core/lib/transport/error_utils.cc\",\"file_line\":163,\"grpc_status\":14}]}\"\n>, sleeping 400ms and retrying"}
Traceback (most recent call last):
File "/usr/local/bin/pyflyte", line 8, in <module>
sys.exit(main())
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1128, in __call__
return self.main(*args, **kwargs)
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/lalmeida/.local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flytekit/clis/sdk_in_container/run.py", line 471, in _run
wf = remote.register_script(
File "/usr/local/lib/python3.8/dist-packages/flytekit/remote/remote.py", line 536, in register_script
upload_location, md5_bytes = fast_register_single_script(
File "/usr/local/lib/python3.8/dist-packages/flytekit/tools/script_mode.py", line 117, in fast_register_single_script
upload_location = create_upload_location_fn(content_md5=md5)
File "/usr/local/lib/python3.8/dist-packages/flytekit/clients/friendly.py", line 998, in get_upload_signed_url
return super(SynchronousFlyteClient, self).create_upload_location(
File "/usr/local/lib/python3.8/dist-packages/flytekit/clients/raw.py", line 40, in handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flytekit/clients/raw.py", line 834, in create_upload_location
return self._dataproxy_stub.CreateUploadLocation(create_upload_location_request, metadata=self._metadata)
File "/usr/local/lib/python3.8/dist-packages/grpc/_channel.py", line 946, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/usr/local/lib/python3.8/dist-packages/grpc/_channel.py", line 849, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses"
debug_error_string = "{"created":"@1651867592.675517306","description":"Failed to pick subchannel","file":"src/core/ext/filters/client_channel/client_channel.cc","file_line":3128,"referenced_errors":[{"created":"@1651867592.675516568","description":"failed to connect to all addresses","file":"src/core/lib/transport/error_utils.cc","file_line":163,"grpc_status":14}]}"
Do you have any ideas? I'm using Helm with values-eks.yaml
Yee
~/.flyte/config.yaml
and ~/.flyte/config
file if they exist, and also env | grep -i flyte
Leonardo Almeida
05/06/2022, 8:43 PM
admin:
# For GRPC endpoints you might want to use dns:///flyte.myexample.com
endpoint: dns:///internal-k8s-flyte-a58b2df7fa-1695464265.us-east-1.elb.amazonaws.com
authType: Pkce
insecure: true # Only required when using an insecure ingress. Setting this to true against a secure ingress may cause an "unavailable desc" error.
Yee
Leonardo Almeida
05/06/2022, 8:44 PM
Yee
Leonardo Almeida
05/06/2022, 8:47 PM
kubectl get ingress -n flyte 10067 17:46:32
NAME CLASS HOSTS ADDRESS PORTS AGE
flyte-core <none> * internal-k8s-flyte-a58b2df7fa-1695464265.us-east-1.elb.amazonaws.com 80 4h49m
flyte-core-grpc <none> * internal-k8s-flyte-a58b2df7fa-1695464265.us-east-1.elb.amazonaws.com 80 4h49m
Yee
Leonardo Almeida
05/09/2022, 9:22 PM
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNKNOWN
details = "failed to create a signed url. Error: WebIdentityErr: failed to retrieve credentials
caused by: ValidationError: Request ARN is invalid
status code: 400, request id: 6b14cd70-5a90-43a7-a8f2-e03997c937db"
debug_error_string = "{"created":"@1652131218.092115614","description":"Error received from peer ipv4:10.0.28.93:443","file":"src/core/lib/surface/call.cc","file_line":903,"grpc_message":"failed to create a signed url. Error: WebIdentityErr: failed to retrieve credentials\ncaused by: ValidationError: Request ARN is invalid\n\tstatus code: 400, request id: 6b14cd70-5a90-43a7-a8f2-e03997c937db","grpc_status":2}"
>
Yee
Leonardo Almeida
05/10/2022, 1:28 PM