Is it possible to have a runtime argument specify number of GPUs? | Flyte #flyte-support

Is it possible to have a runtime argument specify ...

worried-airplane-87065

01/09/2025, 7:03 PM

Is it possible to have a runtime argument specify number of GPUs? I've been trying something like this but I'm running into all sorts of errors

Copy code

from dataclasses import dataclass

import torch
from flytekit import Resources, task, workflow


@dataclass
class RuntimeGPU:
    """GPU visibility as reported by torch from inside a task."""

    # Filled from torch.cuda.is_available().
    available: bool
    # Filled from torch.cuda.device_count().
    device_count: int


# Limits need to be set on the task.
# See https://github.com/flyteorg/flytekit/pull/2151
@task(
    requests=Resources(cpu="4", mem="32Gi", gpu="2"),
    limits=Resources(cpu="4", mem="32Gi", gpu="2"),
)
def cuda_available() -> RuntimeGPU:
    """Report whether torch sees CUDA and how many CUDA devices it counts.

    Returns:
        A RuntimeGPU holding torch.cuda.is_available() and
        torch.cuda.device_count().
    """
    has_cuda = torch.cuda.is_available()
    visible_devices = torch.cuda.device_count()
    return RuntimeGPU(available=has_cuda, device_count=visible_devices)


@task
def compute_resources(num_gpus: int) -> Resources:
    """Build a Resources spec with fixed CPU/memory and ``num_gpus`` GPUs.

    Args:
        num_gpus: GPU count; converted to a string for the ``gpu`` field.

    Returns:
        Resources with cpu="4", mem="32Gi" and the requested GPU quantity.
    """
    gpu_quantity = str(num_gpus)
    return Resources(cpu="4", mem="32Gi", gpu=gpu_quantity)


@workflow
def runtime_gpu(num_gpus: int) -> RuntimeGPU:
    """Run ``cuda_available`` with a GPU count taken from a workflow input.

    This is the attempt the question describes as running into errors.
    """
    # NOTE(review): compute_resources is a task, so inside a @workflow this
    # is presumably a promise rather than a concrete Resources value -- confirm.
    resources = compute_resources(num_gpus)

    # The same value is passed as both requests and limits.
    gpu_info = cuda_available().with_overrides(
        requests=resources,
        limits=resources,
    )
    return gpu_info

freezing-airport-6809

01/09/2025, 7:40 PM

You can, using a dynamic workflow.

freezing-airport-6809

01/09/2025, 7:41 PM

@dynamic

worried-airplane-87065

01/09/2025, 7:49 PM

So just changing the decorator to @dynamic, like this?

Copy code

# Proposed change: same body as the @workflow version, with the decorator
# swapped to @dynamic.
@dynamic
def runtime_gpu(num_gpus: int) -> RuntimeGPU:
    """Run ``cuda_available`` with overrides derived from ``num_gpus``."""
    # NOTE(review): compute_resources is still a task call here -- confirm
    # whether its result is usable as a concrete Resources value.
    resources = compute_resources(num_gpus)

    # The same value is passed as both requests and limits.
    gpu_info = cuda_available().with_overrides(
        requests=resources,
        limits=resources,
    )
    return gpu_info

worried-airplane-87065

01/09/2025, 7:50 PM

I also read this https://github.com/flyteorg/flyte/issues/475 and it seems like you can't do it at runtime?

worried-airplane-87065

01/09/2025, 7:53 PM

For some reason my task resource request becomes empty

Copy code

resources: {
  limits: [],
  requests: []
}

worried-airplane-87065

01/09/2025, 8:02 PM

Ahhh ok I got it working. It seems like the resources shown in the Flyte UI aren't an accurate reflection of the compute that the task ends up using. The task output correctly shows 1 GPU when I run it with --num_gpus="1". Posting here in case anyone needs this. Workflow:

Copy code

from dataclasses import dataclass

import torch
from flytekit import Resources, dynamic, task, workflow


@dataclass
class RuntimeGPU:
    """GPU visibility as reported by torch from inside a task."""

    # Filled from torch.cuda.is_available().
    available: bool
    # Filled from torch.cuda.device_count().
    device_count: int


# Limits need to be set on the task.
# See https://github.com/flyteorg/flytekit/pull/2151
@task(
    requests=Resources(cpu="4", mem="32Gi", gpu="2"),
    limits=Resources(cpu="4", mem="32Gi", gpu="2"),
)
def cuda_available() -> RuntimeGPU:
    """Report whether torch sees CUDA and how many CUDA devices it counts.

    Returns:
        A RuntimeGPU holding torch.cuda.is_available() and
        torch.cuda.device_count().
    """
    has_cuda = torch.cuda.is_available()
    visible_devices = torch.cuda.device_count()
    return RuntimeGPU(available=has_cuda, device_count=visible_devices)


@dynamic
def dynamic_gpu_wf(num_gpus: str) -> RuntimeGPU:
    """Invoke ``cuda_available`` with its GPU request/limit set to ``num_gpus``.

    Args:
        num_gpus: GPU quantity, passed as a string straight into Resources.

    Returns:
        The result of ``cuda_available`` with the overrides applied.
    """
    node = cuda_available()
    # CPU and memory are held fixed; only the GPU quantity varies.
    requested = Resources(cpu="4", mem="32Gi", gpu=num_gpus)
    capped = Resources(cpu="4", mem="32Gi", gpu=num_gpus)
    return node.with_overrides(requests=requested, limits=capped)


@workflow
def runtime_gpu(num_gpus: str) -> RuntimeGPU:
    """Entry-point workflow: forward ``num_gpus`` to ``dynamic_gpu_wf``."""
    gpu_report = dynamic_gpu_wf(num_gpus=num_gpus)
    return gpu_report

Open in Slack

Previous Next