<#2924 [BUG] Spurious cache misses on dicts that a...
# flyte-github
a
#2924 [BUG] Spurious cache misses on dicts that are nested (_calculate_cache_key is nondeterministic) Issue created by jdanbrown Describe the bug • Spurious cache misses caused by nondeterministic dict key ordering when dicts are contained deeply within a task's inputs, i.e. not just a single dict at the top level • Follow up to flyteorg/flytekit#1126, where this was fixed for top-level dicts but it appears to still be broken for dicts nested more deeply • Below is a repro showing the behavior using
_calculate_cache_key
directly Expected behavior • Caching and cache keys should handle the ordering of keys in a dict deterministically, otherwise any dict in a task's input will make cache behavior surprising, unreliable, and effectively useless Additional context to reproduce
Copy code
import flytekit
flytekit.__version__
# 1.1.2

from typing import Dict

from flytekit.core.context_manager import FlyteContextManager
from flytekit.core.local_cache import LocalTaskCache, _calculate_cache_key
from flytekit.core.type_engine import TypeEngine
from flytekit.models.literals import LiteralMap
import pandas as pd

def calculate_cache_key_multiple_times(x, n=1000):
    return pd.Series([
        _calculate_cache_key(
            task_name='task_name',
            cache_version='cache_version',
            input_literal_map=LiteralMap(literals={
                'd': TypeEngine.to_literal(
                    ctx=FlyteContextManager.current_context(),
                    expected=TypeEngine.to_literal_type(Dict),
                    python_type=Dict,
                    python_val=x,
                ),
            }),
        )
        for _ in range(1000)
    ]).value_counts()

# Works: Top-level dict
calculate_cache_key_multiple_times(dict(a=1, b=2, c=3))
# task_name-cache_version-befdecbedf3326069d65e9eb3fd19627    1000
# dtype: int64

# Fails: Nested dict
calculate_cache_key_multiple_times(dict(x=dict(a=1, b=2, c=3)))
# task_name-cache_version-2ab7d5574323f72f6ea65071764f62db    194
# task_name-cache_version-4411dff72f6b8639dc8bfacf09e22854    169
# task_name-cache_version-7e45e245aaa6fd107f0aa3fc6285c2bc    166
# task_name-cache_version-8fadb0aeae75179ecfe0f3f4dd23d59d    163
# task_name-cache_version-955fadbcc11cb7d2a5e68d45b1a76cad    160
# task_name-cache_version-e409a264a0d77817321d50bd841d02d1    148
# dtype: int64

# Fails: Even more deeply nested dict
calculate_cache_key_multiple_times(dict(xs=[dict(a=1, b=2, c=3), dict(y=dict(a=10, b=20, c=30))]))
# task_name-cache_version-fd78629a9c9af36ffb0d91035b7bc488    44
# task_name-cache_version-b424b801688a32c4d9db2a1c1a9cfa4b    36
# task_name-cache_version-a0750b423aca01fdd4e8a7083f3eff45    35
# task_name-cache_version-263b68860a07f41970805195c835577b    34
# task_name-cache_version-f2e4f476c7e9d44dd3448c0f56d42739    32
# task_name-cache_version-84eddae03ae5e9b88ce53d643da5646b    32
# task_name-cache_version-9968553efeb91852777a6e1ef4819356    31
# task_name-cache_version-0e7485ff89dc7281199f9f0cf4998de3    31
# task_name-cache_version-83b1d88ac36806ad6bb92e65ba4e1023    31
# task_name-cache_version-a03681104f5b636da8124590d5a04a82    30
# task_name-cache_version-55cdb8ecccf905ff4884b1f0ec6cb61a    30
# task_name-cache_version-fff3a0d28d19739819ed2f9ba54c7937    30
# task_name-cache_version-a2126baf4150ca4c5a92c53c5981af03    29
# task_name-cache_version-f606ef6e85467e07a751927633a6f89a    29
# task_name-cache_version-9ad94b361429b65bbad35ea59d325c33    29
# task_name-cache_version-34e0a144346168ec12bf530b1499cf81    29
# task_name-cache_version-d3316e34044105b96a6b3e403495d285    28
# task_name-cache_version-302a73a2dd0c9fdd017dda639e0a7a46    28
# task_name-cache_version-08227914b2bbb70bec7d9d72d926e06f    28
# task_name-cache_version-aef63b179a416e4b66eb2c8c04f20dd0    28
# task_name-cache_version-173db4c030da3826d6b248e8168cc31b    28
# task_name-cache_version-3f528921b47cc3ea7661c391e78d538e    28
# task_name-cache_version-73b1f6cced357ce196ac0513ad75b475    27
# task_name-cache_version-944df7ed103cd38669d91877ba95279e    27
# task_name-cache_version-8935b61725ad9aa26f105f6697001e87    27
# task_name-cache_version-2019c860c84f004c7f05b681d02d79a7    27
# task_name-cache_version-42d535337971811dcd2c09bd5867d825    26
# task_name-cache_version-fea33027cbeb65acb01024f3774f4f58    26
# task_name-cache_version-17358a05dbfb21918a183165e5008f89    24
# task_name-cache_version-1d127910b89884a198766f5f0f7c44ea    23
# task_name-cache_version-41293353347bd6079797c3c3a8d63355    22
# task_name-cache_version-c739559c91949c86216a2ec9c770331f    21
# task_name-cache_version-d3a69868098fd85edc0c7fa8b6f88dea    21
# task_name-cache_version-17421c3a7c664e727b48c85595d6d6c0    19
# task_name-cache_version-b5c3b63aefdba368908b4078226a7c5b    17
# task_name-cache_version-4b87197f69490c46a1a23767e8c4cb2f    13
# dtype: int64
Screenshots No response Are you sure this issue hasn't been raised already? ☑︎ Yes Have you read the Code of Conduct? ☑︎ Yes flyteorg/flyte