Skip to main content

Python module

cache_params

KVCacheParams

class max.nn.kv_cache.cache_params.KVCacheParams(dtype: max._core.dtype.DType, n_kv_heads: int, head_dim: int, enable_prefix_caching: bool = False, enable_kvcache_swapping_to_host: bool = False, host_kvcache_swap_space_gb: Optional[float] = None, cache_strategy: max.nn.kv_cache.cache_params.KVCacheStrategy = <KVCacheStrategy.CONTINUOUS: 'continuous'>, page_size: Optional[int] = None, n_devices: int = 1)

Parameters:

  • dtype (DType)
  • n_kv_heads (int)
  • head_dim (int)
  • enable_prefix_caching (bool)
  • enable_kvcache_swapping_to_host (bool)
  • host_kvcache_swap_space_gb (float | None)
  • cache_strategy (KVCacheStrategy)
  • page_size (int | None)
  • n_devices (int)

cache_strategy

cache_strategy: KVCacheStrategy = 'continuous'

dtype

dtype: DType

dtype_shorthand

property dtype_shorthand: str

The shorthand textual representation of the dtype.

enable_kvcache_swapping_to_host

enable_kvcache_swapping_to_host: bool = False

enable_prefix_caching

enable_prefix_caching: bool = False

head_dim

head_dim: int

host_kvcache_swap_space_gb

host_kvcache_swap_space_gb: float | None = None

n_devices

n_devices: int = 1

n_kv_heads

n_kv_heads: int

page_size

page_size: int | None = None

static_cache_shape

property static_cache_shape: tuple[str, str, str, str, str]

KVCacheStrategy

class max.nn.kv_cache.cache_params.KVCacheStrategy(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

CONTINUOUS

CONTINUOUS = 'continuous'

MODEL_DEFAULT

MODEL_DEFAULT = 'model_default'

PAGED

PAGED = 'paged'

kernel_substring()

kernel_substring()

Returns the common substring that we include in the kernel name for this caching strategy.

Return type:

str

uses_opaque()

uses_opaque()

Return type:

bool