Config

helical.models.hyena_dna.HyenaDNAConfig

Configuration class for the Hyena DNA model.

Parameters:

model_name : Literal['hyenadna-tiny-1k-seqlen', 'hyenadna-tiny-1k-seqlen-d256'], default "hyenadna-tiny-1k-seqlen"
    The name of the model.
batch_size : int, default 5
    The batch size to use for all tasks.
n_layer : int, default 2
    The number of layers in the model.
vocab_size : int, default 12
    The size of the vocabulary.
resid_dropout : float, default 0.0
    The dropout rate for residual connections.
embed_dropout : float, default 0.1
    The dropout rate for the embedding layer.
fused_mlp : bool, default False
    Whether to use fused MLP.
fused_dropout_add_ln : bool, default True
    Whether to use fused dropout and layer normalization.
residual_in_fp32 : bool, default True
    Whether to use FP32 for residual connections.
checkpoint_mixer : bool, default False
    Whether to use checkpointing for mixer layers.
checkpoint_mlp : bool, default False
    Whether to use checkpointing for MLP layers.
pad_vocab_size_multiple : int, default 8
    The multiple to which the vocabulary size is padded.
return_hidden_state : bool, default True
    Whether to return the hidden state.
device : Literal['cpu', 'cuda'], default "cpu"
    The device to use.
layer : dict, default {"_name_": "hyena", "emb_dim": 5, "filter_order": 64, "local_order": 3, "l_max": 1026, "modulate": True, "w": 10, "lr": 6e-4, "wd": 0.0, "lr_pos_emb": 0.0}
    Dictionary containing layer-specific parameters.

Attributes:

model_map : dict
    A dictionary mapping model names to their corresponding configuration parameters.
config : dict
    A dictionary containing the configuration parameters for the Hyena DNA model.
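
For illustration, the resolved settings can be read back from the config attribute; the values in the comments follow the defaults above and the model_map in the source listing further down.

from helical.models.hyena_dna import HyenaDNAConfig  # import path assumed as above

cfg = HyenaDNAConfig()                # all defaults, i.e. "hyenadna-tiny-1k-seqlen"
print(cfg.config["d_model"])          # 128 for this checkpoint (256 for the d256 variant)
print(cfg.config["max_length"])       # 1024
print(cfg.config["batch_size"])       # 5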

Raises:

ValueError
    If the specified model name is not found in the available models.
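
A sketch of this failure mode, using a deliberately made-up checkpoint name (any name outside the two supported models triggers the same error):

from helical.models.hyena_dna import HyenaDNAConfig  # import path assumed as above

try:
    HyenaDNAConfig(model_name="hyenadna-unsupported-checkpoint")  # hypothetical name, not in model_map
except ValueError as err:
    print(err)  # message lists the available model names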

Source code in helical/models/hyena_dna/hyena_dna_config.py
class HyenaDNAConfig:
    """
    Configuration class for the Hyena DNA model.

    Parameters
    ----------
    model_name : Literal["hyenadna-tiny-1k-seqlen", "hyenadna-tiny-1k-seqlen-d256"], optional, default="hyenadna-tiny-1k-seqlen"
        The name of the model.
    batch_size : int, optional, default=5
        The batch size to use for all tasks.
    n_layer : int, optional, default=2
        The number of layers in the model.
    vocab_size : int, optional, default=12
        The size of the vocabulary.
    resid_dropout : float, optional, default=0.0
        The dropout rate for residual connections.
    embed_dropout : float, optional, default=0.1
        The dropout rate for the embedding layer.
    fused_mlp : bool, optional, default=False
        Whether to use fused MLP.
    fused_dropout_add_ln : bool, optional, default=True
        Whether to use fused dropout and layer normalization.
    residual_in_fp32 : bool, optional, default=True
        Whether to use FP32 for residual connections.
    checkpoint_mixer : bool, optional, default=False
        Whether to use checkpointing for mixer layers.
    checkpoint_mlp : bool, optional, default=False
        Whether to use checkpointing for MLP layers.
    pad_vocab_size_multiple : int, optional, default=8
        The multiple to which the vocabulary size is padded.
    return_hidden_state : bool, optional, default=True
        Whether to return the hidden state.
    device : Literal["cpu", "cuda"], optional, default="cpu"
        The device to use.
    layer : dict, optional, default={"_name_": "hyena", "emb_dim": 5, "filter_order": 64, "local_order": 3, "l_max": 1026, "modulate": True, "w": 10, "lr": 6e-4, "wd": 0.0, "lr_pos_emb": 0.0}
        Dictionary containing layer-specific parameters.

    Attributes
    ----------
    model_map : dict
        A dictionary mapping model names to their corresponding configuration parameters.
    config : dict
        A dictionary containing the configuration parameters for the Hyena DNA model.

    Raises
    ------
    ValueError
        If the specified model name is not found in the available models.

    """

    def __init__(
        self,
        model_name: Literal[
            "hyenadna-tiny-1k-seqlen", "hyenadna-tiny-1k-seqlen-d256"
        ] = "hyenadna-tiny-1k-seqlen",
        batch_size: int = 5,
        n_layer: int = 2,
        vocab_size: int = 12,
        resid_dropout: float = 0.0,
        embed_dropout: float = 0.1,
        fused_mlp: bool = False,
        fused_dropout_add_ln: bool = True,
        residual_in_fp32: bool = True,
        checkpoint_mixer: bool = False,
        checkpoint_mlp: bool = False,
        pad_vocab_size_multiple: int = 8,
        return_hidden_state: bool = True,
        device: Literal["cpu", "cuda"] = "cpu",
        layer: dict = {
            "_name_": "hyena",
            "emb_dim": 5,
            "filter_order": 64,
            "local_order": 3,
            "l_max": 1026,
            "modulate": True,
            "w": 10,
            "lr": 6e-4,
            "wd": 0.0,
            "lr_pos_emb": 0.0,
        },
    ):

        # model specific parameters
        self.model_map = {
            "hyenadna-tiny-1k-seqlen": {
                "d_model": 128,
                "d_inner": 512,
                "max_length": 1024,  # for max_length see https://github.com/HazyResearch/hyena-dna/blob/main/huggingface.py
            },
            "hyenadna-tiny-1k-seqlen-d256": {
                "d_model": 256,
                "d_inner": 1024,
                "max_length": 1024,
            },
        }

        if model_name not in self.model_map:
            raise ValueError(
                f"Model name {model_name} not found in available models: {self.model_map.keys()}"
            )

        list_of_files_to_download = [f"hyena_dna/{model_name}.ckpt"]

        self.config = {
            "model_name": model_name,
            "model_path": Path(CACHE_DIR_HELICAL, f"hyena_dna/{model_name}.ckpt"),
            "list_of_files_to_download": list_of_files_to_download,
            "batch_size": batch_size,
            "d_model": self.model_map[model_name]["d_model"],
            "n_layer": n_layer,
            "d_inner": self.model_map[model_name]["d_inner"],
            "vocab_size": vocab_size,
            "resid_dropout": resid_dropout,
            "embed_dropout": embed_dropout,
            "fused_mlp": fused_mlp,
            "fused_dropout_add_ln": fused_dropout_add_ln,
            "residual_in_fp32": residual_in_fp32,
            "checkpoint_mixer": checkpoint_mixer,
            "checkpoint_mlp": checkpoint_mlp,
            "pad_vocab_size_multiple": pad_vocab_size_multiple,
            "return_hidden_state": return_hidden_state,
            "device": device,
            "layer": layer,
            "max_length": self.model_map[model_name]["max_length"],
        }