firectl reinforcement-fine-tuning-job create [flags]

Examples

# Create a new job:
firectl reinforcement-fine-tuning-job create \
	--base-model llama-v3-8b-instruct \
	--dataset sample-dataset \
	--epochs 5 \
	--output-model name-of-the-trained-model \
	--evaluator accounts/my-account/evaluators/abc123

# Create from source job:
firectl reinforcement-fine-tuning-job create \
	--source-job my-previous-job \
	--output-model new-model
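
# Warm start from a previously trained model (illustrative model name;
# per the flags below, --base-model must not be set together with --warm-start-from):
firectl reinforcement-fine-tuning-job create \
	--warm-start-from my-tuned-model \
	--dataset sample-dataset \
	--evaluator accounts/my-account/evaluators/abc123 \
	--output-model warm-started-model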

Flags

      --base-model string                         The base model for the reinforcement fine-tuning job. Only one of base-model or warm-start-from should be specified.
      --dataset string                            The dataset for the reinforcement fine-tuning job. (Required)
      --output-model string                       The output model for the reinforcement fine-tuning job.
      --job-id string                             The ID of the reinforcement fine-tuning job. If not set, it will be autogenerated.
      --warm-start-from string                    The model to warm start from. If set, base-model must not be set.
      --source-job string                         The source reinforcement fine-tuning job to copy configuration from. If other flags are set, they will override the source job's configuration.
      --evaluator string                          The evaluator resource name to use for the reinforcement fine-tuning job. (Required)
      --mcp-server string                         The MCP server resource name to use for the reinforcement fine-tuning job. (Optional)
      --epochs int32                              The number of epochs for the reinforcement fine-tuning job. (default 1)
      --learning-rate float32                     The learning rate for the reinforcement fine-tuning job. (default 0.0001)
      --max-context-length int32                  Maximum token length for sequences within each training batch. Shorter sequences are concatenated; longer sequences are truncated.
      --batch-size int32                          The batch size in tokens: the maximum number of tokens packed into each training batch/step. A single sequence will not be split across batches.
      --batch-size-samples int32                  Number of samples per gradient update. If set to k, gradients update after every k samples. By default (0), gradients update based on batch-size (tokens).
      --gradient-accumulation-steps int32         The number of batches to accumulate gradients before updating the model parameters. The effective batch size will be batch-size multiplied by this value. (default 1)
      --learning-rate-warmup-steps int32          The number of learning rate warmup steps for the reinforcement fine-tuning job.
      --lora-rank int32                           The rank of the LoRA layers for the reinforcement fine-tuning job. (default 8)
      --optimizer-weight-decay float32            Weight decay (L2 regularization) for the optimizer. The trainer's default is 0.01.
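
For example, a sketch with illustrative hyperparameter values; per --gradient-accumulation-steps above, the effective batch size here is 32768 × 4 = 131072 tokens:

firectl reinforcement-fine-tuning-job create \
	--base-model llama-v3-8b-instruct \
	--dataset sample-dataset \
	--evaluator accounts/my-account/evaluators/abc123 \
	--output-model tuned-model \
	--epochs 3 \
	--learning-rate 0.00005 \
	--lora-rank 16 \
	--batch-size 32768 \
	--gradient-accumulation-steps 4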
                                                  
      --wandb-api-key string                      [WANDB_API_KEY] WandB API Key. (Required if any WandB flag is set)
      --wandb-project string                      [WANDB_PROJECT] WandB Project. (Required if any WandB flag is set)
      --wandb-entity string                       [WANDB_ENTITY] WandB Entity. (Required if any WandB flag is set)
      --wandb                                     Enable WandB
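
For example, a sketch enabling WandB logging; project and entity names are placeholders, and per the bracketed names above each value can instead be supplied via its environment variable (e.g. WANDB_API_KEY):

firectl reinforcement-fine-tuning-job create \
	--source-job my-previous-job \
	--output-model wandb-logged-model \
	--wandb \
	--wandb-api-key "$WANDB_API_KEY" \
	--wandb-project my-project \
	--wandb-entity my-team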
                                                  
      --aws-credentials-secret string             [AWS_CREDENTIALS_SECRET] AWS credentials secret (mutually exclusive with --aws-iam-role)
      --aws-iam-role string                       [AWS_IAM_ROLE_ARN] AWS IAM role ARN (mutually exclusive with --aws-credentials-secret)
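
For example, a sketch supplying AWS access via an IAM role (placeholder ARN); use --aws-credentials-secret instead if you are not using a role, never both:

firectl reinforcement-fine-tuning-job create \
	--source-job my-previous-job \
	--output-model aws-model \
	--aws-iam-role arn:aws:iam::123456789012:role/my-fireworks-role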
                                                  
      --azure-credentials-secret string           [AZURE_CREDENTIALS_SECRET] Azure credentials secret
      --azure-managed-identity-client-id string   [AZURE_MANAGED_IDENTITY_CLIENT_ID] Azure managed identity client ID for Workload Identity Federation
      --azure-tenant-id string                    [AZURE_TENANT_ID] Azure tenant ID (required with --azure-managed-identity-client-id)
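
For example, a sketch using Workload Identity Federation (placeholder IDs); per the flags above, --azure-tenant-id is required whenever --azure-managed-identity-client-id is set:

firectl reinforcement-fine-tuning-job create \
	--source-job my-previous-job \
	--output-model azure-model \
	--azure-managed-identity-client-id 00000000-0000-0000-0000-000000000000 \
	--azure-tenant-id 11111111-1111-1111-1111-111111111111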
                                                  
      --temperature float32                       The randomness of the model's token selection during text generation.
      --top-p float32                             Top-p (nucleus) sampling: limits token selection to the smallest set of candidate tokens whose cumulative probability exceeds top-p.
      --response-candidates-count int32           The number of response candidates to generate per input.
      --max-output-tokens int32                   The maximum number of tokens to generate in the response.
      --top-k int32                               Top-k sampling: limits token selection to the k most probable tokens.
      --extra-body string                         Additional parameters for the inference request as a JSON string. For example: '{"stop": ["\n"]}'
      --quiet                                     If set, only errors will be printed.
      --max-concurrent-rollouts int32             Maximum number of concurrent rollouts during the RFT job. If not set, defaults to the value set in the @evaluation_test header.
      --max-concurrent-evaluations int32          Maximum number of concurrent evaluations during the RFT job. If not set, defaults to the value set in the @evaluation_test header.
      --rl-loss-method string                     RL loss method for underlying trainers. One of {grpo,dapo,gspo-token}.
      --rl-kl-beta float32                        Override KL beta for GRPO-like methods. Must be >= 0.
      --chunk-size int32                          The minimum chunk size to split the dataset before the RL flow. Set to -1 to disable chunking. (default 200)
      --max-inference-replica-count int32         Maximum number of replicas used for batch inference. If not specified, defaults to 1.
      --dry-run                                   Print the request proto without running it.
  -o, --output Output                             Set the output format to "text", "json", or "flag". (default text)
  -h, --help                                      help for create
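
For example, a sketch combining rollout sampling parameters with --dry-run to preview the request proto without launching the job (all values illustrative):

firectl reinforcement-fine-tuning-job create \
	--base-model llama-v3-8b-instruct \
	--dataset sample-dataset \
	--evaluator accounts/my-account/evaluators/abc123 \
	--output-model sampled-model \
	--temperature 0.7 \
	--top-p 0.95 \
	--top-k 40 \
	--max-output-tokens 1024 \
	--response-candidates-count 4 \
	--extra-body '{"stop": ["\n"]}' \
	--dry-run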

Global flags

  -a, --account-id string   The Fireworks account ID. If not specified, reads account_id from ~/.fireworks/auth.ini.
      --api-key string      An API key used to authenticate with Fireworks.
  -p, --profile string      The Fireworks auth and settings profile to use.
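
For example, a sketch selecting an explicit account and profile (placeholder names) for a create invocation:

firectl reinforcement-fine-tuning-job create \
	--source-job my-previous-job \
	--output-model new-model \
	-a my-account \
	-p staging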