Create Reinforcement Fine-tuning Step

curl --request POST \
  --url https://api.fireworks.ai/v1/accounts/{account_id}/rlorTrainerJobs \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "displayName": "<string>",
  "dataset": "<string>",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "rewardWeights": [
    "<string>"
  ],
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "azureBlobStorageConfig": {
    "credentialsSecret": "<string>",
    "managedIdentityClientId": "<string>",
    "tenantId": "<string>"
  },
  "keepAlive": true,
  "rolloutDeploymentName": "<string>",
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "nodeCount": 123,
  "serviceMode": true,
  "hotLoadDeploymentId": "<string>",
  "usePurpose": "<string>",
  "forwardOnly": true
}
'

{
  "name": "<string>",
  "displayName": "<string>",
  "createTime": "2023-11-07T05:31:56Z",
  "completedTime": "2023-11-07T05:31:56Z",
  "dataset": "<string>",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "state": "JOB_STATE_UNSPECIFIED",
  "status": {
    "code": "OK",
    "message": "<string>"
  },
  "createdBy": "<string>",
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "rewardWeights": [
    "<string>"
  ],
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>",
    "url": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "azureBlobStorageConfig": {
    "credentialsSecret": "<string>",
    "managedIdentityClientId": "<string>",
    "tenantId": "<string>"
  },
  "jobProgress": {
    "percent": 123,
    "epoch": 123,
    "totalInputRequests": 123,
    "totalProcessedRequests": 123,
    "successfullyProcessedRequests": 123,
    "failedRequests": 123,
    "outputRows": 123,
    "inputTokens": 123,
    "outputTokens": 123,
    "cachedInputTokenCount": 123
  },
  "keepAlive": true,
  "rolloutDeploymentName": "<string>",
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "nodeCount": 123,
  "acceleratorSeconds": {},
  "serviceMode": true,
  "directRouteHandle": "<string>",
  "hotLoadDeploymentId": "<string>",
  "usePurpose": "<string>",
  "forwardOnly": true
}

POST

accounts

{account_id}

rlorTrainerJobs

Create Reinforcement Fine-tuning Step

curl --request POST \
  --url https://api.fireworks.ai/v1/accounts/{account_id}/rlorTrainerJobs \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "displayName": "<string>",
  "dataset": "<string>",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "rewardWeights": [
    "<string>"
  ],
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "azureBlobStorageConfig": {
    "credentialsSecret": "<string>",
    "managedIdentityClientId": "<string>",
    "tenantId": "<string>"
  },
  "keepAlive": true,
  "rolloutDeploymentName": "<string>",
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "nodeCount": 123,
  "serviceMode": true,
  "hotLoadDeploymentId": "<string>",
  "usePurpose": "<string>",
  "forwardOnly": true
}
'

{
  "name": "<string>",
  "displayName": "<string>",
  "createTime": "2023-11-07T05:31:56Z",
  "completedTime": "2023-11-07T05:31:56Z",
  "dataset": "<string>",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "state": "JOB_STATE_UNSPECIFIED",
  "status": {
    "code": "OK",
    "message": "<string>"
  },
  "createdBy": "<string>",
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "rewardWeights": [
    "<string>"
  ],
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>",
    "url": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "azureBlobStorageConfig": {
    "credentialsSecret": "<string>",
    "managedIdentityClientId": "<string>",
    "tenantId": "<string>"
  },
  "jobProgress": {
    "percent": 123,
    "epoch": 123,
    "totalInputRequests": 123,
    "totalProcessedRequests": 123,
    "successfullyProcessedRequests": 123,
    "failedRequests": 123,
    "outputRows": 123,
    "inputTokens": 123,
    "outputTokens": 123,
    "cachedInputTokenCount": 123
  },
  "keepAlive": true,
  "rolloutDeploymentName": "<string>",
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "nodeCount": 123,
  "acceleratorSeconds": {},
  "serviceMode": true,
  "directRouteHandle": "<string>",
  "hotLoadDeploymentId": "<string>",
  "usePurpose": "<string>",
  "forwardOnly": true
}

Authorizations

Authorization

string

header

required

Bearer authentication using your Fireworks API key. Format: Bearer <API_KEY>

Path Parameters

account_id

string

required

The account ID.

Query Parameters

rlorTrainerJobId

string

ID of the RLOR trainer job. If not specified, a random UUID will be generated.

Body

application/json

displayName

string

dataset

string

The name of the dataset used for training.

evaluationDataset

string

The name of a separate dataset to use for evaluation.

evalAutoCarveout

boolean

Whether to auto-carve the dataset for eval.

trainingConfig

BaseTrainingConfig contains common configuration fields shared across different training job types. · object

Common training configurations.

Show child attributes

rewardWeights

string[]

A list of reward metrics to use for training, each in the format "<reward_name>=<weight>".

wandbConfig

object

The Weights & Biases team/user account for logging training progress.

Show child attributes

awsS3Config

object

The AWS configuration for S3 dataset access.

Show child attributes

azureBlobStorageConfig

object

The Azure configuration for Azure Blob Storage dataset access.

Show child attributes

keepAlive

boolean

rolloutDeploymentName

string

Rollout deployment name associated with this RLOR trainer job. This is optional. If not set, the trainer will not trigger weight sync to the rollout engine.

lossConfig

object

Reinforcement learning loss method + hyperparameters for the underlying trainer.

Show child attributes

nodeCount

integer<int32>

The number of nodes to use for the fine-tuning job. If not specified, the default is 1.

serviceMode

boolean

Service-mode RLOR trainers currently support full-parameter tuning only. When enabled, trainingConfig.loraRank must be 0 (loraRank>0 is rejected).

hotLoadDeploymentId

string

The deployment ID used for hot loading. When set, checkpoints are saved to this deployment's hot load bucket, enabling weight swaps on inference. Only valid for service-mode or keep-alive jobs.

usePurpose

string

Use dedicated resources for the job. The only supported value currently is "pilot". Defaults to empty.

forwardOnly

boolean

When true, run the trainer in forward-only mode (no backward/optimizer). Used for reference models in GRPO that only need forward passes.

Response

200 - application/json

A successful response.

name

string

displayName

string

createTime

string<date-time>

completedTime

string<date-time>

dataset

string

The name of the dataset used for training.

evaluationDataset

string

The name of a separate dataset to use for evaluation.

evalAutoCarveout

boolean

Whether to auto-carve the dataset for eval.

state

enum<string>

default:JOB_STATE_UNSPECIFIED

JobState represents the state an asynchronous job can be in.

JOB_STATE_PAUSED: Job is paused, typically due to account suspension or manual intervention.

Available options:

JOB_STATE_UNSPECIFIED,

JOB_STATE_CREATING,

JOB_STATE_RUNNING,

JOB_STATE_COMPLETED,

JOB_STATE_FAILED,

JOB_STATE_CANCELLED,

JOB_STATE_DELETING,

JOB_STATE_WRITING_RESULTS,

JOB_STATE_VALIDATING,

JOB_STATE_DELETING_CLEANING_UP,

JOB_STATE_PENDING,

JOB_STATE_EXPIRED,

JOB_STATE_RE_QUEUEING,

JOB_STATE_CREATING_INPUT_DATASET,

JOB_STATE_IDLE,

JOB_STATE_CANCELLING,

JOB_STATE_EARLY_STOPPED,

JOB_STATE_PAUSED

status

Mimics [https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto] · object

Show child attributes

createdBy

string

The email address of the user who initiated this fine-tuning job.

trainingConfig

BaseTrainingConfig contains common configuration fields shared across different training job types. · object

Common training configurations.

Show child attributes

rewardWeights

string[]

A list of reward metrics to use for training, each in the format "<reward_name>=<weight>".

wandbConfig

object

The Weights & Biases team/user account for logging training progress.

Show child attributes

awsS3Config

object

The AWS configuration for S3 dataset access.

Show child attributes

azureBlobStorageConfig

object

The Azure configuration for Azure Blob Storage dataset access.

Show child attributes

jobProgress

object

Job progress.

Show child attributes

keepAlive

boolean

rolloutDeploymentName

string

Rollout deployment name associated with this RLOR trainer job. This is optional. If not set, the trainer will not trigger weight sync to the rollout engine.

lossConfig

object

Reinforcement learning loss method + hyperparameters for the underlying trainer.

Show child attributes

nodeCount

integer<int32>

The number of nodes to use for the fine-tuning job. If not specified, the default is 1.

acceleratorSeconds

object

Accelerator seconds used by the job, keyed by accelerator type (e.g., "NVIDIA_H100_80GB"). Updated periodically.

Show child attributes

serviceMode

boolean

Service-mode RLOR trainers currently support full-parameter tuning only. When enabled, trainingConfig.loraRank must be 0 (loraRank>0 is rejected).

directRouteHandle

string

hotLoadDeploymentId

string

The deployment ID used for hot loading. When set, checkpoints are saved to this deployment's hot load bucket, enabling weight swaps on inference. Only valid for service-mode or keep-alive jobs.

usePurpose

string

Use dedicated resources for the job. The only supported value currently is "pilot". Defaults to empty.

forwardOnly

boolean

When true, run the trainer in forward-only mode (no backward/optimizer). Used for reference models in GRPO that only need forward passes.

Resume Reinforcement Fine-tuning Job

List Reinforcement Fine-tuning Steps

⌘I

API Reference

Inference

Training SDK

Deployments

Fine-tuning

Evals

Multimedia

Admin

Create Reinforcement Fine-tuning Step

Authorizations

Path Parameters

Query Parameters

Body

Response