> ## Documentation Index
> Fetch the complete documentation index at: https://docs.fireworks.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Cost Estimator

> Estimate and optimize the cost of your RFT training jobs

export const RftCostCalculator = () => {
  // Per-model hardware overrides, keyed by full model ID. Each entry gives the
  // GPU count and the GPU type (a key of GPU_PRICING) used for that model.
  // Models without an entry here get the default returned by getGpuConfig.
  const MODEL_GPU_CONFIG = {
    "accounts/fireworks/models/kimi-k2-instruct-0905": {
      gpus: 8,
      gpuType: "NVIDIA_B200_180GB"
    },
    "accounts/fireworks/models/kimi-k2p5": {
      gpus: 8,
      gpuType: "NVIDIA_B200_180GB"
    },
    "accounts/fireworks/models/deepseek-v2-lite-chat": {
      gpus: 8,
      gpuType: "NVIDIA_H200_141GB"
    },
    "accounts/fireworks/models/deepseek-r1-0528": {
      gpus: 8,
      gpuType: "NVIDIA_H200_141GB"
    },
    "accounts/fireworks/models/deepseek-r1-distill-qwen-14b": {
      gpus: 8,
      gpuType: "NVIDIA_H200_141GB"
    }
  };
  // Price in USD per GPU-hour for each GPU type; the selected model's rate is
  // shown in the cost card as "$<rate>/GPU-hour" and multiplied by GPU hours.
  const GPU_PRICING = {
    "NVIDIA_A100_80GB": 2.9,
    "NVIDIA_H100_80GB": 4.0,
    "NVIDIA_H200_141GB": 6.0,
    "NVIDIA_B200_180GB": 9.0
  };
  // Full model IDs offered in the "Base Model" dropdown, in display order.
  // The first entry is the initially selected model.
  const COMMON_MODELS = [
    "qwen3-0p6b",
    "qwen2p5-coder-14b-instruct",
    "kimi-k2-instruct-0905",
    "kimi-k2p5",
    "qwen3-235b-a22b-instruct-2507",
    "qwen3-coder-30b-a3b-instruct",
    "qwen3-32b",
    "gpt-oss-20b",
    "llama-v3p1-8b-instruct",
    "gpt-oss-120b",
    "qwen3-8b",
    "deepseek-v2-lite-chat",
    "deepseek-r1-0528",
    "qwen3p5-9b",
    "qwen3p5-27b",
    "qwen3p5-35b-a3b",
    "qwen3p5-122b-a10b"
  ].map(shortId => `accounts/fireworks/models/${shortId}`);
  /**
   * Resolve the GPU setup for a model.
   *
   * @param {string} modelName - Full model ID used as the lookup key.
   * @returns {{gpus: number, gpuType: string}} The entry from
   *   MODEL_GPU_CONFIG when one exists, otherwise a fresh default of
   *   4 x NVIDIA_H200_141GB.
   */
  const getGpuConfig = modelName => {
    const override = MODEL_GPU_CONFIG[modelName];
    if (override) {
      return override;
    }
    return { gpus: 4, gpuType: "NVIDIA_H200_141GB" };
  };
  /**
   * Estimate a model's parameter count from its ID.
   *
   * Known model families are matched by substring first; otherwise the size
   * is read from a "-<digits>p<digits><b|m>" (decimal, e.g. "-0p6b") or
   * "-<digits><b|m>" (integer, e.g. "-8b") token that is followed by "-" or
   * the end of the string.
   *
   * @param {string} modelName - Model ID to inspect.
   * @returns {number} Parameter count, or 0 when no size can be determined.
   */
  const parseParameterCount = modelName => {
    // Checked in order, so "deepseek-v2-lite" must precede the broader
    // "deepseek-v2". "k2" also covers IDs containing "k2p5".
    const knownSizes = [
      ["k2", 1024_000_000_000],
      ["deepseek-r1-0528", 671_000_000_000],
      ["deepseek-v3", 671_000_000_000],
      ["deepseek-v2-lite", 15_700_000_000],
      ["deepseek-v2", 236_000_000_000]
    ];
    const unitScale = { b: 1_000_000_000, m: 1_000_000 };

    const needle = modelName.toLowerCase();
    const known = knownSizes.find(([fragment]) => needle.includes(fragment));
    if (known !== undefined) {
      return known[1];
    }

    // Decimal sizes take precedence over integer sizes anywhere in the ID.
    const fractional = (/-(\d+)p(\d+)([bm])(?:-|$)/i).exec(modelName);
    if (fractional !== null) {
      const [, intDigits, fracDigits, unitChar] = fractional;
      const magnitude = parseInt(intDigits, 10) + parseInt(fracDigits, 10) / Math.pow(10, fracDigits.length);
      return magnitude * unitScale[unitChar.toLowerCase()];
    }

    const integral = (/-(\d+)([bm])(?:-|$)/i).exec(modelName);
    if (integral !== null) {
      const [, digits, unitChar] = integral;
      return parseInt(digits, 10) * unitScale[unitChar.toLowerCase()];
    }

    return 0;
  };
  // One dropdown entry per common model: its ID, resolved GPU setup, and
  // whether it is treated as free (a known size strictly below 16B parameters).
  const models = COMMON_MODELS.map(baseModel => {
    const { gpus, gpuType } = getGpuConfig(baseModel);
    const paramCount = parseParameterCount(baseModel);
    const free = paramCount > 0 && paramCount < 16_000_000_000;
    return { baseModel, gpus, gpuType, free };
  });
  // Selected model (index into `models`) and dataset size.
  const [modelIdx, setModelIdx] = useState(0);
  const [prompts, setPrompts] = useState(500);
  // Each tunable has three pieces of state: the preset chosen in the select,
  // the raw text typed into the "Other" input, and whether "Other" is active.
  const [epochs, setEpochs] = useState(1);
  const [customEpochs, setCustomEpochs] = useState("");
  const [isCustomEpochs, setIsCustomEpochs] = useState(false);
  const [rollouts, setRollouts] = useState(4);
  const [customRollouts, setCustomRollouts] = useState("");
  const [isCustomRollouts, setIsCustomRollouts] = useState(false);
  const [maxTokens, setMaxTokens] = useState(2048);
  const [customMaxTokens, setCustomMaxTokens] = useState("");
  const [isCustomMaxTokens, setIsCustomMaxTokens] = useState(false);
  const model = models[modelIdx];
  // Fall back to 6.0 when the model's GPU type has no pricing entry.
  const gpuRate = GPU_PRICING[model.gpuType] || 6.0;
  /**
   * Pick the value to use for a tunable.
   *
   * The "Other" inputs declare min={1}, but that attribute does not stop a
   * user from typing negative, fractional, or overflowing values, which
   * would otherwise produce negative or infinite totals and costs.
   *
   * @param {boolean} useCustom - Whether "Other" is selected.
   * @param {string} rawCustom - Raw text from the custom input.
   * @param {number} preset - Value chosen in the select.
   * @param {number} fallback - Default used when the custom text is not a
   *   finite number of at least 1.
   * @returns {number} The preset when no custom text is active; otherwise the
   *   custom value floored to a whole number, or the fallback if invalid.
   */
  const resolveCount = (useCustom, rawCustom, preset, fallback) => {
    if (!useCustom || !rawCustom) {
      return preset;
    }
    const parsed = Math.floor(Number(rawCustom));
    return Number.isFinite(parsed) && parsed >= 1 ? parsed : fallback;
  };
  const effectiveEpochs = resolveCount(isCustomEpochs, customEpochs, epochs, 1);
  const effectiveRollouts = resolveCount(isCustomRollouts, customRollouts, rollouts, 4);
  const effectiveMaxTokens = resolveCount(isCustomMaxTokens, customMaxTokens, maxTokens, 2048);
  // The calculator assumes an average response uses 60% of the max-token limit.
  const avgTokens = Math.round(effectiveMaxTokens * 0.6);
  const totalRolloutTokens = prompts * effectiveEpochs * effectiveRollouts * avgTokens;
  const totalTokensMillion = totalRolloutTokens / 1_000_000;
  /**
   * Look up the GPU-hours-per-million-tokens bounds for a model size.
   *
   * @param {number} params - Parameter count; 0 gets its own dedicated range
   *   (0 is what parseParameterCount returns when it cannot determine a size).
   * @returns {{min: number, max: number}} A fresh object holding the lower and
   *   upper bound for the first size tier that `params` falls under.
   */
  const getEfficiencyRangeByModelSize = params => {
    if (params === 0) {
      return { min: 1.0, max: 8.0 };
    }
    // Ascending, exclusive upper bounds; the first tier that fits wins.
    const tiers = [
      { below: 16_000_000_000, min: 0.4, max: 6.0 },
      { below: 70_000_000_000, min: 0.3, max: 6.4 },
      { below: 300_000_000_000, min: 0.3, max: 14.1 }
    ];
    for (const { below, min, max } of tiers) {
      if (params < below) {
        return { min, max };
      }
    }
    // 300B and above (and any value that compares below no tier, e.g. NaN).
    return { min: 4.0, max: 15.0 };
  };
  // GPU-hours-per-million-tokens bounds for the selected model's size.
  const {
    min: gpuHoursPerMtokMin,
    max: gpuHoursPerMtokMax
  } = getEfficiencyRangeByModelSize(parseParameterCount(model.baseModel));
  const totalGpuHoursMin = totalTokensMillion * gpuHoursPerMtokMin;
  const totalGpuHoursMax = totalTokensMillion * gpuHoursPerMtokMax;
  // Free models always cost 0; otherwise cost is GPU hours times the hourly rate.
  const priceFor = gpuHours => {
    if (model.free) {
      return 0;
    }
    return gpuHours * gpuRate;
  };
  const costMin = priceFor(totalGpuHoursMin);
  const costMax = priceFor(totalGpuHoursMax);
  const totalRolloutsCount = prompts * effectiveEpochs * effectiveRollouts;
  // Shared Tailwind class strings for the form controls and their labels.
  const inputClasses = "w-full px-3 py-2 rounded-lg text-sm border border-zinc-300 dark:border-zinc-500 bg-white dark:bg-zinc-800 text-zinc-900 dark:text-zinc-100 outline-none focus:ring-2 focus:ring-purple-500/40 dark:focus:ring-purple-500/60 focus:border-purple-500/50 dark:focus:border-purple-400 transition-colors";
  const labelClasses = "block text-xs font-medium mb-1 text-zinc-500 dark:text-zinc-400";
  // Render the calculator card: a grid of input controls followed by a
  // 2-column grid of result tiles derived from the values computed above.
  return <div className="not-prose mt-4 mb-4 p-6 rounded-2xl border border-zinc-200 dark:border-zinc-700 bg-white dark:bg-zinc-900">
      {/* Input controls grid */}
      <div style={{
    display: "grid",
    gridTemplateColumns: "repeat(auto-fit, minmax(180px, 1fr))",
    gap: "16px",
    marginBottom: "24px"
  }}>
        {/* Base model selector; spans every grid column. Option values are indexes into `models`. */}
        <div style={{
    gridColumn: "1 / -1"
  }}>
          <label className={labelClasses}>Base Model</label>
          <select value={modelIdx} onChange={e => setModelIdx(Number(e.target.value))} className={inputClasses}>
            {models.map((m, i) => <option key={i} value={i}>
                {m.baseModel}
              </option>)}
          </select>
        </div>

        {/* Dataset prompt count; the change handler stores at least 1 and treats unparsable input as 1 */}
        <div>
          <label className={labelClasses}>Dataset prompts</label>
          <input type="number" min={1} max={100000} value={prompts} onChange={e => setPrompts(Math.max(1, Number(e.target.value) || 1))} className={inputClasses} />
        </div>

        {/* Epochs: preset select; choosing "Other" reveals a free-form number input */}
        <div>
          <label className={labelClasses}>Epochs</label>
          <select value={isCustomEpochs ? "other" : epochs} onChange={e => {
    if (e.target.value === "other") {
      setIsCustomEpochs(true);
    } else {
      setIsCustomEpochs(false);
      setEpochs(Number(e.target.value));
    }
  }} className={inputClasses}>
            {[1, 2, 3, 4, 5].map(n => <option key={n} value={n}>
                {n}
              </option>)}
            <option value="other">Other</option>
          </select>
          {isCustomEpochs && <input type="number" min={1} value={customEpochs} onChange={e => setCustomEpochs(e.target.value)} placeholder="Enter number of epochs" className={`${inputClasses} mt-2`} />}
        </div>

        {/* Response candidates per prompt: preset select with the same "Other" custom input pattern */}
        <div>
          <label className={labelClasses}>Response candidates count (n)</label>
          <select value={isCustomRollouts ? "other" : rollouts} onChange={e => {
    if (e.target.value === "other") {
      setIsCustomRollouts(true);
    } else {
      setIsCustomRollouts(false);
      setRollouts(Number(e.target.value));
    }
  }} className={inputClasses}>
            {[2, 4, 6, 8].map(n => <option key={n} value={n}>
                {n}
              </option>)}
            <option value="other">Other</option>
          </select>
          {isCustomRollouts && <input type="number" min={1} value={customRollouts} onChange={e => setCustomRollouts(e.target.value)} placeholder="Enter number of candidates" className={`${inputClasses} mt-2`} />}
        </div>

        {/* Max tokens per rollout: preset select with the same "Other" custom input pattern */}
        <div>
          <label className={labelClasses}>Max tokens per rollout</label>
          <select value={isCustomMaxTokens ? "other" : maxTokens} onChange={e => {
    if (e.target.value === "other") {
      setIsCustomMaxTokens(true);
    } else {
      setIsCustomMaxTokens(false);
      setMaxTokens(Number(e.target.value));
    }
  }} className={inputClasses}>
            {[256, 512, 1024, 2048, 4096, 8192, 16384].map(n => <option key={n} value={n}>
                {n.toLocaleString()}
              </option>)}
            <option value="other">Other</option>
          </select>
          {isCustomMaxTokens && <input type="number" min={1} value={customMaxTokens} onChange={e => setCustomMaxTokens(e.target.value)} placeholder="Enter max tokens" className={`${inputClasses} mt-2`} />}
        </div>
      </div>

      {/* Results grid */}
      <div className="grid grid-cols-2 gap-3 mb-4">
        {/* Total rollouts tile: prompts x epochs x response candidates */}
        <div className="p-4 rounded-xl text-center border border-zinc-200 dark:border-zinc-700">
          <div className="text-xs uppercase tracking-wider text-zinc-500 dark:text-zinc-400">
            Total Rollouts
          </div>
          <div className="text-2xl font-bold my-1 text-zinc-900 dark:text-zinc-100">
            {totalRolloutsCount.toLocaleString()}
          </div>
          <div className="text-[11px] text-zinc-500 dark:text-zinc-400">
            {prompts.toLocaleString()} × {effectiveEpochs} × {effectiveRollouts}
          </div>
        </div>

        {/* GPU tile: GPU count plus the GPU type with the "NVIDIA_" prefix and memory suffix removed */}
        <div className="p-4 rounded-xl text-center border border-zinc-200 dark:border-zinc-700">
          <div className="text-xs uppercase tracking-wider text-zinc-500 dark:text-zinc-400">
            GPUs
          </div>
          <div className="text-2xl font-bold my-1 text-zinc-900 dark:text-zinc-100">
            {model.gpus}
          </div>
          <div className="text-[11px] text-zinc-500 dark:text-zinc-400">
            {model.gpuType.replace("NVIDIA_", "").split("_")[0]}
          </div>
        </div>

        {/* GPU hours tile: min-max range and the formula inputs behind it */}
        <div className="p-4 rounded-xl text-center border border-zinc-200 dark:border-zinc-700">
          <div className="text-xs uppercase tracking-wider text-zinc-500 dark:text-zinc-400">
            GPU Hours Range
          </div>
          <div className="text-2xl font-bold my-1 text-zinc-900 dark:text-zinc-100">
            {totalGpuHoursMin.toFixed(1)} - {totalGpuHoursMax.toFixed(1)}
          </div>
          <div className="text-[11px] text-zinc-500 dark:text-zinc-400">
            {totalTokensMillion.toFixed(2)}M tokens × {gpuHoursPerMtokMin}-{gpuHoursPerMtokMax} GPU hrs/Mtok
          </div>
        </div>

        {/* Cost tile: green "Free" styling when model.free, otherwise a purple dollar range */}
        <div className={"p-4 rounded-xl text-center border " + (model.free ? "bg-green-500/5 dark:bg-green-500/10 border-green-500/20 dark:border-green-500/30" : "bg-purple-500/5 dark:bg-purple-500/10 border-purple-500/20 dark:border-purple-500/30")}>
          <div className="text-xs uppercase tracking-wider text-zinc-500 dark:text-zinc-400">
            Estimated Cost Range
          </div>
          <div className={"text-2xl font-bold my-1 " + (model.free ? "text-green-600 dark:text-green-400" : "text-purple-600 dark:text-purple-400")}>
            {model.free ? "Free" : `$${costMin.toFixed(2)} - $${costMax.toFixed(2)}`}
          </div>
          <div className="text-[11px] text-zinc-500 dark:text-zinc-400">
            {model.free ? "Models under 16B" : `$${gpuRate}/GPU-hour`}
          </div>
        </div>
      </div>
    </div>;
};

<Tip>
  **Reinforcement Fine-Tuning (RFT)** is free for models under 16B parameters. When creating an RFT job in the UI, filter for free tuning models in the model selection area on the [fine-tuning creation page](https://app.fireworks.ai/dashboard/fine-tuning/create). If kicking off jobs from the terminal, you can find the model ID from the [Model Library](https://app.fireworks.ai/models?filter=LLM\&tunable=true). Note: SFT and DPO jobs are billed per training token for all model sizes—see the [pricing page](https://fireworks.ai/pricing) for details.
</Tip>

## Interactive cost calculator

Select your model and training configuration to get an instant cost estimate. The calculator uses the following formulas:

1. **Total tokens**: Prompts × Epochs × Response candidates × (Max tokens × 0.6), where the 0.6 factor assumes the average response uses 60% of the max-token limit
2. **GPU hours**: (Total tokens ÷ 1M) × (GPU hours per million tokens range, varies by model size)
3. **Cost**: GPU hours × GPU rate per hour

You can derive wall-clock training time from the estimate as: **Training time = GPU hours ÷ Number of GPUs**.

The GPU hours per million tokens range varies by model size and accounts for variability in model efficiency, system overhead, and actual response lengths. Ranges are based on actual RFT job data.

<Warning>
  **Order-of-magnitude estimates only.** This calculator provides estimates and is not intended for real forecasting or budgeting. Actual costs may vary significantly.
</Warning>

<RftCostCalculator />

## How RFT pricing works

Reinforcement fine-tuning jobs are billed based on **GPU-seconds** consumed during training. The total cost depends on three main factors:

1. **Model size** — Determines how many GPUs are needed and the per-GPU-hour rate
2. **Training dataset** — How much data is processed (dataset size × epochs × rollouts)
3. **Rollout generation** — Token generation during training (max tokens × rollouts per prompt)

## Cost formula

The approximate cost of an RFT job can be estimated as:

$$
\text{Cost} = \text{GPU-hours} \times \text{Price per GPU-hour}
$$

Where GPU-hours depend on:

$$
\text{GPU-hours} \approx \text{Num GPUs} \times \left(\frac{\text{Prompts} \times \text{Epochs} \times \text{Rollouts (n)} \times \text{Avg tokens per rollout}}{\text{Throughput (tokens/sec)}}\right) \div 3600
$$

The key variables are:

| Variable                    | Description                    | How to control                      |
| --------------------------- | ------------------------------ | ----------------------------------- |
| **Num GPUs**                | GPUs required for the model    | Determined by model size            |
| **Prompts**                 | Number of rows in your dataset | Your dataset size                   |
| **Epochs**                  | Passes through the dataset     | `--epochs` flag (default: 1)        |
| **Response candidates (n)** | Responses generated per prompt | `--n` flag (default: 4)             |
| **Avg tokens per rollout**  | Average response length        | `--max-tokens` flag (default: 2048) |
| **Throughput**              | Tokens generated per second    | Determined by model + hardware      |

<Note>
  Training time directly translates to cost: **Cost = Training time × Num GPUs × GPU-hour rate**. Check the [pricing page](https://fireworks.ai/pricing) for current GPU-hour rates.
</Note>

### How parameters affect cost

See how each parameter change impacts your total cost relative to a baseline configuration (500 prompts, 1 epoch, n=4, 2048 max tokens):

| Change                             | Cost impact    | Explanation                             |
| ---------------------------------- | -------------- | --------------------------------------- |
| Double dataset size (1000 prompts) | **\~2×**       | Linear scaling with dataset size        |
| Double rollouts (n=8)              | **\~2×**       | Linear scaling with rollout count       |
| Double max tokens (4096)           | **\~1.5–2×**   | More tokens per rollout                 |
| Add epoch (epochs=2)               | **\~2×**       | Full additional pass through data       |
| Double LoRA rank (16 → 32)         | **\~1.2–1.5×** | More trainable parameters               |
| Halve max tokens (1024)            | **\~0.5–0.7×** | Fewer tokens generated                  |
| Halve rollouts (n=2)               | **\~0.5×**     | Fewer rollouts but less learning signal |

## Cost optimization tips

<AccordionGroup>
  <Accordion title="Start with free models">
    Use models under 16B parameters for initial experimentation. Iterate on your evaluator and dataset with `qwen3-0p6b` or `llama-v3p1-8b-instruct` before moving to larger models.

    This lets you:

    * Validate your evaluator logic at zero cost
    * Test dataset quality and format
    * Tune rollout parameters
    * Establish baseline reward curves
  </Accordion>

  <Accordion title="Limit max tokens">
    Set `--max-tokens` to the minimum needed for your task:

    * **Short outputs** (classification, short answers): 256–512 tokens
    * **Medium outputs** (code generation, summaries): 1024–2048 tokens
    * **Long outputs** (detailed analysis, multi-step reasoning): 4096+ tokens

    Every token generated during rollouts costs compute. Don't use 16384 max tokens if your task only needs 512.
  </Accordion>

  <Accordion title="Use 1 epoch first">
    Start with 1 epoch (default). Most RFT jobs converge well within a single pass through the data. Add more epochs only if the reward curve is still climbing at the end of training.
  </Accordion>

  <Accordion title="Optimize evaluator speed">
    Slow evaluators increase wall-clock training time and therefore cost:

    * Keep evaluations under 5 seconds per rollout
    * Cache expensive computations
    * For remote evaluators, ensure your server can handle concurrent requests
    * Avoid unnecessary API calls in your evaluation logic

    **Evaluator complexity impact**: Simple evaluators (self-contained) have minimal overhead. Evaluators with calls to external services, such as LLM-as-judge use cases or company-specific endpoints, may have variable training time due to rate limits by model providers or other services.
  </Accordion>

  <Accordion title="Curate your dataset">
    A smaller, high-quality dataset often outperforms a larger, noisy one:

    * Remove duplicate or near-duplicate prompts
    * Ensure prompts are diverse and representative
    * Start with 200–500 well-chosen prompts
    * Quality over quantity reduces cost while maintaining performance
  </Accordion>
</AccordionGroup>

## Example cost scenarios

<AccordionGroup>
  <Accordion title="Scenario 1: Quick prototype (Free)">
    **Goal**: Test an evaluator on a small model

    | Parameter          | Value           |
    | ------------------ | --------------- |
    | Model              | Qwen3 0.6B      |
    | Dataset            | 100 prompts     |
    | Epochs             | 1               |
    | Rollouts (n)       | 4               |
    | Max tokens         | 2048            |
    | **Estimated cost** | **Free**        |
    | **Estimated time** | \~15–30 minutes |

    Best for: Initial evaluator development and testing.
  </Accordion>

  <Accordion title="Scenario 2: Production training (Free)">
    **Goal**: Train a capable model for production use

    | Parameter          | Value                 |
    | ------------------ | --------------------- |
    | Model              | Llama 3.1 8B Instruct |
    | Dataset            | 500 prompts           |
    | Epochs             | 1                     |
    | Rollouts (n)       | 4                     |
    | Max tokens         | 2048                  |
    | **Estimated cost** | **Free**              |
    | **Estimated time** | \~1–2 hours           |

    Best for: Production workloads that can use an 8B model.
  </Accordion>

  <Accordion title="Scenario 3: Large model training (Paid)">
    **Goal**: Train a large model for maximum quality

    | Parameter          | Value                          |
    | ------------------ | ------------------------------ |
    | Model              | Llama 3.3 70B Instruct         |
    | Dataset            | 500 prompts                    |
    | Epochs             | 1                              |
    | Rollouts (n)       | 4                              |
    | Max tokens         | 2048                           |
    | **Estimated cost** | Training hours × 8 GPUs × rate |
    | **Estimated time** | \~1–2 hours                    |

    Check the [Fireworks Pricing page](https://fireworks.ai/pricing) for the current GPU-hour rate. For a 2-hour job on 8 GPUs, multiply: 2 × 8 × (rate per GPU-hour).
  </Accordion>

  <Accordion title="Scenario 4: High-quality with more rollouts (Paid)">
    **Goal**: Maximum quality with large model and more rollouts

    | Parameter          | Value                          |
    | ------------------ | ------------------------------ |
    | Model              | DeepSeek V3                    |
    | Dataset            | 1000 prompts                   |
    | Epochs             | 2                              |
    | Rollouts (n)       | 8                              |
    | Max tokens         | 4096                           |
    | **Estimated cost** | Training hours × 8 GPUs × rate |
    | **Estimated time** | \~8–16 hours                   |

    This is a larger job. The cost scales with training time: more prompts, epochs, rollouts, and tokens all increase total GPU-hours.
  </Accordion>
</AccordionGroup>

## Monitoring costs during training

The final cost is only available after your job completes or is cancelled. While a job is running, use elapsed time to gauge spend:

1. **Dashboard**: The [Fireworks Dashboard](https://app.fireworks.ai) displays the final cost on the RFT job page once training finishes
2. **Training progress**: While the job is running, you can monitor elapsed time and estimated completion in the job overview
3. **Early stopping**: You can cancel a job early if needed—the model checkpoint from the last completed step is still usable. The final cost will be calculated based on GPU-seconds consumed up to the cancellation point.

<Tip>
  If a job is running longer than expected, check your evaluator performance. Slow evaluators are the most common cause of unexpectedly long (and expensive) training runs.
</Tip>

## Next steps

<CardGroup cols={3}>
  <Card title="Pricing Page" icon="dollar-sign" href="https://fireworks.ai/pricing">
    View current GPU-hour rates and pricing tiers
  </Card>

  <Card title="Parameter Tuning" icon="sliders" href="/fine-tuning/parameter-tuning">
    Learn how each parameter affects training quality and cost
  </Card>

  <Card title="Launch Training" icon="rocket" href="/fine-tuning/cli-reference">
    Create your first RFT job
  </Card>
</CardGroup>