diff --git a/en/cli/resources/fine_tuning/index.md b/en/cli/resources/fine_tuning/index.md deleted file mode 100644 index b8772c9..0000000 --- a/en/cli/resources/fine_tuning/index.md +++ /dev/null @@ -1,8033 +0,0 @@ -# Fine Tuning - -# Methods - -## Domain Types - -### Dpo Hyperparameters - -- `dpo_hyperparameters: object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Dpo Method - -- `dpo_method: object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Reinforcement Hyperparameters - -- `reinforcement_hyperparameters: object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - -### Reinforcement Method - -- `reinforcement_method: object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - -### Supervised Hyperparameters - -- `supervised_hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Supervised Method - -- `supervised_method: object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -# Jobs - -## Create fine-tuning job - -`$ openai fine-tuning:jobs create` - -**post** `/fine_tuning/jobs` - -Creates a fine-tuning job which begins the process of creating a new model from a given dataset. - -Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. - -[Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) - -### Parameters - -- `--model: string or "babbage-002" or "davinci-002" or "gpt-3.5-turbo" or "gpt-4o-mini"` - - The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). - -- `--training-file: string` - - The ID of an uploaded file that contains training data. - - See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. - - Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. - - The contents of the file should differ depending on if the model uses the [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) format, or if the fine-tuning method uses the [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. - - See the [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. - -- `--hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - This value is now deprecated in favor of `method`, and should be passed in under the `method` parameter. - -- `--integration: optional array of object { type, wandb }` - - A list of integrations to enable for your fine-tuning job. - -- `--metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - -- `--method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - -- `--seed: optional number` - - The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. - If a seed is not specified, one will be generated for you. - -- `--suffix: optional string` - - A string of up to 64 characters that will be added to your fine-tuned model name. - - For example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. - -- `--validation-file: optional string` - - The ID of an uploaded file that contains validation data. - - If you provide this file, the data is used to generate validation - metrics periodically during fine-tuning. These metrics can be viewed in - the fine-tuning results file. - The same data should not be present in both train and validation files. - - Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. - - See the [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. - -### Returns - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Example - -```cli -openai fine-tuning:jobs create \ - --api-key 'My API Key' \ - --model gpt-4o-mini \ - --training-file file-abc123 -``` - -#### Response - -```json -{ - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } -} -``` - -## List fine-tuning jobs - -`$ openai fine-tuning:jobs list` - -**get** `/fine_tuning/jobs` - -List your organization's fine-tuning jobs - -### Parameters - -- `--after: optional string` - - Identifier for the last job from the previous pagination request. - -- `--limit: optional number` - - Number of fine-tuning jobs to retrieve. - -- `--metadata: optional map[string]` - - Optional metadata filter. To filter, use the syntax `metadata[k]=v`. Alternatively, set `metadata=null` to indicate no metadata. - -### Returns - -- `ListPaginatedFineTuningJobsResponse: object { data, has_more, object }` - - - `data: array of FineTuningJob` - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `has_more: boolean` - - - `object: "list"` - -### Example - -```cli -openai fine-tuning:jobs list \ - --api-key 'My API Key' -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } - } - ], - "has_more": true, - "object": "list" -} -``` - -## Retrieve fine-tuning job - -`$ openai fine-tuning:jobs retrieve` - -**get** `/fine_tuning/jobs/{fine_tuning_job_id}` - -Get info about a fine-tuning job. - -[Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job. - -### Returns - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Example - -```cli -openai fine-tuning:jobs retrieve \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } -} -``` - -## List fine-tuning events - -`$ openai fine-tuning:jobs list-events` - -**get** `/fine_tuning/jobs/{fine_tuning_job_id}/events` - -Get status updates for a fine-tuning job. - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job to get events for. - -- `--after: optional string` - - Identifier for the last event from the previous pagination request. - -- `--limit: optional number` - - Number of events to retrieve. - -### Returns - -- `ListFineTuningJobEventsResponse: object { data, has_more, object }` - - - `data: array of FineTuningJobEvent` - - - `id: string` - - The object identifier. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `level: "info" or "warn" or "error"` - - The log level of the event. - - - `"info"` - - - `"warn"` - - - `"error"` - - - `message: string` - - The message of the event. - - - `object: "fine_tuning.job.event"` - - The object type, which is always "fine_tuning.job.event". - - - `data: optional unknown` - - The data associated with the event. - - - `type: optional "message" or "metrics"` - - The type of event. - - - `"message"` - - - `"metrics"` - - - `has_more: boolean` - - - `object: "list"` - -### Example - -```cli -openai fine-tuning:jobs list-events \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "level": "info", - "message": "message", - "object": "fine_tuning.job.event", - "data": {}, - "type": "message" - } - ], - "has_more": true, - "object": "list" -} -``` - -## Cancel fine-tuning - -`$ openai fine-tuning:jobs cancel` - -**post** `/fine_tuning/jobs/{fine_tuning_job_id}/cancel` - -Immediately cancel a fine-tune job. - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job to cancel. - -### Returns - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Example - -```cli -openai fine-tuning:jobs cancel \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } -} -``` - -## Pause fine-tuning - -`$ openai fine-tuning:jobs pause` - -**post** `/fine_tuning/jobs/{fine_tuning_job_id}/pause` - -Pause a fine-tune job. - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job to pause. - -### Returns - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Example - -```cli -openai fine-tuning:jobs pause \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } -} -``` - -## Resume fine-tuning - -`$ openai fine-tuning:jobs resume` - -**post** `/fine_tuning/jobs/{fine_tuning_job_id}/resume` - -Resume a fine-tune job. - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job to resume. - -### Returns - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Example - -```cli -openai fine-tuning:jobs resume \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "id": "id", - "created_at": 0, - "error": { - "code": "code", - "message": "message", - "param": "param" - }, - "fine_tuned_model": "fine_tuned_model", - "finished_at": 0, - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - }, - "model": "model", - "object": "fine_tuning.job", - "organization_id": "organization_id", - "result_files": [ - "file-abc123" - ], - "seed": 0, - "status": "validating_files", - "trained_tokens": 0, - "training_file": "training_file", - "validation_file": "validation_file", - "estimated_finish": 0, - "integrations": [ - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "entity": "entity", - "name": "name", - "tags": [ - "custom-tag" - ] - } - } - ], - "metadata": { - "foo": "string" - }, - "method": { - "type": "supervised", - "dpo": { - "hyperparameters": { - "batch_size": "auto", - "beta": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - }, - "reinforcement": { - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - }, - "hyperparameters": { - "batch_size": "auto", - "compute_multiplier": "auto", - "eval_interval": "auto", - "eval_samples": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto", - "reasoning_effort": "default" - } - }, - "supervised": { - "hyperparameters": { - "batch_size": "auto", - "learning_rate_multiplier": "auto", - "n_epochs": "auto" - } - } - } -} -``` - -## Domain Types - -### Fine Tuning Job - -- `fine_tuning_job: object { id, created_at, error, 16 more }` - - The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. - - - `id: string` - - The object identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `error: object { code, message, param }` - - For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - - - `code: string` - - A machine-readable error code. - - - `message: string` - - A human-readable error message. - - - `param: string` - - The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. - - - `fine_tuned_model: string` - - The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. - - - `finished_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. - - - `hyperparameters: object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. This value will only be returned when running `supervised` jobs. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters - are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - overfitting. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle - through the training dataset. - - - `Auto: "auto"` - - - `union_member_1: number` - - - `model: string` - - The base model that is being fine-tuned. - - - `object: "fine_tuning.job"` - - The object type, which is always "fine_tuning.job". - - - `organization_id: string` - - The organization that owns the fine-tuning job. - - - `result_files: array of string` - - The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `seed: number` - - The seed used for the fine-tuning job. - - - `status: "validating_files" or "queued" or "running" or 3 more` - - The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - - - `"validating_files"` - - - `"queued"` - - - `"running"` - - - `"succeeded"` - - - `"failed"` - - - `"cancelled"` - - - `trained_tokens: number` - - The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - - - `training_file: string` - - The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `validation_file: string` - - The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - - - `estimated_finish: optional number` - - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - - - `integrations: optional array of FineTuningJobWandbIntegrationObject` - - A list of integrations to enable for this fine-tuning job. - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - - - `metadata: optional map[string]` - - Set of 16 key-value pairs that can be attached to an object. This can be - useful for storing additional information about the object in a structured - format, and querying for objects via API or the dashboard. - - Keys are strings with a maximum length of 64 characters. Values are strings - with a maximum length of 512 characters. - - - `method: optional object { type, dpo, reinforcement, supervised }` - - The method used for fine-tuning. - - - `type: "supervised" or "dpo" or "reinforcement"` - - The type of method. Is either `supervised`, `dpo`, or `reinforcement`. - - - `"supervised"` - - - `"dpo"` - - - `"reinforcement"` - - - `dpo: optional object { hyperparameters }` - - Configuration for the DPO fine-tuning method. - - - `hyperparameters: optional object { batch_size, beta, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the DPO fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `beta: optional "auto" or number` - - The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reinforcement: optional object { grader, hyperparameters }` - - Configuration for the reinforcement fine-tuning method. - - - `grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - - - `hyperparameters: optional object { batch_size, compute_multiplier, eval_interval, 4 more }` - - The hyperparameters used for the reinforcement fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `compute_multiplier: optional "auto" or number` - - Multiplier on amount of compute used for exploring search space during training. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_interval: optional "auto" or number` - - The number of training steps between evaluation runs. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `eval_samples: optional "auto" or number` - - Number of evaluation samples to generate per training step. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `reasoning_effort: optional "default" or "low" or "medium" or "high"` - - Level of reasoning effort. - - - `"default"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `supervised: optional object { hyperparameters }` - - Configuration for the supervised fine-tuning method. - - - `hyperparameters: optional object { batch_size, learning_rate_multiplier, n_epochs }` - - The hyperparameters used for the fine-tuning job. - - - `batch_size: optional "auto" or number` - - Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `learning_rate_multiplier: optional "auto" or number` - - Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. - - - `union_member_0: "auto"` - - - `union_member_1: number` - - - `n_epochs: optional "auto" or number` - - The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - - `union_member_0: "auto"` - - - `union_member_1: number` - -### Fine Tuning Job Event - -- `fine_tuning_job_event: object { id, created_at, level, 4 more }` - - Fine-tuning job event object - - - `id: string` - - The object identifier. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the fine-tuning job was created. - - - `level: "info" or "warn" or "error"` - - The log level of the event. - - - `"info"` - - - `"warn"` - - - `"error"` - - - `message: string` - - The message of the event. - - - `object: "fine_tuning.job.event"` - - The object type, which is always "fine_tuning.job.event". - - - `data: optional unknown` - - The data associated with the event. - - - `type: optional "message" or "metrics"` - - The type of event. - - - `"message"` - - - `"metrics"` - -### Fine Tuning Job Wandb Integration - -- `fine_tuning_job_wandb_integration: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - -### Fine Tuning Job Wandb Integration Object - -- `fine_tuning_job_wandb_integration_object: object { type, wandb }` - - - `type: "wandb"` - - The type of the integration being enabled for the fine-tuning job - - - `wandb: object { project, entity, name, tags }` - - The settings for your integration with Weights and Biases. This payload specifies the project that - metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags - to your run, and set a default entity (team, username, etc) to be associated with your run. - - - `project: string` - - The name of the project that the new run will be created under. - - - `entity: optional string` - - The entity to use for the run. This allows you to set the team or username of the WandB user that you would - like associated with the run. If not set, the default entity for the registered WandB API key is used. - - - `name: optional string` - - A display name to set for the run. If not set, we will use the Job ID as the name. - - - `tags: optional array of string` - - A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - -# Checkpoints - -## List fine-tuning checkpoints - -`$ openai fine-tuning:jobs:checkpoints list` - -**get** `/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints` - -List checkpoints for a fine-tuning job. - -### Parameters - -- `--fine-tuning-job-id: string` - - The ID of the fine-tuning job to get checkpoints for. - -- `--after: optional string` - - Identifier for the last checkpoint ID from the previous pagination request. - -- `--limit: optional number` - - Number of checkpoints to retrieve. - -### Returns - -- `ListFineTuningJobCheckpointsResponse: object { data, has_more, object, 2 more }` - - - `data: array of FineTuningJobCheckpoint` - - - `id: string` - - The checkpoint identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the checkpoint was created. - - - `fine_tuned_model_checkpoint: string` - - The name of the fine-tuned checkpoint model that is created. - - - `fine_tuning_job_id: string` - - The name of the fine-tuning job that this checkpoint was created from. - - - `metrics: object { full_valid_loss, full_valid_mean_token_accuracy, step, 4 more }` - - Metrics at the step number during the fine-tuning job. - - - `full_valid_loss: optional number` - - - `full_valid_mean_token_accuracy: optional number` - - - `step: optional number` - - - `train_loss: optional number` - - - `train_mean_token_accuracy: optional number` - - - `valid_loss: optional number` - - - `valid_mean_token_accuracy: optional number` - - - `object: "fine_tuning.job.checkpoint"` - - The object type, which is always "fine_tuning.job.checkpoint". - - - `step_number: number` - - The step number that the checkpoint was created at. - - - `has_more: boolean` - - - `object: "list"` - - - `first_id: optional string` - - - `last_id: optional string` - -### Example - -```cli -openai fine-tuning:jobs:checkpoints list \ - --api-key 'My API Key' \ - --fine-tuning-job-id ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "fine_tuned_model_checkpoint": "fine_tuned_model_checkpoint", - "fine_tuning_job_id": "fine_tuning_job_id", - "metrics": { - "full_valid_loss": 0, - "full_valid_mean_token_accuracy": 0, - "step": 0, - "train_loss": 0, - "train_mean_token_accuracy": 0, - "valid_loss": 0, - "valid_mean_token_accuracy": 0 - }, - "object": "fine_tuning.job.checkpoint", - "step_number": 0 - } - ], - "has_more": true, - "object": "list", - "first_id": "first_id", - "last_id": "last_id" -} -``` - -## Domain Types - -### Fine Tuning Job Checkpoint - -- `fine_tuning_job_checkpoint: object { id, created_at, fine_tuned_model_checkpoint, 4 more }` - - The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is ready to use. - - - `id: string` - - The checkpoint identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the checkpoint was created. - - - `fine_tuned_model_checkpoint: string` - - The name of the fine-tuned checkpoint model that is created. - - - `fine_tuning_job_id: string` - - The name of the fine-tuning job that this checkpoint was created from. - - - `metrics: object { full_valid_loss, full_valid_mean_token_accuracy, step, 4 more }` - - Metrics at the step number during the fine-tuning job. - - - `full_valid_loss: optional number` - - - `full_valid_mean_token_accuracy: optional number` - - - `step: optional number` - - - `train_loss: optional number` - - - `train_mean_token_accuracy: optional number` - - - `valid_loss: optional number` - - - `valid_mean_token_accuracy: optional number` - - - `object: "fine_tuning.job.checkpoint"` - - The object type, which is always "fine_tuning.job.checkpoint". - - - `step_number: number` - - The step number that the checkpoint was created at. - -# Checkpoints - -# Permissions - -## List checkpoint permissions - -`$ openai fine-tuning:checkpoints:permissions retrieve` - -**get** `/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions` - -**NOTE:** This endpoint requires an [admin API key](../admin-api-keys). - -Organization owners can use this endpoint to view all permissions for a fine-tuned model checkpoint. - -### Parameters - -- `--fine-tuned-model-checkpoint: string` - - The ID of the fine-tuned model checkpoint to get permissions for. - -- `--after: optional string` - - Identifier for the last permission ID from the previous pagination request. - -- `--limit: optional number` - - Number of permissions to retrieve. - -- `--order: optional "ascending" or "descending"` - - The order in which to retrieve permissions. - -- `--project-id: optional string` - - The ID of the project to get permissions for. - -### Returns - -- `FineTuningCheckpointPermissionGetResponse: object { data, has_more, object, 2 more }` - - - `data: array of object { id, created_at, object, project_id }` - - - `id: string` - - The permission identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the permission was created. - - - `object: "checkpoint.permission"` - - The object type, which is always "checkpoint.permission". - - - `project_id: string` - - The project identifier that the permission is for. - - - `has_more: boolean` - - - `object: "list"` - - - `first_id: optional string` - - - `last_id: optional string` - -### Example - -```cli -openai fine-tuning:checkpoints:permissions retrieve \ - --api-key 'My API Key' \ - --fine-tuned-model-checkpoint ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "object": "checkpoint.permission", - "project_id": "project_id" - } - ], - "has_more": true, - "object": "list", - "first_id": "first_id", - "last_id": "last_id" -} -``` - -## List checkpoint permissions - -`$ openai fine-tuning:checkpoints:permissions list` - -**get** `/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions` - -**NOTE:** This endpoint requires an [admin API key](../admin-api-keys). - -Organization owners can use this endpoint to view all permissions for a fine-tuned model checkpoint. - -### Parameters - -- `--fine-tuned-model-checkpoint: string` - - The ID of the fine-tuned model checkpoint to get permissions for. - -- `--after: optional string` - - Identifier for the last permission ID from the previous pagination request. - -- `--limit: optional number` - - Number of permissions to retrieve. - -- `--order: optional "ascending" or "descending"` - - The order in which to retrieve permissions. - -- `--project-id: optional string` - - The ID of the project to get permissions for. - -### Returns - -- `ListFineTuningCheckpointPermissionResponse: object { data, has_more, object, 2 more }` - - - `data: array of object { id, created_at, object, project_id }` - - - `id: string` - - The permission identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the permission was created. - - - `object: "checkpoint.permission"` - - The object type, which is always "checkpoint.permission". - - - `project_id: string` - - The project identifier that the permission is for. - - - `has_more: boolean` - - - `object: "list"` - - - `first_id: optional string` - - - `last_id: optional string` - -### Example - -```cli -openai fine-tuning:checkpoints:permissions list \ - --api-key 'My API Key' \ - --fine-tuned-model-checkpoint ft-AF1WoRqd3aJAHsqc9NY7iL8F -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "object": "checkpoint.permission", - "project_id": "project_id" - } - ], - "has_more": true, - "object": "list", - "first_id": "first_id", - "last_id": "last_id" -} -``` - -## Create checkpoint permissions - -`$ openai fine-tuning:checkpoints:permissions create` - -**post** `/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions` - -**NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys). - -This enables organization owners to share fine-tuned models with other projects in their organization. - -### Parameters - -- `--fine-tuned-model-checkpoint: string` - - The ID of the fine-tuned model checkpoint to create a permission for. - -- `--project-id: array of string` - - The project identifiers to grant access to. - -### Returns - -- `ListFineTuningCheckpointPermissionResponse: object { data, has_more, object, 2 more }` - - - `data: array of object { id, created_at, object, project_id }` - - - `id: string` - - The permission identifier, which can be referenced in the API endpoints. - - - `created_at: number` - - The Unix timestamp (in seconds) for when the permission was created. - - - `object: "checkpoint.permission"` - - The object type, which is always "checkpoint.permission". - - - `project_id: string` - - The project identifier that the permission is for. - - - `has_more: boolean` - - - `object: "list"` - - - `first_id: optional string` - - - `last_id: optional string` - -### Example - -```cli -openai fine-tuning:checkpoints:permissions create \ - --api-key 'My API Key' \ - --fine-tuned-model-checkpoint ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd \ - --project-id string -``` - -#### Response - -```json -{ - "data": [ - { - "id": "id", - "created_at": 0, - "object": "checkpoint.permission", - "project_id": "project_id" - } - ], - "has_more": true, - "object": "list", - "first_id": "first_id", - "last_id": "last_id" -} -``` - -## Delete checkpoint permission - -`$ openai fine-tuning:checkpoints:permissions delete` - -**delete** `/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}` - -**NOTE:** This endpoint requires an [admin API key](../admin-api-keys). - -Organization owners can use this endpoint to delete a permission for a fine-tuned model checkpoint. - -### Parameters - -- `--fine-tuned-model-checkpoint: string` - - The ID of the fine-tuned model checkpoint to delete a permission for. - -- `--permission-id: string` - - The ID of the fine-tuned model checkpoint permission to delete. - -### Returns - -- `FineTuningCheckpointPermissionDeleteResponse: object { id, deleted, object }` - - - `id: string` - - The ID of the fine-tuned model checkpoint permission that was deleted. - - - `deleted: boolean` - - Whether the fine-tuned model checkpoint permission was successfully deleted. - - - `object: "checkpoint.permission"` - - The object type, which is always "checkpoint.permission". - -### Example - -```cli -openai fine-tuning:checkpoints:permissions delete \ - --api-key 'My API Key' \ - --fine-tuned-model-checkpoint ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd \ - --permission-id cp_zc4Q7MP6XxulcVzj4MZdwsAB -``` - -#### Response - -```json -{ - "id": "id", - "deleted": true, - "object": "checkpoint.permission" -} -``` - -# Alpha - -# Graders - -## Run grader - -`$ openai fine-tuning:alpha:graders run` - -**post** `/fine_tuning/alpha/graders/run` - -Run a grader. - -### Parameters - -- `--grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - -- `--model-sample: string` - - The model sample to be evaluated. This value will be used to populate - the `sample` namespace. See [the guide](https://platform.openai.com/docs/guides/graders) for more details. - The `output_json` variable will be populated if the model sample is a - valid JSON string. - -- `--item: optional unknown` - - The dataset item provided to the grader. This will be used to populate - the `item` namespace. See [the guide](https://platform.openai.com/docs/guides/graders) for more details. - -### Returns - -- `FineTuningAlphaGraderRunResponse: object { metadata, model_grader_token_usage_per_model, reward, sub_rewards }` - - - `metadata: object { errors, execution_time, name, 4 more }` - - - `errors: object { formula_parse_error, invalid_variable_error, model_grader_parse_error, 11 more }` - - - `formula_parse_error: boolean` - - - `invalid_variable_error: boolean` - - - `model_grader_parse_error: boolean` - - - `model_grader_refusal_error: boolean` - - - `model_grader_server_error: boolean` - - - `model_grader_server_error_details: string` - - - `other_error: boolean` - - - `python_grader_runtime_error: boolean` - - - `python_grader_runtime_error_details: string` - - - `python_grader_server_error: boolean` - - - `python_grader_server_error_type: string` - - - `sample_parse_error: boolean` - - - `truncated_observation_error: boolean` - - - `unresponsive_reward_error: boolean` - - - `execution_time: number` - - - `name: string` - - - `sampled_model_name: string` - - - `scores: map[unknown]` - - - `token_usage: number` - - - `type: string` - - - `model_grader_token_usage_per_model: map[unknown]` - - - `reward: number` - - - `sub_rewards: map[unknown]` - -### Example - -```cli -openai fine-tuning:alpha:graders run \ - --api-key 'My API Key' \ - --grader '{input: input, name: name, operation: eq, reference: reference, type: string_check}' \ - --model-sample model_sample -``` - -#### Response - -```json -{ - "metadata": { - "errors": { - "formula_parse_error": true, - "invalid_variable_error": true, - "model_grader_parse_error": true, - "model_grader_refusal_error": true, - "model_grader_server_error": true, - "model_grader_server_error_details": "model_grader_server_error_details", - "other_error": true, - "python_grader_runtime_error": true, - "python_grader_runtime_error_details": "python_grader_runtime_error_details", - "python_grader_server_error": true, - "python_grader_server_error_type": "python_grader_server_error_type", - "sample_parse_error": true, - "truncated_observation_error": true, - "unresponsive_reward_error": true - }, - "execution_time": 0, - "name": "name", - "sampled_model_name": "sampled_model_name", - "scores": { - "foo": "bar" - }, - "token_usage": 0, - "type": "type" - }, - "model_grader_token_usage_per_model": { - "foo": "bar" - }, - "reward": 0, - "sub_rewards": { - "foo": "bar" - } -} -``` - -## Validate grader - -`$ openai fine-tuning:alpha:graders validate` - -**post** `/fine_tuning/alpha/graders/validate` - -Validate a grader. - -### Parameters - -- `--grader: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - -### Returns - -- `FineTuningAlphaGraderValidateResponse: object { grader }` - - - `grader: optional StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - The grader used for the fine-tuning job. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `input: string` - - The input text. This may include template strings. - - - `name: string` - - The name of the grader. - - - `operation: "eq" or "ne" or "like" or "ilike"` - - The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. - - - `"eq"` - - - `"ne"` - - - `"like"` - - - `"ilike"` - - - `reference: string` - - The reference text. This may include template strings. - - - `type: "string_check"` - - The object type, which is always `string_check`. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `evaluation_metric: "cosine" or "fuzzy_match" or "bleu" or 8 more` - - The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, - `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, - or `rouge_l`. - - - `"cosine"` - - - `"fuzzy_match"` - - - `"bleu"` - - - `"gleu"` - - - `"meteor"` - - - `"rouge_1"` - - - `"rouge_2"` - - - `"rouge_3"` - - - `"rouge_4"` - - - `"rouge_5"` - - - `"rouge_l"` - - - `input: string` - - The text being graded. - - - `name: string` - - The name of the grader. - - - `reference: string` - - The text being graded against. - - - `type: "text_similarity"` - - The type of grader. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `name: string` - - The name of the grader. - - - `source: string` - - The source code of the python script. - - - `type: "python"` - - The object type, which is always `python`. - - - `image_tag: optional string` - - The image tag to use for the python script. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `input: array of object { content, role, type }` - - The input messages evaluated by the grader. Supports text, output text, input image, and input audio content blocks, and may include template strings. - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `text: string` - - The text input to the model. - - - `type: "input_text"` - - The type of the input item. Always `input_text`. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `input_audio: object { data, format }` - - - `data: string` - - Base64-encoded audio data. - - - `format: "mp3" or "wav"` - - The format of the audio data. Currently supported formats are `mp3` and - `wav`. - - - `"mp3"` - - - `"wav"` - - - `type: "input_audio"` - - The type of the input item. Always `input_audio`. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `model: string` - - The model to use for the evaluation. - - - `name: string` - - The name of the grader. - - - `type: "score_model"` - - The object type, which is always `score_model`. - - - `range: optional array of number` - - The range of the score. Defaults to `[0, 1]`. - - - `sampling_params: optional object { max_completions_tokens, reasoning_effort, seed, 2 more }` - - The sampling parameters for the model. - - - `max_completions_tokens: optional number` - - The maximum number of tokens the grader model may generate in its response. - - - `reasoning_effort: optional "none" or "minimal" or "low" or 3 more` - - Constrains effort on reasoning for - [reasoning models](https://platform.openai.com/docs/guides/reasoning). - Currently supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. Reducing - reasoning effort can result in faster responses and fewer tokens used - on reasoning in a response. - - - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning values in gpt-5.1. - - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. - - `xhigh` is supported for all models after `gpt-5.1-codex-max`. - - - `"none"` - - - `"minimal"` - - - `"low"` - - - `"medium"` - - - `"high"` - - - `"xhigh"` - - - `seed: optional number` - - A seed value to initialize the randomness, during sampling. - - - `temperature: optional number` - - A higher temperature increases randomness in the outputs. - - - `top_p: optional number` - - An alternative to temperature for nucleus sampling; 1.0 includes all tokens. - - - `multi_grader: object { calculate_output, graders, name, type }` - - A MultiGrader object combines the output of multiple graders to produce a single score. - - - `calculate_output: string` - - A formula to calculate the output based on grader results. - - - `graders: StringCheckGrader or TextSimilarityGrader or PythonGrader or 2 more` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `string_check_grader: object { input, name, operation, 2 more }` - - A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. - - - `text_similarity_grader: object { evaluation_metric, input, name, 2 more }` - - A TextSimilarityGrader object which grades text based on similarity metrics. - - - `python_grader: object { name, source, type, image_tag }` - - A PythonGrader object that runs a python script on the input. - - - `score_model_grader: object { input, model, name, 3 more }` - - A ScoreModelGrader object that uses a model to assign a score to the input. - - - `label_model_grader: object { input, labels, model, 3 more }` - - A LabelModelGrader object which uses a model to assign labels to each item - in the evaluation. - - - `input: array of object { content, role, type }` - - - `content: string or ResponseInputText or object { text, type } or 3 more` - - Inputs to the model - can contain template strings. Supports text, output text, input images, and input audio, either as a single item or an array of items. - - - `Text input: string` - - A text input to the model. - - - `response_input_text: object { text, type }` - - A text input to the model. - - - `Output text: object { text, type }` - - A text output from the model. - - - `text: string` - - The text output from the model. - - - `type: "output_text"` - - The type of the output text. Always `output_text`. - - - `Input image: object { image_url, type, detail }` - - An image input block used within EvalItem content arrays. - - - `image_url: string` - - The URL of the image input. - - - `type: "input_image"` - - The type of the image input. Always `input_image`. - - - `detail: optional string` - - The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`. - - - `response_input_audio: object { input_audio, type }` - - An audio input to the model. - - - `grader_inputs: array of string or ResponseInputText or object { text, type } or 2 more` - - A list of inputs, each of which may be either an input text, output text, input - image, or input audio object. - - - `role: "user" or "assistant" or "system" or "developer"` - - The role of the message input. One of `user`, `assistant`, `system`, or - `developer`. - - - `"user"` - - - `"assistant"` - - - `"system"` - - - `"developer"` - - - `type: optional "message"` - - The type of the message input. Always `message`. - - - `"message"` - - - `labels: array of string` - - The labels to assign to each item in the evaluation. - - - `model: string` - - The model to use for the evaluation. Must support structured outputs. - - - `name: string` - - The name of the grader. - - - `passing_labels: array of string` - - The labels that indicate a passing result. Must be a subset of labels. - - - `type: "label_model"` - - The object type, which is always `label_model`. - - - `name: string` - - The name of the grader. - - - `type: "multi"` - - The object type, which is always `multi`. - -### Example - -```cli -openai fine-tuning:alpha:graders validate \ - --api-key 'My API Key' \ - --grader '{input: input, name: name, operation: eq, reference: reference, type: string_check}' -``` - -#### Response - -```json -{ - "grader": { - "input": "input", - "name": "name", - "operation": "eq", - "reference": "reference", - "type": "string_check" - } -} -```