audiotranscription.go

  1// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2
  3package openai
  4
  5import (
  6	"bytes"
  7	"context"
  8	"encoding/json"
  9	"io"
 10	"mime/multipart"
 11	"net/http"
 12
 13	"github.com/openai/openai-go/internal/apiform"
 14	"github.com/openai/openai-go/internal/apijson"
 15	"github.com/openai/openai-go/internal/requestconfig"
 16	"github.com/openai/openai-go/option"
 17	"github.com/openai/openai-go/packages/param"
 18	"github.com/openai/openai-go/packages/respjson"
 19	"github.com/openai/openai-go/packages/ssestream"
 20	"github.com/openai/openai-go/shared/constant"
 21)
 22
 23// AudioTranscriptionService contains methods and other services that help with
 24// interacting with the openai API.
 25//
 26// Note, unlike clients, this service does not read variables from the environment
 27// automatically. You should not instantiate this service directly, and instead use
 28// the [NewAudioTranscriptionService] method instead.
 29type AudioTranscriptionService struct {
 30	Options []option.RequestOption
 31}
 32
 33// NewAudioTranscriptionService generates a new service that applies the given
 34// options to each request. These options are applied after the parent client's
 35// options (if there is one), and before any request-specific options.
 36func NewAudioTranscriptionService(opts ...option.RequestOption) (r AudioTranscriptionService) {
 37	r = AudioTranscriptionService{}
 38	r.Options = opts
 39	return
 40}
 41
 42// Transcribes audio into the input language.
 43func (r *AudioTranscriptionService) New(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (res *Transcription, err error) {
 44	opts = append(r.Options[:], opts...)
 45	path := "audio/transcriptions"
 46	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
 47	return
 48}
 49
 50// Transcribes audio into the input language.
 51func (r *AudioTranscriptionService) NewStreaming(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[TranscriptionStreamEventUnion]) {
 52	var (
 53		raw *http.Response
 54		err error
 55	)
 56	opts = append(r.Options[:], opts...)
 57	body.SetExtraFields(map[string]any{
 58		"stream": "true",
 59	})
 60	path := "audio/transcriptions"
 61	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
 62	return ssestream.NewStream[TranscriptionStreamEventUnion](ssestream.NewDecoder(raw), err)
 63}
 64
 65// Represents a transcription response returned by model, based on the provided
 66// input.
 67type Transcription struct {
 68	// The transcribed text.
 69	Text string `json:"text,required"`
 70	// The log probabilities of the tokens in the transcription. Only returned with the
 71	// models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added
 72	// to the `include` array.
 73	Logprobs []TranscriptionLogprob `json:"logprobs"`
 74	// Token usage statistics for the request.
 75	Usage TranscriptionUsageUnion `json:"usage"`
 76	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
 77	JSON struct {
 78		Text        respjson.Field
 79		Logprobs    respjson.Field
 80		Usage       respjson.Field
 81		ExtraFields map[string]respjson.Field
 82		raw         string
 83	} `json:"-"`
 84}
 85
 86// Returns the unmodified JSON received from the API
 87func (r Transcription) RawJSON() string { return r.JSON.raw }
 88func (r *Transcription) UnmarshalJSON(data []byte) error {
 89	return apijson.UnmarshalRoot(data, r)
 90}
 91
 92type TranscriptionLogprob struct {
 93	// The token in the transcription.
 94	Token string `json:"token"`
 95	// The bytes of the token.
 96	Bytes []float64 `json:"bytes"`
 97	// The log probability of the token.
 98	Logprob float64 `json:"logprob"`
 99	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
100	JSON struct {
101		Token       respjson.Field
102		Bytes       respjson.Field
103		Logprob     respjson.Field
104		ExtraFields map[string]respjson.Field
105		raw         string
106	} `json:"-"`
107}
108
109// Returns the unmodified JSON received from the API
110func (r TranscriptionLogprob) RawJSON() string { return r.JSON.raw }
111func (r *TranscriptionLogprob) UnmarshalJSON(data []byte) error {
112	return apijson.UnmarshalRoot(data, r)
113}
114
115// TranscriptionUsageUnion contains all possible properties and values from
116// [TranscriptionUsageTokens], [TranscriptionUsageDuration].
117//
118// Use the [TranscriptionUsageUnion.AsAny] method to switch on the variant.
119//
120// Use the methods beginning with 'As' to cast the union to one of its variants.
121type TranscriptionUsageUnion struct {
122	// This field is from variant [TranscriptionUsageTokens].
123	InputTokens int64 `json:"input_tokens"`
124	// This field is from variant [TranscriptionUsageTokens].
125	OutputTokens int64 `json:"output_tokens"`
126	// This field is from variant [TranscriptionUsageTokens].
127	TotalTokens int64 `json:"total_tokens"`
128	// Any of "tokens", "duration".
129	Type string `json:"type"`
130	// This field is from variant [TranscriptionUsageTokens].
131	InputTokenDetails TranscriptionUsageTokensInputTokenDetails `json:"input_token_details"`
132	// This field is from variant [TranscriptionUsageDuration].
133	Duration float64 `json:"duration"`
134	JSON     struct {
135		InputTokens       respjson.Field
136		OutputTokens      respjson.Field
137		TotalTokens       respjson.Field
138		Type              respjson.Field
139		InputTokenDetails respjson.Field
140		Duration          respjson.Field
141		raw               string
142	} `json:"-"`
143}
144
145// anyTranscriptionUsage is implemented by each variant of
146// [TranscriptionUsageUnion] to add type safety for the return type of
147// [TranscriptionUsageUnion.AsAny]
148type anyTranscriptionUsage interface {
149	implTranscriptionUsageUnion()
150}
151
152func (TranscriptionUsageTokens) implTranscriptionUsageUnion()   {}
153func (TranscriptionUsageDuration) implTranscriptionUsageUnion() {}
154
155// Use the following switch statement to find the correct variant
156//
157//	switch variant := TranscriptionUsageUnion.AsAny().(type) {
158//	case openai.TranscriptionUsageTokens:
159//	case openai.TranscriptionUsageDuration:
160//	default:
161//	  fmt.Errorf("no variant present")
162//	}
163func (u TranscriptionUsageUnion) AsAny() anyTranscriptionUsage {
164	switch u.Type {
165	case "tokens":
166		return u.AsTokens()
167	case "duration":
168		return u.AsDuration()
169	}
170	return nil
171}
172
173func (u TranscriptionUsageUnion) AsTokens() (v TranscriptionUsageTokens) {
174	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
175	return
176}
177
178func (u TranscriptionUsageUnion) AsDuration() (v TranscriptionUsageDuration) {
179	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
180	return
181}
182
183// Returns the unmodified JSON received from the API
184func (u TranscriptionUsageUnion) RawJSON() string { return u.JSON.raw }
185
186func (r *TranscriptionUsageUnion) UnmarshalJSON(data []byte) error {
187	return apijson.UnmarshalRoot(data, r)
188}
189
190// Usage statistics for models billed by token usage.
191type TranscriptionUsageTokens struct {
192	// Number of input tokens billed for this request.
193	InputTokens int64 `json:"input_tokens,required"`
194	// Number of output tokens generated.
195	OutputTokens int64 `json:"output_tokens,required"`
196	// Total number of tokens used (input + output).
197	TotalTokens int64 `json:"total_tokens,required"`
198	// The type of the usage object. Always `tokens` for this variant.
199	Type constant.Tokens `json:"type,required"`
200	// Details about the input tokens billed for this request.
201	InputTokenDetails TranscriptionUsageTokensInputTokenDetails `json:"input_token_details"`
202	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
203	JSON struct {
204		InputTokens       respjson.Field
205		OutputTokens      respjson.Field
206		TotalTokens       respjson.Field
207		Type              respjson.Field
208		InputTokenDetails respjson.Field
209		ExtraFields       map[string]respjson.Field
210		raw               string
211	} `json:"-"`
212}
213
214// Returns the unmodified JSON received from the API
215func (r TranscriptionUsageTokens) RawJSON() string { return r.JSON.raw }
216func (r *TranscriptionUsageTokens) UnmarshalJSON(data []byte) error {
217	return apijson.UnmarshalRoot(data, r)
218}
219
220// Details about the input tokens billed for this request.
221type TranscriptionUsageTokensInputTokenDetails struct {
222	// Number of audio tokens billed for this request.
223	AudioTokens int64 `json:"audio_tokens"`
224	// Number of text tokens billed for this request.
225	TextTokens int64 `json:"text_tokens"`
226	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
227	JSON struct {
228		AudioTokens respjson.Field
229		TextTokens  respjson.Field
230		ExtraFields map[string]respjson.Field
231		raw         string
232	} `json:"-"`
233}
234
235// Returns the unmodified JSON received from the API
236func (r TranscriptionUsageTokensInputTokenDetails) RawJSON() string { return r.JSON.raw }
237func (r *TranscriptionUsageTokensInputTokenDetails) UnmarshalJSON(data []byte) error {
238	return apijson.UnmarshalRoot(data, r)
239}
240
241// Usage statistics for models billed by audio input duration.
242type TranscriptionUsageDuration struct {
243	// Duration of the input audio in seconds.
244	Duration float64 `json:"duration,required"`
245	// The type of the usage object. Always `duration` for this variant.
246	Type constant.Duration `json:"type,required"`
247	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
248	JSON struct {
249		Duration    respjson.Field
250		Type        respjson.Field
251		ExtraFields map[string]respjson.Field
252		raw         string
253	} `json:"-"`
254}
255
256// Returns the unmodified JSON received from the API
257func (r TranscriptionUsageDuration) RawJSON() string { return r.JSON.raw }
258func (r *TranscriptionUsageDuration) UnmarshalJSON(data []byte) error {
259	return apijson.UnmarshalRoot(data, r)
260}
261
// TranscriptionInclude names a piece of additional information that can be
// requested in a transcription response.
type TranscriptionInclude string

// TranscriptionIncludeLogprobs is the "logprobs" include value.
const TranscriptionIncludeLogprobs TranscriptionInclude = "logprobs"
267
268// TranscriptionStreamEventUnion contains all possible properties and values from
269// [TranscriptionTextDeltaEvent], [TranscriptionTextDoneEvent].
270//
271// Use the [TranscriptionStreamEventUnion.AsAny] method to switch on the variant.
272//
273// Use the methods beginning with 'As' to cast the union to one of its variants.
274type TranscriptionStreamEventUnion struct {
275	// This field is from variant [TranscriptionTextDeltaEvent].
276	Delta string `json:"delta"`
277	// Any of "transcript.text.delta", "transcript.text.done".
278	Type string `json:"type"`
279	// This field is a union of [[]TranscriptionTextDeltaEventLogprob],
280	// [[]TranscriptionTextDoneEventLogprob]
281	Logprobs TranscriptionStreamEventUnionLogprobs `json:"logprobs"`
282	// This field is from variant [TranscriptionTextDoneEvent].
283	Text string `json:"text"`
284	// This field is from variant [TranscriptionTextDoneEvent].
285	Usage TranscriptionTextDoneEventUsage `json:"usage"`
286	JSON  struct {
287		Delta    respjson.Field
288		Type     respjson.Field
289		Logprobs respjson.Field
290		Text     respjson.Field
291		Usage    respjson.Field
292		raw      string
293	} `json:"-"`
294}
295
296// anyTranscriptionStreamEvent is implemented by each variant of
297// [TranscriptionStreamEventUnion] to add type safety for the return type of
298// [TranscriptionStreamEventUnion.AsAny]
299type anyTranscriptionStreamEvent interface {
300	implTranscriptionStreamEventUnion()
301}
302
303func (TranscriptionTextDeltaEvent) implTranscriptionStreamEventUnion() {}
304func (TranscriptionTextDoneEvent) implTranscriptionStreamEventUnion()  {}
305
306// Use the following switch statement to find the correct variant
307//
308//	switch variant := TranscriptionStreamEventUnion.AsAny().(type) {
309//	case openai.TranscriptionTextDeltaEvent:
310//	case openai.TranscriptionTextDoneEvent:
311//	default:
312//	  fmt.Errorf("no variant present")
313//	}
314func (u TranscriptionStreamEventUnion) AsAny() anyTranscriptionStreamEvent {
315	switch u.Type {
316	case "transcript.text.delta":
317		return u.AsTranscriptTextDelta()
318	case "transcript.text.done":
319		return u.AsTranscriptTextDone()
320	}
321	return nil
322}
323
324func (u TranscriptionStreamEventUnion) AsTranscriptTextDelta() (v TranscriptionTextDeltaEvent) {
325	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
326	return
327}
328
329func (u TranscriptionStreamEventUnion) AsTranscriptTextDone() (v TranscriptionTextDoneEvent) {
330	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
331	return
332}
333
334// Returns the unmodified JSON received from the API
335func (u TranscriptionStreamEventUnion) RawJSON() string { return u.JSON.raw }
336
337func (r *TranscriptionStreamEventUnion) UnmarshalJSON(data []byte) error {
338	return apijson.UnmarshalRoot(data, r)
339}
340
341// TranscriptionStreamEventUnionLogprobs is an implicit subunion of
342// [TranscriptionStreamEventUnion]. TranscriptionStreamEventUnionLogprobs provides
343// convenient access to the sub-properties of the union.
344//
345// For type safety it is recommended to directly use a variant of the
346// [TranscriptionStreamEventUnion].
347//
348// If the underlying value is not a json object, one of the following properties
349// will be valid: OfTranscriptionTextDeltaEventLogprobs
350// OfTranscriptionTextDoneEventLogprobs]
351type TranscriptionStreamEventUnionLogprobs struct {
352	// This field will be present if the value is a
353	// [[]TranscriptionTextDeltaEventLogprob] instead of an object.
354	OfTranscriptionTextDeltaEventLogprobs []TranscriptionTextDeltaEventLogprob `json:",inline"`
355	// This field will be present if the value is a
356	// [[]TranscriptionTextDoneEventLogprob] instead of an object.
357	OfTranscriptionTextDoneEventLogprobs []TranscriptionTextDoneEventLogprob `json:",inline"`
358	JSON                                 struct {
359		OfTranscriptionTextDeltaEventLogprobs respjson.Field
360		OfTranscriptionTextDoneEventLogprobs  respjson.Field
361		raw                                   string
362	} `json:"-"`
363}
364
365func (r *TranscriptionStreamEventUnionLogprobs) UnmarshalJSON(data []byte) error {
366	return apijson.UnmarshalRoot(data, r)
367}
368
369// Emitted when there is an additional text delta. This is also the first event
370// emitted when the transcription starts. Only emitted when you
371// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
372// with the `Stream` parameter set to `true`.
373type TranscriptionTextDeltaEvent struct {
374	// The text delta that was additionally transcribed.
375	Delta string `json:"delta,required"`
376	// The type of the event. Always `transcript.text.delta`.
377	Type constant.TranscriptTextDelta `json:"type,required"`
378	// The log probabilities of the delta. Only included if you
379	// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
380	// with the `include[]` parameter set to `logprobs`.
381	Logprobs []TranscriptionTextDeltaEventLogprob `json:"logprobs"`
382	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
383	JSON struct {
384		Delta       respjson.Field
385		Type        respjson.Field
386		Logprobs    respjson.Field
387		ExtraFields map[string]respjson.Field
388		raw         string
389	} `json:"-"`
390}
391
392// Returns the unmodified JSON received from the API
393func (r TranscriptionTextDeltaEvent) RawJSON() string { return r.JSON.raw }
394func (r *TranscriptionTextDeltaEvent) UnmarshalJSON(data []byte) error {
395	return apijson.UnmarshalRoot(data, r)
396}
397
398type TranscriptionTextDeltaEventLogprob struct {
399	// The token that was used to generate the log probability.
400	Token string `json:"token"`
401	// The bytes that were used to generate the log probability.
402	Bytes []int64 `json:"bytes"`
403	// The log probability of the token.
404	Logprob float64 `json:"logprob"`
405	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
406	JSON struct {
407		Token       respjson.Field
408		Bytes       respjson.Field
409		Logprob     respjson.Field
410		ExtraFields map[string]respjson.Field
411		raw         string
412	} `json:"-"`
413}
414
415// Returns the unmodified JSON received from the API
416func (r TranscriptionTextDeltaEventLogprob) RawJSON() string { return r.JSON.raw }
417func (r *TranscriptionTextDeltaEventLogprob) UnmarshalJSON(data []byte) error {
418	return apijson.UnmarshalRoot(data, r)
419}
420
421// Emitted when the transcription is complete. Contains the complete transcription
422// text. Only emitted when you
423// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
424// with the `Stream` parameter set to `true`.
425type TranscriptionTextDoneEvent struct {
426	// The text that was transcribed.
427	Text string `json:"text,required"`
428	// The type of the event. Always `transcript.text.done`.
429	Type constant.TranscriptTextDone `json:"type,required"`
430	// The log probabilities of the individual tokens in the transcription. Only
431	// included if you
432	// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
433	// with the `include[]` parameter set to `logprobs`.
434	Logprobs []TranscriptionTextDoneEventLogprob `json:"logprobs"`
435	// Usage statistics for models billed by token usage.
436	Usage TranscriptionTextDoneEventUsage `json:"usage"`
437	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
438	JSON struct {
439		Text        respjson.Field
440		Type        respjson.Field
441		Logprobs    respjson.Field
442		Usage       respjson.Field
443		ExtraFields map[string]respjson.Field
444		raw         string
445	} `json:"-"`
446}
447
448// Returns the unmodified JSON received from the API
449func (r TranscriptionTextDoneEvent) RawJSON() string { return r.JSON.raw }
450func (r *TranscriptionTextDoneEvent) UnmarshalJSON(data []byte) error {
451	return apijson.UnmarshalRoot(data, r)
452}
453
454type TranscriptionTextDoneEventLogprob struct {
455	// The token that was used to generate the log probability.
456	Token string `json:"token"`
457	// The bytes that were used to generate the log probability.
458	Bytes []int64 `json:"bytes"`
459	// The log probability of the token.
460	Logprob float64 `json:"logprob"`
461	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
462	JSON struct {
463		Token       respjson.Field
464		Bytes       respjson.Field
465		Logprob     respjson.Field
466		ExtraFields map[string]respjson.Field
467		raw         string
468	} `json:"-"`
469}
470
471// Returns the unmodified JSON received from the API
472func (r TranscriptionTextDoneEventLogprob) RawJSON() string { return r.JSON.raw }
473func (r *TranscriptionTextDoneEventLogprob) UnmarshalJSON(data []byte) error {
474	return apijson.UnmarshalRoot(data, r)
475}
476
477// Usage statistics for models billed by token usage.
478type TranscriptionTextDoneEventUsage struct {
479	// Number of input tokens billed for this request.
480	InputTokens int64 `json:"input_tokens,required"`
481	// Number of output tokens generated.
482	OutputTokens int64 `json:"output_tokens,required"`
483	// Total number of tokens used (input + output).
484	TotalTokens int64 `json:"total_tokens,required"`
485	// The type of the usage object. Always `tokens` for this variant.
486	Type constant.Tokens `json:"type,required"`
487	// Details about the input tokens billed for this request.
488	InputTokenDetails TranscriptionTextDoneEventUsageInputTokenDetails `json:"input_token_details"`
489	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
490	JSON struct {
491		InputTokens       respjson.Field
492		OutputTokens      respjson.Field
493		TotalTokens       respjson.Field
494		Type              respjson.Field
495		InputTokenDetails respjson.Field
496		ExtraFields       map[string]respjson.Field
497		raw               string
498	} `json:"-"`
499}
500
501// Returns the unmodified JSON received from the API
502func (r TranscriptionTextDoneEventUsage) RawJSON() string { return r.JSON.raw }
503func (r *TranscriptionTextDoneEventUsage) UnmarshalJSON(data []byte) error {
504	return apijson.UnmarshalRoot(data, r)
505}
506
507// Details about the input tokens billed for this request.
508type TranscriptionTextDoneEventUsageInputTokenDetails struct {
509	// Number of audio tokens billed for this request.
510	AudioTokens int64 `json:"audio_tokens"`
511	// Number of text tokens billed for this request.
512	TextTokens int64 `json:"text_tokens"`
513	// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
514	JSON struct {
515		AudioTokens respjson.Field
516		TextTokens  respjson.Field
517		ExtraFields map[string]respjson.Field
518		raw         string
519	} `json:"-"`
520}
521
522// Returns the unmodified JSON received from the API
523func (r TranscriptionTextDoneEventUsageInputTokenDetails) RawJSON() string { return r.JSON.raw }
524func (r *TranscriptionTextDoneEventUsageInputTokenDetails) UnmarshalJSON(data []byte) error {
525	return apijson.UnmarshalRoot(data, r)
526}
527
528type AudioTranscriptionNewParams struct {
529	// The audio file object (not file name) to transcribe, in one of these formats:
530	// flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
531	File io.Reader `json:"file,omitzero,required" format:"binary"`
532	// ID of the model to use. The options are `gpt-4o-transcribe`,
533	// `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
534	// Whisper V2 model).
535	Model AudioModel `json:"model,omitzero,required"`
536	// The language of the input audio. Supplying the input language in
537	// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
538	// format will improve accuracy and latency.
539	Language param.Opt[string] `json:"language,omitzero"`
540	// An optional text to guide the model's style or continue a previous audio
541	// segment. The
542	// [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
543	// should match the audio language.
544	Prompt param.Opt[string] `json:"prompt,omitzero"`
545	// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
546	// output more random, while lower values like 0.2 will make it more focused and
547	// deterministic. If set to 0, the model will use
548	// [log probability](https://en.wikipedia.org/wiki/Log_probability) to
549	// automatically increase the temperature until certain thresholds are hit.
550	Temperature param.Opt[float64] `json:"temperature,omitzero"`
551	// Controls how the audio is cut into chunks. When set to `"auto"`, the server
552	// first normalizes loudness and then uses voice activity detection (VAD) to choose
553	// boundaries. `server_vad` object can be provided to tweak VAD detection
554	// parameters manually. If unset, the audio is transcribed as a single block.
555	ChunkingStrategy AudioTranscriptionNewParamsChunkingStrategyUnion `json:"chunking_strategy,omitzero"`
556	// Additional information to include in the transcription response. `logprobs` will
557	// return the log probabilities of the tokens in the response to understand the
558	// model's confidence in the transcription. `logprobs` only works with
559	// response_format set to `json` and only with the models `gpt-4o-transcribe` and
560	// `gpt-4o-mini-transcribe`.
561	Include []TranscriptionInclude `json:"include,omitzero"`
562	// The format of the output, in one of these options: `json`, `text`, `srt`,
563	// `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
564	// the only supported format is `json`.
565	//
566	// Any of "json", "text", "srt", "verbose_json", "vtt".
567	ResponseFormat AudioResponseFormat `json:"response_format,omitzero"`
568	// The timestamp granularities to populate for this transcription.
569	// `response_format` must be set `verbose_json` to use timestamp granularities.
570	// Either or both of these options are supported: `word`, or `segment`. Note: There
571	// is no additional latency for segment timestamps, but generating word timestamps
572	// incurs additional latency.
573	//
574	// Any of "word", "segment".
575	TimestampGranularities []string `json:"timestamp_granularities,omitzero"`
576	paramObj
577}
578
579func (r AudioTranscriptionNewParams) MarshalMultipart() (data []byte, contentType string, err error) {
580	buf := bytes.NewBuffer(nil)
581	writer := multipart.NewWriter(buf)
582	err = apiform.MarshalRoot(r, writer)
583	if err == nil {
584		err = apiform.WriteExtras(writer, r.ExtraFields())
585	}
586	if err != nil {
587		writer.Close()
588		return nil, "", err
589	}
590	err = writer.Close()
591	if err != nil {
592		return nil, "", err
593	}
594	return buf.Bytes(), writer.FormDataContentType(), nil
595}
596
597// Only one field can be non-zero.
598//
599// Use [param.IsOmitted] to confirm if a field is set.
600type AudioTranscriptionNewParamsChunkingStrategyUnion struct {
601	// Construct this variant with constant.ValueOf[constant.Auto]()
602	OfAuto                                            constant.Auto                                         `json:",omitzero,inline"`
603	OfAudioTranscriptionNewsChunkingStrategyVadConfig *AudioTranscriptionNewParamsChunkingStrategyVadConfig `json:",omitzero,inline"`
604	paramUnion
605}
606
607func (u AudioTranscriptionNewParamsChunkingStrategyUnion) MarshalJSON() ([]byte, error) {
608	return param.MarshalUnion(u, u.OfAuto, u.OfAudioTranscriptionNewsChunkingStrategyVadConfig)
609}
610func (u *AudioTranscriptionNewParamsChunkingStrategyUnion) UnmarshalJSON(data []byte) error {
611	return apijson.UnmarshalRoot(data, u)
612}
613
614func (u *AudioTranscriptionNewParamsChunkingStrategyUnion) asAny() any {
615	if !param.IsOmitted(u.OfAuto) {
616		return &u.OfAuto
617	} else if !param.IsOmitted(u.OfAudioTranscriptionNewsChunkingStrategyVadConfig) {
618		return u.OfAudioTranscriptionNewsChunkingStrategyVadConfig
619	}
620	return nil
621}
622
623// The property Type is required.
624type AudioTranscriptionNewParamsChunkingStrategyVadConfig struct {
625	// Must be set to `server_vad` to enable manual chunking using server side VAD.
626	//
627	// Any of "server_vad".
628	Type string `json:"type,omitzero,required"`
629	// Amount of audio to include before the VAD detected speech (in milliseconds).
630	PrefixPaddingMs param.Opt[int64] `json:"prefix_padding_ms,omitzero"`
631	// Duration of silence to detect speech stop (in milliseconds). With shorter values
632	// the model will respond more quickly, but may jump in on short pauses from the
633	// user.
634	SilenceDurationMs param.Opt[int64] `json:"silence_duration_ms,omitzero"`
635	// Sensitivity threshold (0.0 to 1.0) for voice activity detection. A higher
636	// threshold will require louder audio to activate the model, and thus might
637	// perform better in noisy environments.
638	Threshold param.Opt[float64] `json:"threshold,omitzero"`
639	paramObj
640}
641
642func (r AudioTranscriptionNewParamsChunkingStrategyVadConfig) MarshalJSON() (data []byte, err error) {
643	type shadow AudioTranscriptionNewParamsChunkingStrategyVadConfig
644	return param.MarshalObject(r, (*shadow)(&r))
645}
646func (r *AudioTranscriptionNewParamsChunkingStrategyVadConfig) UnmarshalJSON(data []byte) error {
647	return apijson.UnmarshalRoot(data, r)
648}
649
650func init() {
651	apijson.RegisterFieldValidator[AudioTranscriptionNewParamsChunkingStrategyVadConfig](
652		"type", "server_vad",
653	)
654}