1// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3package openai
4
5import (
6 "bytes"
7 "context"
8 "encoding/json"
9 "io"
10 "mime/multipart"
11 "net/http"
12
13 "github.com/openai/openai-go/internal/apiform"
14 "github.com/openai/openai-go/internal/apijson"
15 "github.com/openai/openai-go/internal/requestconfig"
16 "github.com/openai/openai-go/option"
17 "github.com/openai/openai-go/packages/param"
18 "github.com/openai/openai-go/packages/respjson"
19 "github.com/openai/openai-go/packages/ssestream"
20 "github.com/openai/openai-go/shared/constant"
21)
22
23// AudioTranscriptionService contains methods and other services that help with
24// interacting with the openai API.
25//
26// Note, unlike clients, this service does not read variables from the environment
27// automatically. You should not instantiate this service directly, and instead use
28// the [NewAudioTranscriptionService] method instead.
29type AudioTranscriptionService struct {
30 Options []option.RequestOption
31}
32
33// NewAudioTranscriptionService generates a new service that applies the given
34// options to each request. These options are applied after the parent client's
35// options (if there is one), and before any request-specific options.
36func NewAudioTranscriptionService(opts ...option.RequestOption) (r AudioTranscriptionService) {
37 r = AudioTranscriptionService{}
38 r.Options = opts
39 return
40}
41
42// Transcribes audio into the input language.
43func (r *AudioTranscriptionService) New(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (res *Transcription, err error) {
44 opts = append(r.Options[:], opts...)
45 path := "audio/transcriptions"
46 err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
47 return
48}
49
50// Transcribes audio into the input language.
51func (r *AudioTranscriptionService) NewStreaming(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[TranscriptionStreamEventUnion]) {
52 var (
53 raw *http.Response
54 err error
55 )
56 opts = append(r.Options[:], opts...)
57 body.SetExtraFields(map[string]any{
58 "stream": "true",
59 })
60 path := "audio/transcriptions"
61 err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
62 return ssestream.NewStream[TranscriptionStreamEventUnion](ssestream.NewDecoder(raw), err)
63}
64
65// Represents a transcription response returned by model, based on the provided
66// input.
67type Transcription struct {
68 // The transcribed text.
69 Text string `json:"text,required"`
70 // The log probabilities of the tokens in the transcription. Only returned with the
71 // models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added
72 // to the `include` array.
73 Logprobs []TranscriptionLogprob `json:"logprobs"`
74 // Token usage statistics for the request.
75 Usage TranscriptionUsageUnion `json:"usage"`
76 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
77 JSON struct {
78 Text respjson.Field
79 Logprobs respjson.Field
80 Usage respjson.Field
81 ExtraFields map[string]respjson.Field
82 raw string
83 } `json:"-"`
84}
85
86// Returns the unmodified JSON received from the API
87func (r Transcription) RawJSON() string { return r.JSON.raw }
88func (r *Transcription) UnmarshalJSON(data []byte) error {
89 return apijson.UnmarshalRoot(data, r)
90}
91
92type TranscriptionLogprob struct {
93 // The token in the transcription.
94 Token string `json:"token"`
95 // The bytes of the token.
96 Bytes []float64 `json:"bytes"`
97 // The log probability of the token.
98 Logprob float64 `json:"logprob"`
99 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
100 JSON struct {
101 Token respjson.Field
102 Bytes respjson.Field
103 Logprob respjson.Field
104 ExtraFields map[string]respjson.Field
105 raw string
106 } `json:"-"`
107}
108
109// Returns the unmodified JSON received from the API
110func (r TranscriptionLogprob) RawJSON() string { return r.JSON.raw }
111func (r *TranscriptionLogprob) UnmarshalJSON(data []byte) error {
112 return apijson.UnmarshalRoot(data, r)
113}
114
115// TranscriptionUsageUnion contains all possible properties and values from
116// [TranscriptionUsageTokens], [TranscriptionUsageDuration].
117//
118// Use the [TranscriptionUsageUnion.AsAny] method to switch on the variant.
119//
120// Use the methods beginning with 'As' to cast the union to one of its variants.
121type TranscriptionUsageUnion struct {
122 // This field is from variant [TranscriptionUsageTokens].
123 InputTokens int64 `json:"input_tokens"`
124 // This field is from variant [TranscriptionUsageTokens].
125 OutputTokens int64 `json:"output_tokens"`
126 // This field is from variant [TranscriptionUsageTokens].
127 TotalTokens int64 `json:"total_tokens"`
128 // Any of "tokens", "duration".
129 Type string `json:"type"`
130 // This field is from variant [TranscriptionUsageTokens].
131 InputTokenDetails TranscriptionUsageTokensInputTokenDetails `json:"input_token_details"`
132 // This field is from variant [TranscriptionUsageDuration].
133 Duration float64 `json:"duration"`
134 JSON struct {
135 InputTokens respjson.Field
136 OutputTokens respjson.Field
137 TotalTokens respjson.Field
138 Type respjson.Field
139 InputTokenDetails respjson.Field
140 Duration respjson.Field
141 raw string
142 } `json:"-"`
143}
144
145// anyTranscriptionUsage is implemented by each variant of
146// [TranscriptionUsageUnion] to add type safety for the return type of
147// [TranscriptionUsageUnion.AsAny]
148type anyTranscriptionUsage interface {
149 implTranscriptionUsageUnion()
150}
151
152func (TranscriptionUsageTokens) implTranscriptionUsageUnion() {}
153func (TranscriptionUsageDuration) implTranscriptionUsageUnion() {}
154
155// Use the following switch statement to find the correct variant
156//
157// switch variant := TranscriptionUsageUnion.AsAny().(type) {
158// case openai.TranscriptionUsageTokens:
159// case openai.TranscriptionUsageDuration:
160// default:
161// fmt.Errorf("no variant present")
162// }
163func (u TranscriptionUsageUnion) AsAny() anyTranscriptionUsage {
164 switch u.Type {
165 case "tokens":
166 return u.AsTokens()
167 case "duration":
168 return u.AsDuration()
169 }
170 return nil
171}
172
173func (u TranscriptionUsageUnion) AsTokens() (v TranscriptionUsageTokens) {
174 apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
175 return
176}
177
178func (u TranscriptionUsageUnion) AsDuration() (v TranscriptionUsageDuration) {
179 apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
180 return
181}
182
183// Returns the unmodified JSON received from the API
184func (u TranscriptionUsageUnion) RawJSON() string { return u.JSON.raw }
185
186func (r *TranscriptionUsageUnion) UnmarshalJSON(data []byte) error {
187 return apijson.UnmarshalRoot(data, r)
188}
189
190// Usage statistics for models billed by token usage.
191type TranscriptionUsageTokens struct {
192 // Number of input tokens billed for this request.
193 InputTokens int64 `json:"input_tokens,required"`
194 // Number of output tokens generated.
195 OutputTokens int64 `json:"output_tokens,required"`
196 // Total number of tokens used (input + output).
197 TotalTokens int64 `json:"total_tokens,required"`
198 // The type of the usage object. Always `tokens` for this variant.
199 Type constant.Tokens `json:"type,required"`
200 // Details about the input tokens billed for this request.
201 InputTokenDetails TranscriptionUsageTokensInputTokenDetails `json:"input_token_details"`
202 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
203 JSON struct {
204 InputTokens respjson.Field
205 OutputTokens respjson.Field
206 TotalTokens respjson.Field
207 Type respjson.Field
208 InputTokenDetails respjson.Field
209 ExtraFields map[string]respjson.Field
210 raw string
211 } `json:"-"`
212}
213
214// Returns the unmodified JSON received from the API
215func (r TranscriptionUsageTokens) RawJSON() string { return r.JSON.raw }
216func (r *TranscriptionUsageTokens) UnmarshalJSON(data []byte) error {
217 return apijson.UnmarshalRoot(data, r)
218}
219
220// Details about the input tokens billed for this request.
221type TranscriptionUsageTokensInputTokenDetails struct {
222 // Number of audio tokens billed for this request.
223 AudioTokens int64 `json:"audio_tokens"`
224 // Number of text tokens billed for this request.
225 TextTokens int64 `json:"text_tokens"`
226 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
227 JSON struct {
228 AudioTokens respjson.Field
229 TextTokens respjson.Field
230 ExtraFields map[string]respjson.Field
231 raw string
232 } `json:"-"`
233}
234
235// Returns the unmodified JSON received from the API
236func (r TranscriptionUsageTokensInputTokenDetails) RawJSON() string { return r.JSON.raw }
237func (r *TranscriptionUsageTokensInputTokenDetails) UnmarshalJSON(data []byte) error {
238 return apijson.UnmarshalRoot(data, r)
239}
240
241// Usage statistics for models billed by audio input duration.
242type TranscriptionUsageDuration struct {
243 // Duration of the input audio in seconds.
244 Duration float64 `json:"duration,required"`
245 // The type of the usage object. Always `duration` for this variant.
246 Type constant.Duration `json:"type,required"`
247 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
248 JSON struct {
249 Duration respjson.Field
250 Type respjson.Field
251 ExtraFields map[string]respjson.Field
252 raw string
253 } `json:"-"`
254}
255
256// Returns the unmodified JSON received from the API
257func (r TranscriptionUsageDuration) RawJSON() string { return r.JSON.raw }
258func (r *TranscriptionUsageDuration) UnmarshalJSON(data []byte) error {
259 return apijson.UnmarshalRoot(data, r)
260}
261
// TranscriptionInclude names an additional piece of information that can be
// requested through the Include field of [AudioTranscriptionNewParams].
type TranscriptionInclude string

// TranscriptionIncludeLogprobs is the "logprobs" include value.
const TranscriptionIncludeLogprobs TranscriptionInclude = "logprobs"
267
268// TranscriptionStreamEventUnion contains all possible properties and values from
269// [TranscriptionTextDeltaEvent], [TranscriptionTextDoneEvent].
270//
271// Use the [TranscriptionStreamEventUnion.AsAny] method to switch on the variant.
272//
273// Use the methods beginning with 'As' to cast the union to one of its variants.
274type TranscriptionStreamEventUnion struct {
275 // This field is from variant [TranscriptionTextDeltaEvent].
276 Delta string `json:"delta"`
277 // Any of "transcript.text.delta", "transcript.text.done".
278 Type string `json:"type"`
279 // This field is a union of [[]TranscriptionTextDeltaEventLogprob],
280 // [[]TranscriptionTextDoneEventLogprob]
281 Logprobs TranscriptionStreamEventUnionLogprobs `json:"logprobs"`
282 // This field is from variant [TranscriptionTextDoneEvent].
283 Text string `json:"text"`
284 // This field is from variant [TranscriptionTextDoneEvent].
285 Usage TranscriptionTextDoneEventUsage `json:"usage"`
286 JSON struct {
287 Delta respjson.Field
288 Type respjson.Field
289 Logprobs respjson.Field
290 Text respjson.Field
291 Usage respjson.Field
292 raw string
293 } `json:"-"`
294}
295
296// anyTranscriptionStreamEvent is implemented by each variant of
297// [TranscriptionStreamEventUnion] to add type safety for the return type of
298// [TranscriptionStreamEventUnion.AsAny]
299type anyTranscriptionStreamEvent interface {
300 implTranscriptionStreamEventUnion()
301}
302
303func (TranscriptionTextDeltaEvent) implTranscriptionStreamEventUnion() {}
304func (TranscriptionTextDoneEvent) implTranscriptionStreamEventUnion() {}
305
306// Use the following switch statement to find the correct variant
307//
308// switch variant := TranscriptionStreamEventUnion.AsAny().(type) {
309// case openai.TranscriptionTextDeltaEvent:
310// case openai.TranscriptionTextDoneEvent:
311// default:
312// fmt.Errorf("no variant present")
313// }
314func (u TranscriptionStreamEventUnion) AsAny() anyTranscriptionStreamEvent {
315 switch u.Type {
316 case "transcript.text.delta":
317 return u.AsTranscriptTextDelta()
318 case "transcript.text.done":
319 return u.AsTranscriptTextDone()
320 }
321 return nil
322}
323
324func (u TranscriptionStreamEventUnion) AsTranscriptTextDelta() (v TranscriptionTextDeltaEvent) {
325 apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
326 return
327}
328
329func (u TranscriptionStreamEventUnion) AsTranscriptTextDone() (v TranscriptionTextDoneEvent) {
330 apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
331 return
332}
333
334// Returns the unmodified JSON received from the API
335func (u TranscriptionStreamEventUnion) RawJSON() string { return u.JSON.raw }
336
337func (r *TranscriptionStreamEventUnion) UnmarshalJSON(data []byte) error {
338 return apijson.UnmarshalRoot(data, r)
339}
340
341// TranscriptionStreamEventUnionLogprobs is an implicit subunion of
342// [TranscriptionStreamEventUnion]. TranscriptionStreamEventUnionLogprobs provides
343// convenient access to the sub-properties of the union.
344//
345// For type safety it is recommended to directly use a variant of the
346// [TranscriptionStreamEventUnion].
347//
348// If the underlying value is not a json object, one of the following properties
349// will be valid: OfTranscriptionTextDeltaEventLogprobs
350// OfTranscriptionTextDoneEventLogprobs]
351type TranscriptionStreamEventUnionLogprobs struct {
352 // This field will be present if the value is a
353 // [[]TranscriptionTextDeltaEventLogprob] instead of an object.
354 OfTranscriptionTextDeltaEventLogprobs []TranscriptionTextDeltaEventLogprob `json:",inline"`
355 // This field will be present if the value is a
356 // [[]TranscriptionTextDoneEventLogprob] instead of an object.
357 OfTranscriptionTextDoneEventLogprobs []TranscriptionTextDoneEventLogprob `json:",inline"`
358 JSON struct {
359 OfTranscriptionTextDeltaEventLogprobs respjson.Field
360 OfTranscriptionTextDoneEventLogprobs respjson.Field
361 raw string
362 } `json:"-"`
363}
364
365func (r *TranscriptionStreamEventUnionLogprobs) UnmarshalJSON(data []byte) error {
366 return apijson.UnmarshalRoot(data, r)
367}
368
369// Emitted when there is an additional text delta. This is also the first event
370// emitted when the transcription starts. Only emitted when you
371// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
372// with the `Stream` parameter set to `true`.
373type TranscriptionTextDeltaEvent struct {
374 // The text delta that was additionally transcribed.
375 Delta string `json:"delta,required"`
376 // The type of the event. Always `transcript.text.delta`.
377 Type constant.TranscriptTextDelta `json:"type,required"`
378 // The log probabilities of the delta. Only included if you
379 // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
380 // with the `include[]` parameter set to `logprobs`.
381 Logprobs []TranscriptionTextDeltaEventLogprob `json:"logprobs"`
382 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
383 JSON struct {
384 Delta respjson.Field
385 Type respjson.Field
386 Logprobs respjson.Field
387 ExtraFields map[string]respjson.Field
388 raw string
389 } `json:"-"`
390}
391
392// Returns the unmodified JSON received from the API
393func (r TranscriptionTextDeltaEvent) RawJSON() string { return r.JSON.raw }
394func (r *TranscriptionTextDeltaEvent) UnmarshalJSON(data []byte) error {
395 return apijson.UnmarshalRoot(data, r)
396}
397
398type TranscriptionTextDeltaEventLogprob struct {
399 // The token that was used to generate the log probability.
400 Token string `json:"token"`
401 // The bytes that were used to generate the log probability.
402 Bytes []int64 `json:"bytes"`
403 // The log probability of the token.
404 Logprob float64 `json:"logprob"`
405 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
406 JSON struct {
407 Token respjson.Field
408 Bytes respjson.Field
409 Logprob respjson.Field
410 ExtraFields map[string]respjson.Field
411 raw string
412 } `json:"-"`
413}
414
415// Returns the unmodified JSON received from the API
416func (r TranscriptionTextDeltaEventLogprob) RawJSON() string { return r.JSON.raw }
417func (r *TranscriptionTextDeltaEventLogprob) UnmarshalJSON(data []byte) error {
418 return apijson.UnmarshalRoot(data, r)
419}
420
421// Emitted when the transcription is complete. Contains the complete transcription
422// text. Only emitted when you
423// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
424// with the `Stream` parameter set to `true`.
425type TranscriptionTextDoneEvent struct {
426 // The text that was transcribed.
427 Text string `json:"text,required"`
428 // The type of the event. Always `transcript.text.done`.
429 Type constant.TranscriptTextDone `json:"type,required"`
430 // The log probabilities of the individual tokens in the transcription. Only
431 // included if you
432 // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
433 // with the `include[]` parameter set to `logprobs`.
434 Logprobs []TranscriptionTextDoneEventLogprob `json:"logprobs"`
435 // Usage statistics for models billed by token usage.
436 Usage TranscriptionTextDoneEventUsage `json:"usage"`
437 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
438 JSON struct {
439 Text respjson.Field
440 Type respjson.Field
441 Logprobs respjson.Field
442 Usage respjson.Field
443 ExtraFields map[string]respjson.Field
444 raw string
445 } `json:"-"`
446}
447
448// Returns the unmodified JSON received from the API
449func (r TranscriptionTextDoneEvent) RawJSON() string { return r.JSON.raw }
450func (r *TranscriptionTextDoneEvent) UnmarshalJSON(data []byte) error {
451 return apijson.UnmarshalRoot(data, r)
452}
453
454type TranscriptionTextDoneEventLogprob struct {
455 // The token that was used to generate the log probability.
456 Token string `json:"token"`
457 // The bytes that were used to generate the log probability.
458 Bytes []int64 `json:"bytes"`
459 // The log probability of the token.
460 Logprob float64 `json:"logprob"`
461 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
462 JSON struct {
463 Token respjson.Field
464 Bytes respjson.Field
465 Logprob respjson.Field
466 ExtraFields map[string]respjson.Field
467 raw string
468 } `json:"-"`
469}
470
471// Returns the unmodified JSON received from the API
472func (r TranscriptionTextDoneEventLogprob) RawJSON() string { return r.JSON.raw }
473func (r *TranscriptionTextDoneEventLogprob) UnmarshalJSON(data []byte) error {
474 return apijson.UnmarshalRoot(data, r)
475}
476
477// Usage statistics for models billed by token usage.
478type TranscriptionTextDoneEventUsage struct {
479 // Number of input tokens billed for this request.
480 InputTokens int64 `json:"input_tokens,required"`
481 // Number of output tokens generated.
482 OutputTokens int64 `json:"output_tokens,required"`
483 // Total number of tokens used (input + output).
484 TotalTokens int64 `json:"total_tokens,required"`
485 // The type of the usage object. Always `tokens` for this variant.
486 Type constant.Tokens `json:"type,required"`
487 // Details about the input tokens billed for this request.
488 InputTokenDetails TranscriptionTextDoneEventUsageInputTokenDetails `json:"input_token_details"`
489 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
490 JSON struct {
491 InputTokens respjson.Field
492 OutputTokens respjson.Field
493 TotalTokens respjson.Field
494 Type respjson.Field
495 InputTokenDetails respjson.Field
496 ExtraFields map[string]respjson.Field
497 raw string
498 } `json:"-"`
499}
500
501// Returns the unmodified JSON received from the API
502func (r TranscriptionTextDoneEventUsage) RawJSON() string { return r.JSON.raw }
503func (r *TranscriptionTextDoneEventUsage) UnmarshalJSON(data []byte) error {
504 return apijson.UnmarshalRoot(data, r)
505}
506
507// Details about the input tokens billed for this request.
508type TranscriptionTextDoneEventUsageInputTokenDetails struct {
509 // Number of audio tokens billed for this request.
510 AudioTokens int64 `json:"audio_tokens"`
511 // Number of text tokens billed for this request.
512 TextTokens int64 `json:"text_tokens"`
513 // JSON contains metadata for fields, check presence with [respjson.Field.Valid].
514 JSON struct {
515 AudioTokens respjson.Field
516 TextTokens respjson.Field
517 ExtraFields map[string]respjson.Field
518 raw string
519 } `json:"-"`
520}
521
522// Returns the unmodified JSON received from the API
523func (r TranscriptionTextDoneEventUsageInputTokenDetails) RawJSON() string { return r.JSON.raw }
524func (r *TranscriptionTextDoneEventUsageInputTokenDetails) UnmarshalJSON(data []byte) error {
525 return apijson.UnmarshalRoot(data, r)
526}
527
528type AudioTranscriptionNewParams struct {
529 // The audio file object (not file name) to transcribe, in one of these formats:
530 // flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
531 File io.Reader `json:"file,omitzero,required" format:"binary"`
532 // ID of the model to use. The options are `gpt-4o-transcribe`,
533 // `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
534 // Whisper V2 model).
535 Model AudioModel `json:"model,omitzero,required"`
536 // The language of the input audio. Supplying the input language in
537 // [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
538 // format will improve accuracy and latency.
539 Language param.Opt[string] `json:"language,omitzero"`
540 // An optional text to guide the model's style or continue a previous audio
541 // segment. The
542 // [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
543 // should match the audio language.
544 Prompt param.Opt[string] `json:"prompt,omitzero"`
545 // The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
546 // output more random, while lower values like 0.2 will make it more focused and
547 // deterministic. If set to 0, the model will use
548 // [log probability](https://en.wikipedia.org/wiki/Log_probability) to
549 // automatically increase the temperature until certain thresholds are hit.
550 Temperature param.Opt[float64] `json:"temperature,omitzero"`
551 // Controls how the audio is cut into chunks. When set to `"auto"`, the server
552 // first normalizes loudness and then uses voice activity detection (VAD) to choose
553 // boundaries. `server_vad` object can be provided to tweak VAD detection
554 // parameters manually. If unset, the audio is transcribed as a single block.
555 ChunkingStrategy AudioTranscriptionNewParamsChunkingStrategyUnion `json:"chunking_strategy,omitzero"`
556 // Additional information to include in the transcription response. `logprobs` will
557 // return the log probabilities of the tokens in the response to understand the
558 // model's confidence in the transcription. `logprobs` only works with
559 // response_format set to `json` and only with the models `gpt-4o-transcribe` and
560 // `gpt-4o-mini-transcribe`.
561 Include []TranscriptionInclude `json:"include,omitzero"`
562 // The format of the output, in one of these options: `json`, `text`, `srt`,
563 // `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
564 // the only supported format is `json`.
565 //
566 // Any of "json", "text", "srt", "verbose_json", "vtt".
567 ResponseFormat AudioResponseFormat `json:"response_format,omitzero"`
568 // The timestamp granularities to populate for this transcription.
569 // `response_format` must be set `verbose_json` to use timestamp granularities.
570 // Either or both of these options are supported: `word`, or `segment`. Note: There
571 // is no additional latency for segment timestamps, but generating word timestamps
572 // incurs additional latency.
573 //
574 // Any of "word", "segment".
575 TimestampGranularities []string `json:"timestamp_granularities,omitzero"`
576 paramObj
577}
578
579func (r AudioTranscriptionNewParams) MarshalMultipart() (data []byte, contentType string, err error) {
580 buf := bytes.NewBuffer(nil)
581 writer := multipart.NewWriter(buf)
582 err = apiform.MarshalRoot(r, writer)
583 if err == nil {
584 err = apiform.WriteExtras(writer, r.ExtraFields())
585 }
586 if err != nil {
587 writer.Close()
588 return nil, "", err
589 }
590 err = writer.Close()
591 if err != nil {
592 return nil, "", err
593 }
594 return buf.Bytes(), writer.FormDataContentType(), nil
595}
596
597// Only one field can be non-zero.
598//
599// Use [param.IsOmitted] to confirm if a field is set.
600type AudioTranscriptionNewParamsChunkingStrategyUnion struct {
601 // Construct this variant with constant.ValueOf[constant.Auto]()
602 OfAuto constant.Auto `json:",omitzero,inline"`
603 OfAudioTranscriptionNewsChunkingStrategyVadConfig *AudioTranscriptionNewParamsChunkingStrategyVadConfig `json:",omitzero,inline"`
604 paramUnion
605}
606
607func (u AudioTranscriptionNewParamsChunkingStrategyUnion) MarshalJSON() ([]byte, error) {
608 return param.MarshalUnion(u, u.OfAuto, u.OfAudioTranscriptionNewsChunkingStrategyVadConfig)
609}
610func (u *AudioTranscriptionNewParamsChunkingStrategyUnion) UnmarshalJSON(data []byte) error {
611 return apijson.UnmarshalRoot(data, u)
612}
613
614func (u *AudioTranscriptionNewParamsChunkingStrategyUnion) asAny() any {
615 if !param.IsOmitted(u.OfAuto) {
616 return &u.OfAuto
617 } else if !param.IsOmitted(u.OfAudioTranscriptionNewsChunkingStrategyVadConfig) {
618 return u.OfAudioTranscriptionNewsChunkingStrategyVadConfig
619 }
620 return nil
621}
622
623// The property Type is required.
624type AudioTranscriptionNewParamsChunkingStrategyVadConfig struct {
625 // Must be set to `server_vad` to enable manual chunking using server side VAD.
626 //
627 // Any of "server_vad".
628 Type string `json:"type,omitzero,required"`
629 // Amount of audio to include before the VAD detected speech (in milliseconds).
630 PrefixPaddingMs param.Opt[int64] `json:"prefix_padding_ms,omitzero"`
631 // Duration of silence to detect speech stop (in milliseconds). With shorter values
632 // the model will respond more quickly, but may jump in on short pauses from the
633 // user.
634 SilenceDurationMs param.Opt[int64] `json:"silence_duration_ms,omitzero"`
635 // Sensitivity threshold (0.0 to 1.0) for voice activity detection. A higher
636 // threshold will require louder audio to activate the model, and thus might
637 // perform better in noisy environments.
638 Threshold param.Opt[float64] `json:"threshold,omitzero"`
639 paramObj
640}
641
642func (r AudioTranscriptionNewParamsChunkingStrategyVadConfig) MarshalJSON() (data []byte, err error) {
643 type shadow AudioTranscriptionNewParamsChunkingStrategyVadConfig
644 return param.MarshalObject(r, (*shadow)(&r))
645}
646func (r *AudioTranscriptionNewParamsChunkingStrategyVadConfig) UnmarshalJSON(data []byte) error {
647 return apijson.UnmarshalRoot(data, r)
648}
649
650func init() {
651 apijson.RegisterFieldValidator[AudioTranscriptionNewParamsChunkingStrategyVadConfig](
652 "type", "server_vad",
653 )
654}