// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

package openai

import (
	"bytes"
	"context"
	"encoding/json"
	"io"
	"mime/multipart"
	"net/http"

	"github.com/openai/openai-go/internal/apiform"
	"github.com/openai/openai-go/internal/apijson"
	"github.com/openai/openai-go/internal/requestconfig"
	"github.com/openai/openai-go/option"
	"github.com/openai/openai-go/packages/param"
	"github.com/openai/openai-go/packages/resp"
	"github.com/openai/openai-go/packages/ssestream"
	"github.com/openai/openai-go/shared/constant"
)

// AudioTranscriptionService contains methods and other services that help with
// interacting with the openai API.
//
// Note, unlike clients, this service does not read variables from the environment
// automatically. You should not instantiate this service directly; use the
// [NewAudioTranscriptionService] method instead.
type AudioTranscriptionService struct {
	Options []option.RequestOption
}

// NewAudioTranscriptionService generates a new service that applies the given
// options to each request. These options are applied after the parent client's
// options (if there is one), and before any request-specific options.
func NewAudioTranscriptionService(opts ...option.RequestOption) (r AudioTranscriptionService) {
	r = AudioTranscriptionService{}
	r.Options = opts
	return
}

// Transcribes audio into the input language.
func (r *AudioTranscriptionService) New(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (res *Transcription, err error) {
	opts = append(r.Options[:], opts...)
	path := "audio/transcriptions"
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...)
	return
}
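
// A minimal usage sketch from the caller's side, assuming a configured client
// named client, a context named ctx, and a local audio file; the file name and
// model choice are illustrative, not prescribed by this file:
//
//	audioFile, err := os.Open("speech.mp3")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer audioFile.Close()
//
//	transcription, err := client.Audio.Transcriptions.New(ctx, openai.AudioTranscriptionNewParams{
//		File:  audioFile,
//		Model: openai.AudioModelWhisper1,
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println(transcription.Text)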

// Transcribes audio into the input language.
func (r *AudioTranscriptionService) NewStreaming(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[TranscriptionStreamEventUnion]) {
	var (
		raw *http.Response
		err error
	)
	opts = append(r.Options[:], opts...)
	opts = append([]option.RequestOption{option.WithJSONSet("stream", true)}, opts...)
	path := "audio/transcriptions"
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...)
	return ssestream.NewStream[TranscriptionStreamEventUnion](ssestream.NewDecoder(raw), err)
}
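
// A minimal streaming sketch, assuming client, ctx, and params are set up as in
// the example above; it drains the SSE stream event by event and checks the
// stream error at the end:
//
//	stream := client.Audio.Transcriptions.NewStreaming(ctx, params)
//	for stream.Next() {
//		event := stream.Current()
//		switch variant := event.AsAny().(type) {
//		case openai.TranscriptionTextDeltaEvent:
//			fmt.Print(variant.Delta)
//		case openai.TranscriptionTextDoneEvent:
//			fmt.Println()
//			fmt.Println(variant.Text)
//		}
//	}
//	if err := stream.Err(); err != nil {
//		log.Fatal(err)
//	}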

// Represents a transcription response returned by model, based on the provided
// input.
type Transcription struct {
	// The transcribed text.
	Text string `json:"text,required"`
	// The log probabilities of the tokens in the transcription. Only returned with the
	// models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added
	// to the `include` array.
	Logprobs []TranscriptionLogprob `json:"logprobs"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Text        resp.Field
		Logprobs    resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r Transcription) RawJSON() string { return r.JSON.raw }
func (r *Transcription) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}
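
// A short sketch of checking an optional field through the JSON metadata,
// assuming a Transcription value named transcription obtained from New:
//
//	if transcription.JSON.Logprobs.IsPresent() {
//		for _, lp := range transcription.Logprobs {
//			fmt.Printf("%s: %f\n", lp.Token, lp.Logprob)
//		}
//	}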

type TranscriptionLogprob struct {
	// The token in the transcription.
	Token string `json:"token"`
	// The bytes of the token.
	Bytes []float64 `json:"bytes"`
	// The log probability of the token.
	Logprob float64 `json:"logprob"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Token       resp.Field
		Bytes       resp.Field
		Logprob     resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r TranscriptionLogprob) RawJSON() string { return r.JSON.raw }
func (r *TranscriptionLogprob) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

type TranscriptionInclude string

const (
	TranscriptionIncludeLogprobs TranscriptionInclude = "logprobs"
)

// TranscriptionStreamEventUnion contains all possible properties and values from
// [TranscriptionTextDeltaEvent], [TranscriptionTextDoneEvent].
//
// Use the [TranscriptionStreamEventUnion.AsAny] method to switch on the variant.
//
// Use the methods beginning with 'As' to cast the union to one of its variants.
type TranscriptionStreamEventUnion struct {
	// This field is from variant [TranscriptionTextDeltaEvent].
	Delta string `json:"delta"`
	// Any of "transcript.text.delta", "transcript.text.done".
	Type string `json:"type"`
	// This field is a union of [[]TranscriptionTextDeltaEventLogprob],
	// [[]TranscriptionTextDoneEventLogprob]
	Logprobs TranscriptionStreamEventUnionLogprobs `json:"logprobs"`
	// This field is from variant [TranscriptionTextDoneEvent].
	Text string `json:"text"`
	JSON struct {
		Delta    resp.Field
		Type     resp.Field
		Logprobs resp.Field
		Text     resp.Field
		raw      string
	} `json:"-"`
}

// Use the following switch statement to find the correct variant
//
//	switch variant := TranscriptionStreamEventUnion.AsAny().(type) {
//	case TranscriptionTextDeltaEvent:
//	case TranscriptionTextDoneEvent:
//	default:
//		fmt.Errorf("no variant present")
//	}
func (u TranscriptionStreamEventUnion) AsAny() any {
	switch u.Type {
	case "transcript.text.delta":
		return u.AsTranscriptTextDelta()
	case "transcript.text.done":
		return u.AsTranscriptTextDone()
	}
	return nil
}

func (u TranscriptionStreamEventUnion) AsTranscriptTextDelta() (v TranscriptionTextDeltaEvent) {
	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
	return
}

func (u TranscriptionStreamEventUnion) AsTranscriptTextDone() (v TranscriptionTextDoneEvent) {
	apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v)
	return
}

// Returns the unmodified JSON received from the API
func (u TranscriptionStreamEventUnion) RawJSON() string { return u.JSON.raw }

func (r *TranscriptionStreamEventUnion) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

// TranscriptionStreamEventUnionLogprobs is an implicit subunion of
// [TranscriptionStreamEventUnion]. TranscriptionStreamEventUnionLogprobs provides
// convenient access to the sub-properties of the union.
//
// For type safety it is recommended to directly use a variant of the
// [TranscriptionStreamEventUnion].
//
// If the underlying value is not a json object, one of the following properties
// will be valid: OfTranscriptionTextDeltaEventLogprobs,
// OfTranscriptionTextDoneEventLogprobs.
type TranscriptionStreamEventUnionLogprobs struct {
	// This field will be present if the value is a
	// [[]TranscriptionTextDeltaEventLogprob] instead of an object.
	OfTranscriptionTextDeltaEventLogprobs []TranscriptionTextDeltaEventLogprob `json:",inline"`
	// This field will be present if the value is a
	// [[]TranscriptionTextDoneEventLogprob] instead of an object.
	OfTranscriptionTextDoneEventLogprobs []TranscriptionTextDoneEventLogprob `json:",inline"`
	JSON struct {
		OfTranscriptionTextDeltaEventLogprobs resp.Field
		OfTranscriptionTextDoneEventLogprobs  resp.Field
		raw                                   string
	} `json:"-"`
}

func (r *TranscriptionStreamEventUnionLogprobs) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}
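
// A brief sketch of the two access styles, assuming a stream event value named
// event whose type is "transcript.text.delta":
//
//	// Via the implicit subunion; no compile-time guarantee about which side is set.
//	fmt.Println(len(event.Logprobs.OfTranscriptionTextDeltaEventLogprobs))
//
//	// Via the variant cast, the recommended, type-safe route.
//	delta := event.AsTranscriptTextDelta()
//	fmt.Println(len(delta.Logprobs))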

// Emitted when there is an additional text delta. This is also the first event
// emitted when the transcription starts. Only emitted when you
// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
// with the `Stream` parameter set to `true`.
type TranscriptionTextDeltaEvent struct {
	// The text delta that was additionally transcribed.
	Delta string `json:"delta,required"`
	// The type of the event. Always `transcript.text.delta`.
	Type constant.TranscriptTextDelta `json:"type,required"`
	// The log probabilities of the delta. Only included if you
	// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
	// with the `include[]` parameter set to `logprobs`.
	Logprobs []TranscriptionTextDeltaEventLogprob `json:"logprobs"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Delta       resp.Field
		Type        resp.Field
		Logprobs    resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r TranscriptionTextDeltaEvent) RawJSON() string { return r.JSON.raw }
func (r *TranscriptionTextDeltaEvent) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

type TranscriptionTextDeltaEventLogprob struct {
	// The token that was used to generate the log probability.
	Token string `json:"token"`
	// The bytes that were used to generate the log probability.
	Bytes []interface{} `json:"bytes"`
	// The log probability of the token.
	Logprob float64 `json:"logprob"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Token       resp.Field
		Bytes       resp.Field
		Logprob     resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r TranscriptionTextDeltaEventLogprob) RawJSON() string { return r.JSON.raw }
func (r *TranscriptionTextDeltaEventLogprob) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

// Emitted when the transcription is complete. Contains the complete transcription
// text. Only emitted when you
// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
// with the `Stream` parameter set to `true`.
type TranscriptionTextDoneEvent struct {
	// The text that was transcribed.
	Text string `json:"text,required"`
	// The type of the event. Always `transcript.text.done`.
	Type constant.TranscriptTextDone `json:"type,required"`
	// The log probabilities of the individual tokens in the transcription. Only
	// included if you
	// [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
	// with the `include[]` parameter set to `logprobs`.
	Logprobs []TranscriptionTextDoneEventLogprob `json:"logprobs"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Text        resp.Field
		Type        resp.Field
		Logprobs    resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r TranscriptionTextDoneEvent) RawJSON() string { return r.JSON.raw }
func (r *TranscriptionTextDoneEvent) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

type TranscriptionTextDoneEventLogprob struct {
	// The token that was used to generate the log probability.
	Token string `json:"token"`
	// The bytes that were used to generate the log probability.
	Bytes []interface{} `json:"bytes"`
	// The log probability of the token.
	Logprob float64 `json:"logprob"`
	// Metadata for the response, check the presence of optional fields with the
	// [resp.Field.IsPresent] method.
	JSON struct {
		Token       resp.Field
		Bytes       resp.Field
		Logprob     resp.Field
		ExtraFields map[string]resp.Field
		raw         string
	} `json:"-"`
}

// Returns the unmodified JSON received from the API
func (r TranscriptionTextDoneEventLogprob) RawJSON() string { return r.JSON.raw }
func (r *TranscriptionTextDoneEventLogprob) UnmarshalJSON(data []byte) error {
	return apijson.UnmarshalRoot(data, r)
}

type AudioTranscriptionNewParams struct {
	// The audio file object (not file name) to transcribe, in one of these formats:
	// flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
	File io.Reader `json:"file,required" format:"binary"`
	// ID of the model to use. The options are `gpt-4o-transcribe`,
	// `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
	// Whisper V2 model).
	Model AudioModel `json:"model,omitzero,required"`
	// The language of the input audio. Supplying the input language in
	// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
	// format will improve accuracy and latency.
	Language param.Opt[string] `json:"language,omitzero"`
	// An optional text to guide the model's style or continue a previous audio
	// segment. The
	// [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
	// should match the audio language.
	Prompt param.Opt[string] `json:"prompt,omitzero"`
	// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
	// output more random, while lower values like 0.2 will make it more focused and
	// deterministic. If set to 0, the model will use
	// [log probability](https://en.wikipedia.org/wiki/Log_probability) to
	// automatically increase the temperature until certain thresholds are hit.
	Temperature param.Opt[float64] `json:"temperature,omitzero"`
	// Additional information to include in the transcription response. `logprobs` will
	// return the log probabilities of the tokens in the response to understand the
	// model's confidence in the transcription. `logprobs` only works with
	// response_format set to `json` and only with the models `gpt-4o-transcribe` and
	// `gpt-4o-mini-transcribe`.
	Include []TranscriptionInclude `json:"include,omitzero"`
	// The format of the output, in one of these options: `json`, `text`, `srt`,
	// `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
	// the only supported format is `json`.
	//
	// Any of "json", "text", "srt", "verbose_json", "vtt".
	ResponseFormat AudioResponseFormat `json:"response_format,omitzero"`
	// The timestamp granularities to populate for this transcription.
	// `response_format` must be set to `verbose_json` to use timestamp granularities.
	// Either or both of these options are supported: `word`, or `segment`. Note: There
	// is no additional latency for segment timestamps, but generating word timestamps
	// incurs additional latency.
	TimestampGranularities []string `json:"timestamp_granularities,omitzero"`
	paramObj
}
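
// A fuller params sketch with optional fields; the values are illustrative, and
// openai.String / openai.Float are assumed to be this package's param.Opt
// constructors, with the model and response-format constants assumed from
// elsewhere in the package:
//
//	params := openai.AudioTranscriptionNewParams{
//		File:           audioFile,
//		Model:          openai.AudioModelGPT4oTranscribe,
//		Language:       openai.String("en"),
//		Temperature:    openai.Float(0.2),
//		Include:        []openai.TranscriptionInclude{openai.TranscriptionIncludeLogprobs},
//		ResponseFormat: openai.AudioResponseFormatJSON,
//	}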

// IsPresent returns true if the field's value is not omitted and not the JSON
// "null". To check if this field is omitted, use [param.IsOmitted].
func (f AudioTranscriptionNewParams) IsPresent() bool { return !param.IsOmitted(f) && !f.IsNull() }

func (r AudioTranscriptionNewParams) MarshalMultipart() (data []byte, contentType string, err error) {
	buf := bytes.NewBuffer(nil)
	writer := multipart.NewWriter(buf)
	err = apiform.MarshalRoot(r, writer)
	if err != nil {
		writer.Close()
		return nil, "", err
	}
	err = writer.Close()
	if err != nil {
		return nil, "", err
	}
	return buf.Bytes(), writer.FormDataContentType(), nil
}