diff --git a/README.md b/README.md index 8e2f79e1..6ab2ee61 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ Stream chats with the Responses API, transcribe and translate audio with Whisper - [Translate](#translate) - [Transcribe](#transcribe) - [Speech](#speech) + - [Real-Time](#real-time) - [Usage](#usage) - [Errors](#errors-1) - [Development](#development) @@ -1587,6 +1588,33 @@ File.binwrite('demo.mp3', response) # => mp3 file that plays: "This is a speech test!" ``` +### Real-Time + +The Real-Time API allows you to create a real-time session with an OpenAI model. It responds with a session object that includes a `client_secret` key, which contains an ephemeral API token that can be used to authenticate browser clients for the Real-Time API. + +```ruby +response = client.real_time.create(parameters: { model: "gpt-4o-realtime-preview-2024-12-17" }) +puts "ephemeral key: #{response.dig('client_secret', 'value')}" +# => "ephemeral key: ek_abc123" +``` + +Then, in the client-side application, make a POST request to the Real-Time API with the ephemeral key and the SDP offer. + +```js +const OPENAI_REALTIME_URL = 'https://api.openai.com/v1/realtime' +const MODEL = 'gpt-4o-realtime-preview-2024-12-17' + +const response = await fetch(`${OPENAI_REALTIME_URL}?model=${MODEL}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/sdp', + 'Authorization': `Bearer ${ephemeralKey}`, + 'OpenAI-Beta': 'realtime=v1' + }, + body: offer.sdp +}) +``` + ### Usage The Usage API provides information about the cost of various OpenAI services within your organization. To use Admin APIs like Usage, you need to set an OPENAI_ADMIN_TOKEN, which can be generated [here](https://platform.openai.com/settings/organization/admin-keys). 
module OpenAI
  # Thin wrapper around the `/realtime/sessions` endpoint.
  #
  # Every request goes through a client obtained from
  # `client.beta(realtime: "v1")`.
  # NOTE(review): this is expected to add the `OpenAI-Beta: realtime=v1`
  # request header; confirm against `OpenAI::Client#beta`.
  class RealTime
    # Model used by {#create} when the caller does not supply one.
    DEFAULT_REALTIME_MODEL = "gpt-4o-realtime-preview-2024-12-17".freeze

    # @param client [#beta] client that responds to `beta(realtime: "v1")` and
    #   returns an object responding to `json_post(path:, parameters:)`
    def initialize(client:)
      @client = client.beta(realtime: "v1")
    end

    # Create a new real-time session with OpenAI.
    #
    # POSTs `parameters` to `/realtime/sessions`. When no model is supplied,
    # DEFAULT_REALTIME_MODEL is added under the `:model` key. The caller's hash
    # is never mutated (a merged copy is sent instead).
    #
    # The model may be given under either the `:model` or the `"model"` key.
    # Checking only the symbol key would add a second, default `model` entry
    # next to a caller-supplied `"model"`, producing a JSON body with duplicate
    # `model` keys in which the default could override the caller's choice.
    #
    # @param parameters [Hash] parameters for the session (see: https://platform.openai.com/docs/api-reference/realtime-sessions/create)
    # @return [Hash] whatever `json_post` returns for the request, i.e. the
    #   session object sent back by the API
    def create(parameters: {})
      model_given = parameters[:model] || parameters["model"]
      parameters = parameters.merge(model: DEFAULT_REALTIME_MODEL) unless model_given

      @client.json_post(path: "/realtime/sessions", parameters: parameters)
    end
  end
end
+ "id": "session_real123abc", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-18", + "expires_at": 1734626783, + "modalities": [ + "audio", + "text" + ], + "instructions": "", + "voice": "alloy", + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf", + "tools": [], + "ice_servers": [ + { + "urls": ["stun:stun1.example.net"] + }, + { + "urls": ["turn:turn.example.org"], + "username": "user123", + "credential": "password123" + } + ], + "session_id": "session_real123abc", + "audio_input_config": { + "sampling_rate": 16000, + "channels": 1, + "encoding": "opus" + }, + "audio_output_config": { + "sampling_rate": 24000, + "channels": 1, + "encoding": "opus" + } + } + recorded_at: Wed, 18 Dec 2024 12:35:56 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/cassettes/realtime_session_create_default.yml b/spec/fixtures/cassettes/realtime_session_create_default.yml new file mode 100644 index 00000000..9b6bde5b --- /dev/null +++ b/spec/fixtures/cassettes/realtime_session_create_default.yml @@ -0,0 +1,112 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/realtime/sessions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-realtime-preview-2024-12-17"}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Openai-Beta: + - realtime=v1 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 18 Dec 2024 12:35:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Openai-Organization: + - org-123456789 + Openai-Processing-Ms: + - '180' + 
Openai-Version: + - '2024-12-17' + Strict-Transport-Security: + - max-age=15724800; includeSubDomains + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Remaining-Requests: + - '9998' + X-Ratelimit-Reset-Requests: + - 6ms + X-Request-Id: + - req_987654321fedcba + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com; + HttpOnly; Secure; SameSite=None + Server: + - cloudflare + Cf-Ray: + - 987654321fedcba-IAD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "id": "session_real123abc", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "expires_at": 1734626783, + "modalities": [ + "audio", + "text" + ], + "instructions": "", + "voice": "alloy", + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf", + "tools": [], + "ice_servers": [ + { + "urls": ["stun:stun1.example.net"] + }, + { + "urls": ["turn:turn.example.org"], + "username": "user123", + "credential": "password123" + } + ], + "session_id": "session_real123abc", + "audio_input_config": { + "sampling_rate": 16000, + "channels": 1, + "encoding": "opus" + }, + "audio_output_config": { + "sampling_rate": 24000, + "channels": 1, + "encoding": "opus" + } + } + recorded_at: Wed, 18 Dec 2024 12:35:56 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/cassettes/realtime_session_create_with_params.yml b/spec/fixtures/cassettes/realtime_session_create_with_params.yml new file mode 100644 index 00000000..bc57574f --- /dev/null +++ b/spec/fixtures/cassettes/realtime_session_create_with_params.yml @@ -0,0 +1,113 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/realtime/sessions + 
body: + encoding: UTF-8 + string: '{"model":"gpt-4o-realtime-preview-2024-12-17","voice":"alloy","instructions":"You + are a helpful assistant."}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Openai-Beta: + - realtime=v1 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 18 Dec 2024 12:35:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Openai-Organization: + - org-123456789 + Openai-Processing-Ms: + - '180' + Openai-Version: + - '2024-12-17' + Strict-Transport-Security: + - max-age=15724800; includeSubDomains + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Remaining-Requests: + - '9998' + X-Ratelimit-Reset-Requests: + - 6ms + X-Request-Id: + - req_987654321fedcba + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com; + HttpOnly; Secure; SameSite=None + Server: + - cloudflare + Cf-Ray: + - 987654321fedcba-IAD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "id": "session_real123abc", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "expires_at": 1734626783, + "modalities": [ + "audio", + "text" + ], + "instructions": "You are a helpful assistant.", + "voice": "alloy", + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf", + "tools": [], + "ice_servers": [ + { + "urls": ["stun:stun1.example.net"] + }, + { + "urls": ["turn:turn.example.org"], + "username": "user123", + "credential": "password123" + } + ], + "session_id": 
"session_real123abc", + "audio_input_config": { + "sampling_rate": 16000, + "channels": 1, + "encoding": "opus" + }, + "audio_output_config": { + "sampling_rate": 24000, + "channels": 1, + "encoding": "opus" + } + } + recorded_at: Wed, 18 Dec 2024 12:35:56 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/openai/client/real_time_spec.rb b/spec/openai/client/real_time_spec.rb new file mode 100644 index 00000000..9f480bb3 --- /dev/null +++ b/spec/openai/client/real_time_spec.rb @@ -0,0 +1,42 @@ +RSpec.describe OpenAI::RealTime do + let(:client) { OpenAI::Client.new } + let(:realtime) { client.real_time } + + describe "#create" do + context "when no model is specified" do + it "uses the default model" do + VCR.use_cassette("realtime_session_create_default") do + response = realtime.create + expect(response["model"]).to eq(OpenAI::RealTime::DEFAULT_REALTIME_MODEL) + end + end + end + + context "when a model is specified" do + it "uses the specified model" do + custom_model = "gpt-4o-realtime-preview-2024-12-18" + VCR.use_cassette("realtime_session_create_custom_model") do + response = realtime.create(parameters: { model: custom_model }) + expect(response["model"]).to eq(custom_model) + end + end + end + + context "with additional parameters" do + it "sends all parameters to the API" do + parameters = { + model: "gpt-4o-realtime-preview-2024-12-17", + voice: "alloy", + instructions: "You are a helpful assistant." + } + + VCR.use_cassette("realtime_session_create_with_params") do + response = realtime.create(parameters: parameters) + expect(response["model"]).to eq(parameters[:model]) + expect(response["voice"]).to eq(parameters[:voice]) + expect(response["instructions"]).to eq(parameters[:instructions]) + end + end + end + end +end