From ac428459ad9c9d9f1cedfe7672482fc6bb24b9aa Mon Sep 17 00:00:00 2001
From: ngel
Date: Thu, 20 Mar 2025 11:59:22 +0100
Subject: [PATCH 1/2] Added RealTime Session creation with ephemeral API token generation

---
 README.md                                     |  28 +++++
 lib/openai.rb                                 |   1 +
 lib/openai/client.rb                          |   4 +
 lib/openai/real_time.rb                       |  25 ++++
 .../realtime_session_create_custom_model.yml  | 112 +++++++++++++++++
 .../realtime_session_create_default.yml       | 112 +++++++++++++++++
 .../realtime_session_create_with_params.yml   | 113 ++++++++++++++++++
 spec/openai/client/real_time_spec.rb          |  42 +++++++
 8 files changed, 437 insertions(+)
 create mode 100644 lib/openai/real_time.rb
 create mode 100644 spec/fixtures/cassettes/realtime_session_create_custom_model.yml
 create mode 100644 spec/fixtures/cassettes/realtime_session_create_default.yml
 create mode 100644 spec/fixtures/cassettes/realtime_session_create_with_params.yml
 create mode 100644 spec/openai/client/real_time_spec.rb

diff --git a/README.md b/README.md
index 8e2f79e1..6ab2ee61 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@ Stream chats with the Responses API, transcribe and translate audio with Whisper
   - [Translate](#translate)
   - [Transcribe](#transcribe)
   - [Speech](#speech)
+  - [Real-Time](#real-time)
 - [Usage](#usage)
 - [Errors](#errors-1)
 - [Development](#development)
@@ -1587,6 +1588,33 @@ File.binwrite('demo.mp3', response)
 # => mp3 file that plays: "This is a speech test!"
 ```
 
+### Real-Time
+
+The Real-Time API lets you create a real-time session with an OpenAI model. The response is a session object that includes a `client_secret` key whose `value` is an ephemeral API token; browser clients can use that token to authenticate against the Realtime API.
+
+```ruby
+response = client.real_time.create(parameters: { model: "gpt-4o-realtime-preview-2024-12-17" })
+puts "ephemeral key: #{response.dig('client_secret', 'value')}"
+# => "ephemeral key: ek_abc123"
+```
+
+Then, in the client-side application, POST the WebRTC SDP offer to the Realtime endpoint, using the ephemeral key as the bearer token; the response body is the SDP answer.
+
+```js
+const OPENAI_REALTIME_URL = 'https://api.openai.com/v1/realtime'
+const MODEL = 'gpt-4o-realtime-preview-2024-12-17'
+
+const response = await fetch(`${OPENAI_REALTIME_URL}?model=${MODEL}`, {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/sdp',
+    'Authorization': `Bearer ${ephemeralKey}`,
+    'OpenAI-Beta': 'realtime=v1'
+  },
+  body: offer.sdp
+})
+```
+
 ### Usage
 
 The Usage API provides information about the cost of various OpenAI services within your organization. To use Admin APIs like Usage, you need to set an OPENAI_ADMIN_TOKEN, which can be generated [here](https://platform.openai.com/settings/organization/admin-keys).
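The README section above leaves the server-to-browser handoff implicit. Purely as an illustration (the Sinatra dependency, the `/realtime_token` route name, and the `OPENAI_ACCESS_TOKEN` environment variable are assumptions, not part of this patch), a minimal server-side endpoint that mints the ephemeral token and hands only the `client_secret` value to the browser might look like this:

```ruby
# Hypothetical server-side handoff sketch (assumes the sinatra and openai gems).
require "sinatra"
require "openai"

client = OpenAI::Client.new(access_token: ENV.fetch("OPENAI_ACCESS_TOKEN"))

# The browser calls this endpoint first, then uses the returned ephemeral
# token as its Bearer token when it sends the SDP offer to the Realtime API.
post "/realtime_token" do
  realtime_session = client.real_time.create(
    parameters: { model: "gpt-4o-realtime-preview-2024-12-17" }
  )

  content_type :json
  {
    ephemeral_key: realtime_session.dig("client_secret", "value"),
    expires_at: realtime_session["expires_at"]
  }.to_json
end
```

The browser then uses the returned `ephemeral_key` for the SDP exchange shown in the README example above, so the standard API key never leaves the server.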
diff --git a/lib/openai.rb b/lib/openai.rb
index 978206f4..5f354124 100644
--- a/lib/openai.rb
+++ b/lib/openai.rb
@@ -10,6 +10,7 @@
 require_relative "openai/assistants"
 require_relative "openai/threads"
 require_relative "openai/messages"
+require_relative "openai/real_time"
 require_relative "openai/runs"
 require_relative "openai/run_steps"
 require_relative "openai/vector_stores"
diff --git a/lib/openai/client.rb b/lib/openai/client.rb
index e06e5e8c..d392c053 100644
--- a/lib/openai/client.rb
+++ b/lib/openai/client.rb
@@ -92,6 +92,10 @@ def batches
     @batches ||= OpenAI::Batches.new(client: self)
   end
 
+  def real_time
+    @real_time ||= OpenAI::RealTime.new(client: self)
+  end
+
   def moderations(parameters: {})
     json_post(path: "/moderations", parameters: parameters)
   end
diff --git a/lib/openai/real_time.rb b/lib/openai/real_time.rb
new file mode 100644
index 00000000..37c6018b
--- /dev/null
+++ b/lib/openai/real_time.rb
@@ -0,0 +1,25 @@
+module OpenAI
+  class RealTime
+
+    DEFAULT_REALTIME_MODEL = 'gpt-4o-realtime-preview-2024-12-17'
+
+    def initialize(client:)
+      @client = client.beta(realtime: 'v1')
+    end
+
+    # Create a new real-time session with OpenAI.
+    #
+    # This method sets up a new session for real-time voice interaction with an OpenAI model.
+    # It returns session details that can be used to establish a WebRTC connection.
+    #
+    # By default, this method uses the 'gpt-4o-realtime-preview-2024-12-17' model unless specified otherwise.
+    #
+    # @param parameters [Hash] parameters for the session (see: https://platform.openai.com/docs/api-reference/realtime-sessions/create)
+    # @return [Hash] Session details including session ID, ICE servers, and other connection information
+    def create(parameters: {})
+      parameters = parameters.merge(model: DEFAULT_REALTIME_MODEL) unless parameters[:model]
+
+      @client.json_post(path: '/realtime/sessions', parameters: parameters)
+    end
+  end
+end
diff --git a/spec/fixtures/cassettes/realtime_session_create_custom_model.yml b/spec/fixtures/cassettes/realtime_session_create_custom_model.yml
new file mode 100644
index 00000000..09842979
--- /dev/null
+++ b/spec/fixtures/cassettes/realtime_session_create_custom_model.yml
@@ -0,0 +1,112 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/realtime/sessions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-realtime-preview-2024-12-18"}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer
+      Openai-Beta:
+      - realtime=v1
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Wed, 18 Dec 2024 12:35:56 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Organization:
+      - org-123456789
+      Openai-Processing-Ms:
+      - '180'
+      Openai-Version:
+      - '2024-12-17'
+      Strict-Transport-Security:
+      - max-age=15724800; includeSubDomains
+      X-Ratelimit-Limit-Requests:
+      - '10000'
+      X-Ratelimit-Remaining-Requests:
+      - '9998'
+      X-Ratelimit-Reset-Requests:
+      - 6ms
+      X-Request-Id:
+      - req_987654321fedcba
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com;
+        HttpOnly; Secure; SameSite=None
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 987654321fedcba-IAD
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: UTF-8
+      string: |-
+        {
+          "id": "session_real123abc",
+          "object": "realtime.session",
+          "model": "gpt-4o-realtime-preview-2024-12-18",
+          "expires_at": 1734626783,
+          "modalities": [
+            "audio",
+            "text"
+          ],
+          "instructions": "",
+          "voice": "alloy",
+          "turn_detection": {
+            "type": "server_vad",
+            "threshold": 0.5,
+            "prefix_padding_ms": 300,
+            "silence_duration_ms": 200
+          },
+          "input_audio_format": "pcm16",
+          "output_audio_format": "pcm16",
+          "input_audio_transcription": null,
+          "tool_choice": "auto",
+          "temperature": 0.8,
+          "max_response_output_tokens": "inf",
+          "tools": [],
+          "ice_servers": [
+            {
+              "urls": ["stun:stun1.example.net"]
+            },
+            {
+              "urls": ["turn:turn.example.org"],
+              "username": "user123",
+              "credential": "password123"
+            }
+          ],
+          "session_id": "session_real123abc",
+          "audio_input_config": {
+            "sampling_rate": 16000,
+            "channels": 1,
+            "encoding": "opus"
+          },
+          "audio_output_config": {
+            "sampling_rate": 24000,
+            "channels": 1,
+            "encoding": "opus"
+          }
+        }
+  recorded_at: Wed, 18 Dec 2024 12:35:56 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/realtime_session_create_default.yml b/spec/fixtures/cassettes/realtime_session_create_default.yml
new file mode 100644
index 00000000..9b6bde5b
--- /dev/null
+++ b/spec/fixtures/cassettes/realtime_session_create_default.yml
@@ -0,0 +1,112 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/realtime/sessions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-realtime-preview-2024-12-17"}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer
+      Openai-Beta:
+      - realtime=v1
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Wed, 18 Dec 2024 12:35:56 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Organization:
+      - org-123456789
+      Openai-Processing-Ms:
+      - '180'
+      Openai-Version:
+      - '2024-12-17'
+      Strict-Transport-Security:
+      - max-age=15724800; includeSubDomains
+      X-Ratelimit-Limit-Requests:
+      - '10000'
+      X-Ratelimit-Remaining-Requests:
+      - '9998'
+      X-Ratelimit-Reset-Requests:
+      - 6ms
+      X-Request-Id:
+      - req_987654321fedcba
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com;
+        HttpOnly; Secure; SameSite=None
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 987654321fedcba-IAD
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: UTF-8
+      string: |-
+        {
+          "id": "session_real123abc",
+          "object": "realtime.session",
+          "model": "gpt-4o-realtime-preview-2024-12-17",
+          "expires_at": 1734626783,
+          "modalities": [
+            "audio",
+            "text"
+          ],
+          "instructions": "",
+          "voice": "alloy",
+          "turn_detection": {
+            "type": "server_vad",
+            "threshold": 0.5,
+            "prefix_padding_ms": 300,
+            "silence_duration_ms": 200
+          },
+          "input_audio_format": "pcm16",
+          "output_audio_format": "pcm16",
+          "input_audio_transcription": null,
+          "tool_choice": "auto",
+          "temperature": 0.8,
+          "max_response_output_tokens": "inf",
+          "tools": [],
+          "ice_servers": [
+            {
+              "urls": ["stun:stun1.example.net"]
+            },
+            {
+              "urls": ["turn:turn.example.org"],
+              "username": "user123",
+              "credential": "password123"
+            }
+          ],
+          "session_id": "session_real123abc",
+          "audio_input_config": {
+            "sampling_rate": 16000,
+            "channels": 1,
+            "encoding": "opus"
+          },
+          "audio_output_config": {
+            "sampling_rate": 24000,
+            "channels": 1,
+            "encoding": "opus"
+          }
+        }
+  recorded_at: Wed, 18 Dec 2024 12:35:56 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/realtime_session_create_with_params.yml b/spec/fixtures/cassettes/realtime_session_create_with_params.yml
new file mode 100644
index 00000000..bc57574f
--- /dev/null
+++ b/spec/fixtures/cassettes/realtime_session_create_with_params.yml
@@ -0,0 +1,113 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/realtime/sessions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4o-realtime-preview-2024-12-17","voice":"alloy","instructions":"You
+        are a helpful assistant."}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer
+      Openai-Beta:
+      - realtime=v1
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Wed, 18 Dec 2024 12:35:56 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Organization:
+      - org-123456789
+      Openai-Processing-Ms:
+      - '180'
+      Openai-Version:
+      - '2024-12-17'
+      Strict-Transport-Security:
+      - max-age=15724800; includeSubDomains
+      X-Ratelimit-Limit-Requests:
+      - '10000'
+      X-Ratelimit-Remaining-Requests:
+      - '9998'
+      X-Ratelimit-Reset-Requests:
+      - 6ms
+      X-Request-Id:
+      - req_987654321fedcba
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com;
+        HttpOnly; Secure; SameSite=None
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 987654321fedcba-IAD
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: UTF-8
+      string: |-
+        {
+          "id": "session_real123abc",
+          "object": "realtime.session",
+          "model": "gpt-4o-realtime-preview-2024-12-17",
+          "expires_at": 1734626783,
+          "modalities": [
+            "audio",
+            "text"
+          ],
+          "instructions": "You are a helpful assistant.",
+          "voice": "alloy",
+          "turn_detection": {
+            "type": "server_vad",
+            "threshold": 0.5,
+            "prefix_padding_ms": 300,
+            "silence_duration_ms": 200
+          },
+          "input_audio_format": "pcm16",
+          "output_audio_format": "pcm16",
+          "input_audio_transcription": null,
+          "tool_choice": "auto",
+          "temperature": 0.8,
+          "max_response_output_tokens": "inf",
+          "tools": [],
+          "ice_servers": [
+            {
+              "urls": ["stun:stun1.example.net"]
+            },
+            {
+              "urls": ["turn:turn.example.org"],
+              "username": "user123",
+              "credential": "password123"
+            }
+          ],
+          "session_id": "session_real123abc",
+          "audio_input_config": {
+            "sampling_rate": 16000,
+            "channels": 1,
+            "encoding": "opus"
+          },
+          "audio_output_config": {
+            "sampling_rate": 24000,
+            "channels": 1,
+            "encoding": "opus"
+          }
+        }
+  recorded_at: Wed, 18 Dec 2024 12:35:56 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/openai/client/real_time_spec.rb b/spec/openai/client/real_time_spec.rb
new file mode 100644
index 00000000..e0516312
--- /dev/null
+++ b/spec/openai/client/real_time_spec.rb
@@ -0,0 +1,42 @@
+RSpec.describe OpenAI::RealTime do
+  let(:client) { OpenAI::Client.new }
+  let(:realtime) { client.real_time }
+
+  describe '#create' do
+    context 'when no model is specified' do
+      it 'uses the default model' do
+        VCR.use_cassette('realtime_session_create_default') do
+          response = realtime.create
+          expect(response['model']).to eq(OpenAI::RealTime::DEFAULT_REALTIME_MODEL)
+        end
+      end
+    end
+
+    context 'when a model is specified' do
+      it 'uses the specified model' do
+        custom_model = 'gpt-4o-realtime-preview-2024-12-18'
+        VCR.use_cassette('realtime_session_create_custom_model') do
+          response = realtime.create(parameters: { model: custom_model })
+          expect(response['model']).to eq(custom_model)
+        end
+      end
+    end
+
+    context 'with additional parameters' do
+      it 'sends all parameters to the API' do
+        parameters = {
+          model: 'gpt-4o-realtime-preview-2024-12-17',
+          voice: 'alloy',
+          instructions: 'You are a helpful assistant.'
+        }
+
+        VCR.use_cassette('realtime_session_create_with_params') do
+          response = realtime.create(parameters: parameters)
+          expect(response['model']).to eq(parameters[:model])
+          expect(response['voice']).to eq(parameters[:voice])
+          expect(response['instructions']).to eq(parameters[:instructions])
+        end
+      end
+    end
+  end
+end
\ No newline at end of file

From 9c5f1f7933249992e70b032524c748bf6ebf60ec Mon Sep 17 00:00:00 2001
From: ngel
Date: Thu, 20 Mar 2025 12:40:30 +0100
Subject: [PATCH 2/2] fix rubocop violations

---
 lib/openai/client.rb                 |  2 ++
 lib/openai/real_time.rb              | 13 +++++----
 spec/openai/client/real_time_spec.rb | 42 ++++++++++++++--------------
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/lib/openai/client.rb b/lib/openai/client.rb
index d392c053..9a4fd01e 100644
--- a/lib/openai/client.rb
+++ b/lib/openai/client.rb
@@ -1,3 +1,4 @@
+# rubocop:disable Metrics/ClassLength
 module OpenAI
   class Client
     include OpenAI::HTTP
@@ -136,3 +137,4 @@ def inspect
     end
   end
 end
+# rubocop:enable Metrics/ClassLength
diff --git a/lib/openai/real_time.rb b/lib/openai/real_time.rb
index 37c6018b..f3eb3e7d 100644
--- a/lib/openai/real_time.rb
+++ b/lib/openai/real_time.rb
@@ -1,10 +1,9 @@
 module OpenAI
   class RealTime
-
-    DEFAULT_REALTIME_MODEL = 'gpt-4o-realtime-preview-2024-12-17'
+    DEFAULT_REALTIME_MODEL = "gpt-4o-realtime-preview-2024-12-17".freeze
 
     def initialize(client:)
-      @client = client.beta(realtime: 'v1')
+      @client = client.beta(realtime: "v1")
     end
 
     # Create a new real-time session with OpenAI.
@@ -11,15 +10,17 @@
     #
     # This method sets up a new session for real-time voice interaction with an OpenAI model.
     # It returns session details that can be used to establish a WebRTC connection.
     #
-    # By default, this method uses the 'gpt-4o-realtime-preview-2024-12-17' model unless specified otherwise.
+    # By default, this method uses the 'gpt-4o-realtime-preview-2024-12-17' model
+    # unless specified otherwise.
     #
     # @param parameters [Hash] parameters for the session (see: https://platform.openai.com/docs/api-reference/realtime-sessions/create)
-    # @return [Hash] Session details including session ID, ICE servers, and other connection information
+    # @return [Hash] Session details including session ID, ICE servers, and other
+    # connection information
     def create(parameters: {})
       parameters = parameters.merge(model: DEFAULT_REALTIME_MODEL) unless parameters[:model]
 
-      @client.json_post(path: '/realtime/sessions', parameters: parameters)
+      @client.json_post(path: "/realtime/sessions", parameters: parameters)
     end
   end
 end
diff --git a/spec/openai/client/real_time_spec.rb b/spec/openai/client/real_time_spec.rb
index e0516312..9f480bb3 100644
--- a/spec/openai/client/real_time_spec.rb
+++ b/spec/openai/client/real_time_spec.rb
@@ -2,41 +2,41 @@
   let(:client) { OpenAI::Client.new }
   let(:realtime) { client.real_time }
 
-  describe '#create' do
-    context 'when no model is specified' do
-      it 'uses the default model' do
-        VCR.use_cassette('realtime_session_create_default') do
+  describe "#create" do
+    context "when no model is specified" do
+      it "uses the default model" do
+        VCR.use_cassette("realtime_session_create_default") do
           response = realtime.create
-          expect(response['model']).to eq(OpenAI::RealTime::DEFAULT_REALTIME_MODEL)
+          expect(response["model"]).to eq(OpenAI::RealTime::DEFAULT_REALTIME_MODEL)
         end
       end
     end
 
-    context 'when a model is specified' do
-      it 'uses the specified model' do
-        custom_model = 'gpt-4o-realtime-preview-2024-12-18'
-        VCR.use_cassette('realtime_session_create_custom_model') do
+    context "when a model is specified" do
+      it "uses the specified model" do
+        custom_model = "gpt-4o-realtime-preview-2024-12-18"
+        VCR.use_cassette("realtime_session_create_custom_model") do
           response = realtime.create(parameters: { model: custom_model })
-          expect(response['model']).to eq(custom_model)
+          expect(response["model"]).to eq(custom_model)
         end
       end
     end
 
-    context 'with additional parameters' do
-      it 'sends all parameters to the API' do
+    context "with additional parameters" do
+      it "sends all parameters to the API" do
         parameters = {
-          model: 'gpt-4o-realtime-preview-2024-12-17',
-          voice: 'alloy',
-          instructions: 'You are a helpful assistant.'
+          model: "gpt-4o-realtime-preview-2024-12-17",
+          voice: "alloy",
+          instructions: "You are a helpful assistant."
        }
-
-        VCR.use_cassette('realtime_session_create_with_params') do
+
+        VCR.use_cassette("realtime_session_create_with_params") do
          response = realtime.create(parameters: parameters)
-          expect(response['model']).to eq(parameters[:model])
-          expect(response['voice']).to eq(parameters[:voice])
-          expect(response['instructions']).to eq(parameters[:instructions])
+          expect(response["model"]).to eq(parameters[:model])
+          expect(response["voice"]).to eq(parameters[:voice])
+          expect(response["instructions"]).to eq(parameters[:instructions])
         end
       end
     end
   end
-end
\ No newline at end of file
+end
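As a companion to the VCR-backed specs above, here is a hedged sketch of exercising the same endpoint with WebMock instead of recorded cassettes. The `webmock` dependency, the stubbed response body, and the `ek_test` placeholder token are assumptions for illustration only; they are not taken from this patch.

```ruby
# Hypothetical WebMock-based spec (assumes the webmock gem and a configured
# OpenAI::Client, e.g. via OpenAI.configure in spec_helper).
require "webmock/rspec"

RSpec.describe OpenAI::RealTime do
  it "posts the default model and returns the ephemeral client_secret" do
    # Stub the sessions endpoint and assert the default model is sent.
    stub = stub_request(:post, "https://api.openai.com/v1/realtime/sessions")
           .with(body: hash_including("model" => OpenAI::RealTime::DEFAULT_REALTIME_MODEL))
           .to_return(
             status: 200,
             headers: { "Content-Type" => "application/json" },
             body: {
               model: OpenAI::RealTime::DEFAULT_REALTIME_MODEL,
               client_secret: { value: "ek_test" } # assumed placeholder token
             }.to_json
           )

    response = OpenAI::Client.new.real_time.create

    expect(stub).to have_been_requested
    expect(response.dig("client_secret", "value")).to eq("ek_test")
  end
end
```

This trades the realism of recorded cassettes for full control over the response, which can be convenient when exercising edge cases the API is unlikely to return.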