Skip to content

Commit 351bd35

Browse files
committed
Add file search
1 parent cc1adc6 commit 351bd35

File tree

5 files changed

+204
-46
lines changed

5 files changed

+204
-46
lines changed

README.md

+16-10
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,6 @@ The engine options are currently "ada", "babbage", "curie" and "davinci". Hit th
5858
=> [", there lived a great"]
5959
```
6060

61-
### Search
62-
63-
Pass documents and a query string to get semantic search scores against each document:
64-
65-
```
66-
response = client.search(engine: "ada", documents: %w[washington hospital school], query: "president")
67-
puts response["data"].map { |d| d["score"] }
68-
=> [202.0, 48.052, 19.247]
69-
```
70-
7161
### Files
7262

7363
Put your data in a `.jsonl` file like this:
@@ -86,6 +76,22 @@ and pass the path to `client.files.upload` to upload it to OpenAI, and then inte
8676
client.files.delete(id: 123)
8777
```
8878

79+
### Search
80+
81+
Pass documents and a query string to get semantic search scores against each document:
82+
83+
```
84+
response = client.search(engine: "ada", documents: %w[washington hospital school], query: "president")
85+
puts response["data"].map { |d| d["score"] }
86+
=> [202.0, 48.052, 19.247]
87+
```
88+
89+
You can alternatively search using the ID of a file you've uploaded:
90+
91+
```
92+
client.search(engine: "ada", file: "abc123", query: "happy")
93+
```
94+
8995
## Development
9096

9197
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

lib/ruby/openai/client.rb

+2-4
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,14 @@ def files
2222
@files ||= OpenAI::Files.new(access_token: @access_token)
2323
end
2424

# Performs a semantic search on the given engine, scoring either an inline
# list of documents or a previously uploaded file against +query+.
#
# @param engine    [String] engine name, e.g. "ada" or "davinci"
# @param query     [String] the search query to score documents against
# @param documents [Array<String>, nil] inline documents to search
# @param file      [String, nil] ID of an uploaded file to search instead
# @param version   [String] API version segment (defaults to default_version)
# @return [HTTParty::Response] the raw API response
#
# Supply exactly one of +documents:+ or +file:+ — the endpoint expects one
# source of documents, not both.
def search(engine:, query:, documents: nil, file: nil, version: default_version)
  # Build the body with only the source that was actually provided, so a
  # documents search does not also serialize a spurious `"file": null`.
  body = { query: query }
  body = body.merge(documents ? { documents: documents } : { file: file })

  self.class.post(
    "/#{version}/engines/#{engine}/search",
    headers: {
      "Content-Type" => "application/json",
      "Authorization" => "Bearer #{@access_token}"
    },
    body: body.to_json
  )
end
3735

spec/fixtures/cassettes/davinci_search_happy.yml

+63
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spec/fixtures/cassettes/files_upload_search.yml

+64
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spec/ruby/openai/client/search_spec.rb

+59-32
Original file line numberDiff line numberDiff line change
RSpec.describe OpenAI::Client do
  describe "#search", :vcr do
    # Cassette name is derived from the engine and query under test.
    let(:cassette) { "#{engine} search #{query}".downcase }

    context "with a file" do
      let(:filename) { "puppy.jsonl" }
      let(:file) { File.join(RSPEC_ROOT, "fixtures/files", filename) }
      # Upload the fixture once (recorded under its own cassette) and reuse
      # the resulting file ID for the search request.
      let!(:file_id) do
        response = VCR.use_cassette("files upload search") do
          OpenAI::Client.new.files.upload(parameters: { file: file, purpose: "search" })
        end
        JSON.parse(response.body)["id"]
      end

      context "with engine: davinci" do
        let(:query) { "happy" }
        let(:engine) { "davinci" }
        let(:response) do
          OpenAI::Client.new.search(
            engine: engine,
            file: file_id,
            query: query
          )
        end

        it "finds the best match" do
          VCR.use_cassette(cassette) do
            expect(response.parsed_response["data"][0]["text"]).to eq("puppy A is happy")
          end
        end
      end
    end

    context "with documents" do
      let(:documents) { %w[washington hospital school] }
      let(:query) { "the president" }
      let(:response) do
        OpenAI::Client.new.search(
          engine: engine,
          documents: documents,
          query: query
        )
      end
      # "document" in each scored entry is the index into +documents+.
      let(:best_match) { JSON.parse(response.body)["data"].max_by { |d| d["score"] }["document"] }

      # Identical expectation across every engine, so generate the four
      # contexts instead of copy-pasting them.
      %w[ada babbage curie davinci].each do |engine_name|
        context "with engine: #{engine_name}" do
          let(:engine) { engine_name }

          it "finds the best match" do
            VCR.use_cassette(cassette) do
              expect(documents[best_match]).to eq("washington")
            end
          end
        end
      end
    end
  end
end

0 commit comments

Comments
 (0)