From 175625409ee24377bf1c77687208346c98ab0f0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?=
Date: Thu, 9 Jan 2025 13:37:23 +0100
Subject: [PATCH 1/2] PHPORM-277 Add Builder::vectorSearch()

---
Review notes (this area is not part of the commit message):
 * Eloquent\Builder::vectorSearch() now declares
   `QueryInterface|array|null $filter = null`, matching
   Query\Builder::vectorSearch(). An omitted filter is removed by the
   array_filter() call in Query\Builder instead of being forwarded to the
   stage as an empty array. Passing an array explicitly still works.
 * AtlasSearchTest::addVector() writes through the array key instead of
   iterating by reference, so no reference to the last element outlives
   the loop.
 * Hunk line counts and the diffstat are unchanged; the blob hashes on the
   `index` lines predate these two edits.

 src/Eloquent/Builder.php  | 23 ++++++++++++
 src/Query/Builder.php     | 35 ++++++++++++++++++
 tests/AtlasSearchTest.php | 78 +++++++++++++++++++++++++++++++++++----
 3 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/src/Eloquent/Builder.php b/src/Eloquent/Builder.php
index fe0fec95d..afe968e4b 100644
--- a/src/Eloquent/Builder.php
+++ b/src/Eloquent/Builder.php
@@ -8,6 +8,7 @@
 use Illuminate\Database\Eloquent\Collection;
 use Illuminate\Database\Eloquent\Model;
 use MongoDB\BSON\Document;
+use MongoDB\Builder\Type\QueryInterface;
 use MongoDB\Builder\Type\SearchOperatorInterface;
 use MongoDB\Driver\CursorInterface;
 use MongoDB\Driver\Exception\WriteException;
@@ -101,6 +102,28 @@ public function search(
         return $this->model->hydrate($results->all());
     }
 
+    /**
+     * Performs a semantic search on data in your Atlas Vector Search index.
+     * NOTE: $vectorSearch is only available for MongoDB Atlas clusters, and is not available for self-managed deployments.
+     *
+     * @see https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
+     *
+     * @return Collection
+     */
+    public function vectorSearch(
+        string $index,
+        array|string $path,
+        array $queryVector,
+        int $limit,
+        bool $exact = false,
+        QueryInterface|array|null $filter = null,
+        int|null $numCandidates = null,
+    ): Collection {
+        $results = $this->toBase()->vectorSearch($index, $path, $queryVector, $limit, $exact, $filter, $numCandidates);
+
+        return $this->model->hydrate($results->all());
+    }
+
     /** @inheritdoc */
     public function update(array $values, array $options = [])
     {
diff --git a/src/Query/Builder.php b/src/Query/Builder.php
index 0e9e028bb..06eb5ac47 100644
--- a/src/Query/Builder.php
+++ b/src/Query/Builder.php
@@ -25,6 +25,7 @@
 use MongoDB\BSON\UTCDateTime;
 use MongoDB\Builder\Search;
 use MongoDB\Builder\Stage\FluentFactoryTrait;
+use MongoDB\Builder\Type\QueryInterface;
 use MongoDB\Builder\Type\SearchOperatorInterface;
 use MongoDB\Driver\Cursor;
 use Override;
@@ -1532,6 +1533,40 @@ public function search(
         return $this->aggregate()->search(...$args)->get();
     }
 
+    /**
+     * Performs a semantic search on data in your Atlas Vector Search index.
+     * NOTE: $vectorSearch is only available for MongoDB Atlas clusters, and is not available for self-managed deployments.
+     *
+     * @see https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
+     *
+     * @return Collection
+     */
+    public function vectorSearch(
+        string $index,
+        array|string $path,
+        array $queryVector,
+        int $limit,
+        bool $exact = false,
+        QueryInterface|array|null $filter = null,
+        int|null $numCandidates = null,
+    ): Collection {
+        // Forward named arguments to the vectorSearch stage, skip null values
+        $args = array_filter([
+            'index' => $index,
+            'limit' => $limit,
+            'path' => $path,
+            'queryVector' => $queryVector,
+            'exact' => $exact,
+            'filter' => $filter,
+            'numCandidates' => $numCandidates,
+        ], fn ($arg) => $arg !== null);
+
+        return $this->aggregate()
+            ->vectorSearch(...$args)
+            ->addFields(vectorSearchScore: ['$meta' => 'vectorSearchScore'])
+            ->get();
+    }
+
     /**
      * Performs an autocomplete search of the field using an Atlas Search index.
      * NOTE: $search is only available for MongoDB Atlas clusters, and is not available for self-managed deployments.
diff --git a/tests/AtlasSearchTest.php b/tests/AtlasSearchTest.php
index 4dc58e902..124300481 100644
--- a/tests/AtlasSearchTest.php
+++ b/tests/AtlasSearchTest.php
@@ -5,23 +5,31 @@
 use Illuminate\Database\Eloquent\Collection as EloquentCollection;
 use Illuminate\Support\Collection as LaravelCollection;
 use Illuminate\Support\Facades\Schema;
+use MongoDB\Builder\Query;
 use MongoDB\Builder\Search;
 use MongoDB\Collection as MongoDBCollection;
 use MongoDB\Driver\Exception\ServerException;
 use MongoDB\Laravel\Schema\Builder;
 use MongoDB\Laravel\Tests\Models\Book;
 
+use function array_map;
 use function assert;
+use function mt_getrandmax;
+use function rand;
+use function range;
+use function srand;
 use function usleep;
 use function usort;
 
 class AtlasSearchTest extends TestCase
 {
+    private array $vectors;
+
     public function setUp(): void
     {
         parent::setUp();
 
-        Book::insert([
+        Book::insert($this->addVector([
             ['title' => 'Introduction to Algorithms'],
             ['title' => 'Clean Code: A Handbook of Agile Software Craftsmanship'],
             ['title' => 'Design Patterns: Elements of Reusable Object-Oriented Software'],
@@ -42,7 +50,7 @@ public function setUp(): void
             ['title' => 'Understanding Machine Learning: From Theory to Algorithms'],
             ['title' => 'Deep Learning'],
             ['title' => 'Pattern Recognition and Machine Learning'],
-        ]);
+        ]));
 
         $collection = $this->getConnection('mongodb')->getCollection('books');
         assert($collection instanceof MongoDBCollection);
@@ -66,8 +74,9 @@ public function setUp(): void
 
             $collection->createSearchIndex([
                 'fields' => [
-                    ['type' => 'vector', 'numDimensions' => 16, 'path' => 'vector16', 'similarity' => 'cosine'],
+                    ['type' => 'vector', 'numDimensions' => 4, 'path' => 'vector4', 'similarity' => 'cosine'],
                     ['type' => 'vector', 'numDimensions' => 32, 'path' => 'vector32', 'similarity' => 'euclidean'],
+                    ['type' => 'filter', 'path' => 'title'],
                 ],
             ], ['name' => 'vector', 'type' => 'vectorSearch']);
         } catch (ServerException $e) {
@@ -131,7 +140,7 @@ public function testGetIndexes()
             ],
             [
                 'name' => 'vector',
-                'columns' => ['vector16', 'vector32'],
+                'columns' => ['vector4', 'vector32', 'title'],
                 'type' => 'vectorSearch',
                 'primary' => false,
                 'unique' => false,
@@ -180,10 +189,10 @@ public function testEloquentBuilderAutocomplete()
         self::assertInstanceOf(LaravelCollection::class, $results);
         self::assertCount(3, $results);
         self::assertSame([
-            'Operating System Concepts',
             'Database System Concepts',
             'Modern Operating Systems',
-        ], $results->all());
+            'Operating System Concepts',
+        ], $results->sort()->values()->all());
     }
 
     public function testDatabaseBuilderAutocomplete()
@@ -194,9 +203,62 @@ public function testDatabaseBuilderAutocomplete()
         self::assertInstanceOf(LaravelCollection::class, $results);
         self::assertCount(3, $results);
         self::assertSame([
-            'Operating System Concepts',
             'Database System Concepts',
             'Modern Operating Systems',
-        ], $results->all());
+            'Operating System Concepts',
+        ], $results->sort()->values()->all());
+    }
+
+    public function testDatabaseBuilderVectorSearch()
+    {
+        $results = $this->getConnection('mongodb')->table('books')
+            ->vectorSearch(
+                index: 'vector',
+                path: 'vector4',
+                queryVector: $this->vectors[7], // This is an exact match of the vector
+                limit: 4,
+                exact: true,
+            );
+
+        self::assertInstanceOf(LaravelCollection::class, $results);
+        self::assertCount(4, $results);
+        self::assertSame('The Art of Computer Programming', $results->first()['title']);
+        self::assertSame(1.0, $results->first()['vectorSearchScore']);
+    }
+
+    public function testEloquentBuilderVectorSearch()
+    {
+        $results = Book::vectorSearch(
+            index: 'vector',
+            path: 'vector4',
+            queryVector: $this->vectors[7],
+            limit: 5,
+            numCandidates: 15,
+            // excludes the exact match
+            filter: Query::query(
+                title: Query::ne('The Art of Computer Programming'),
+            ),
+        );
+
+        self::assertInstanceOf(EloquentCollection::class, $results);
+        self::assertCount(5, $results);
+        self::assertInstanceOf(Book::class, $results->first());
+        self::assertNotSame('The Art of Computer Programming', $results->first()->title);
+        self::assertSame('The Mythical Man-Month: Essays on Software Engineering', $results->first()->title);
+        self::assertThat(
+            $results->first()->vectorSearchScore,
+            self::logicalAnd(self::isType('float'), self::greaterThan(0.9), self::lessThan(1.0)),
+        );
+    }
+
+    /** Generate random vectors */
+    private function addVector(array $items): array
+    {
+        srand(1);
+        foreach ($items as $key => $item) {
+            $this->vectors[] = $items[$key]['vector4'] = array_map(fn () => rand() / mt_getrandmax(), range(0, 3));
+        }
+
+        return $items;
     }
 }

From a05ee75b50bbcc44ab3e05b4c1687c63e3a747a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?=
Date: Mon, 13 Jan 2025 12:32:31 +0100
Subject: [PATCH 2/2] Improve comment

---
 tests/AtlasSearchTest.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/AtlasSearchTest.php b/tests/AtlasSearchTest.php
index 124300481..c9cd2d5e3 100644
--- a/tests/AtlasSearchTest.php
+++ b/tests/AtlasSearchTest.php
@@ -251,7 +251,7 @@ public function testEloquentBuilderVectorSearch()
         );
     }
 
-    /** Generate random vectors */
+    /** Generate random vectors using fixed seed to make tests deterministic */
     private function addVector(array $items): array
     {
         srand(1);