Skip to content

Commit f4d9b4f

Browse files
committed
FEATURE: Indexing using FULLTEXT index in MySQL/MariaDB
1 parent 0639ac8 commit f4d9b4f

File tree

5 files changed

+756
-5
lines changed

5 files changed

+756
-5
lines changed

Classes/Domain/Service/MysqlIndex.php

+349
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,349 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Flowpack\SimpleSearch\Domain\Service;
5+
6+
use Flowpack\SimpleSearch\Exception;
7+
use Neos\Flow\Annotations as Flow;
8+
9+
/**
 * The MysqlIndex class provides an index using MySQL and its FULLTEXT indexing feature.
 *
 * Two tables are used:
 *  - "fulltext_objects": one row per indexed object, one MEDIUMTEXT column per property
 *    (columns are added on demand when a new property name is seen),
 *  - "fulltext_index": one row per indexed object with the text buckets h1..h6 and text,
 *    covered by a single FULLTEXT index.
 *
 * All SQL in this class quotes identifiers with double quotes; for the "mysql" PDO driver
 * the session is switched to the ANSI sql_mode on connect so that this quoting is valid.
 */
class MysqlIndex implements IndexInterface
{
    /**
     * @var \PDO
     */
    protected $connection;

    /**
     * @var string
     */
    protected $dataSourceName = '';

    /**
     * Not assigned anywhere in this class; presumably set from outside (e.g. by
     * object configuration) before initializeObject() runs - TODO confirm.
     *
     * @var string
     */
    protected $username;

    /**
     * Not assigned anywhere in this class; presumably set from outside (e.g. by
     * object configuration) before initializeObject() runs - TODO confirm.
     *
     * @var string
     */
    protected $password;

    /**
     * The part of the DSN in front of the first colon, e.g. "mysql"
     *
     * @var string
     */
    protected $pdoDriver;

    /**
     * @var string
     */
    protected $indexName;

    /**
     * Index of fields created for distinct properties of the indexed object
     *
     * @var array<string>
     */
    protected $propertyFieldsAvailable = [];

    /**
     * @param string $indexName
     * @param string $dataSourceName
     * @Flow\Autowiring(false)
     */
    public function __construct(string $indexName, string $dataSourceName)
    {
        $this->indexName = $indexName;
        $this->dataSourceName = $dataSourceName;
    }

    /**
     * Lifecycle method
     *
     * @throws Exception
     */
    public function initializeObject(): void
    {
        $this->connect();
    }

    /**
     * Connect to the database, make sure the index tables exist and load the
     * list of property columns. Does nothing if a connection already exists.
     *
     * @return void
     * @throws Exception if the connection cannot be established
     */
    protected function connect(): void
    {
        if ($this->connection !== null) {
            return;
        }

        [$this->pdoDriver] = explode(':', $this->dataSourceName, 2);

        try {
            // Keep the handle local until it is fully configured, so that a failure
            // half-way does not leave a misconfigured connection behind that would
            // make a later connect() call return early.
            $connection = new \PDO($this->dataSourceName, $this->username, $this->password);
            $connection->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);

            if ($this->pdoDriver === 'mysql') {
                // ANSI mode makes the double quote an identifier quote character
                $connection->exec('SET SESSION sql_mode=\'ANSI\';');
            }
        } catch (\PDOException $exception) {
            throw new Exception(sprintf('Could not connect to index database with DSN "%s". PDO error: %s', $this->dataSourceName, $exception->getMessage()), 1576771168, $exception);
        }

        $this->connection = $connection;

        $this->createIndexTables();
        $this->loadAvailablePropertyFields();
    }

    /**
     * Store the properties and the fulltext of one object in the index.
     *
     * @param string $identifier identifier for the data
     * @param array $properties Properties to put into index
     * @param array $fullText array to push to fulltext index for this entry (keys are h1,h2,h3,h4,h5,h6,text) - all keys optional, results weighted by key
     * @return void
     * @throws \Throwable whatever the underlying statements throw; the transaction is rolled back first
     */
    public function indexData(string $identifier, array $properties, array $fullText): void
    {
        // ALTER TABLE causes an implicit commit in MySQL, so the schema has to be
        // adjusted before the transaction is opened - otherwise the two writes
        // below would silently run outside of any transaction.
        $this->adjustIndexToGivenProperties(array_keys($properties));

        $this->connection->exec('BEGIN');
        try {
            $this->insertOrUpdatePropertiesToIndex($properties, $identifier);
            $this->insertOrUpdateFulltextToIndex($fullText, $identifier);
            $this->connection->exec('COMMIT');
        } catch (\Throwable $exception) {
            $this->rollbackTransaction();
            throw $exception;
        }
    }

    /**
     * Remove the object with the given identifier from both index tables.
     *
     * @param string $identifier
     * @return void
     * @throws \Throwable whatever the underlying statements throw; the transaction is rolled back first
     */
    public function removeData(string $identifier): void
    {
        $this->connection->exec('BEGIN');
        try {
            $statement = $this->connection->prepare('DELETE FROM "fulltext_objects" WHERE "__identifier__" = :identifier');
            $statement->bindValue(':identifier', $identifier);
            $statement->execute();

            $statement = $this->connection->prepare('DELETE FROM "fulltext_index" WHERE "__identifier__" = :identifier');
            $statement->bindValue(':identifier', $identifier);
            $statement->execute();

            $this->connection->exec('COMMIT');
        } catch (\Throwable $exception) {
            $this->rollbackTransaction();
            throw $exception;
        }
    }

    /**
     * Write (REPLACE INTO) the row of the given object in "fulltext_objects".
     *
     * Array values are stored as a comma separated string. The columns for all
     * property names are expected to exist already.
     *
     * @param array $properties map of property name => value
     * @param string $identifier
     * @return void
     */
    public function insertOrUpdatePropertiesToIndex(array $properties, string $identifier): void
    {
        $columnNames = ['"__identifier__"'];
        $placeholders = [':__identifier__'];
        $boundValues = [':__identifier__' => $identifier];

        $statementArgumentNumber = 1;
        foreach ($properties as $propertyName => $propertyValue) {
            $placeholder = $this->preparedStatementArgumentName($statementArgumentNumber);
            $statementArgumentNumber++;

            $columnNames[] = $this->quoteIdentifier((string)$propertyName);
            $placeholders[] = $placeholder;
            $boundValues[$placeholder] = is_array($propertyValue) ? implode(',', $propertyValue) : $propertyValue;
        }

        // The lists are joined instead of trimming a trailing ", " off a string:
        // trimming with a character mask that contains "0" would also cut the
        // final zero off placeholders such as ":arg10".
        $preparedStatement = $this->connection->prepare(
            'REPLACE INTO "fulltext_objects" (' . implode(', ', $columnNames) . ') VALUES (' . implode(', ', $placeholders) . ')'
        );

        foreach ($boundValues as $placeholder => $value) {
            $preparedStatement->bindValue($placeholder, $value);
        }

        $preparedStatement->execute();
    }

    /**
     * Build the name of the n-th named placeholder used for property values.
     *
     * @param integer $argumentNumber
     * @return string
     */
    protected function preparedStatementArgumentName(int $argumentNumber): string
    {
        return ':arg' . $argumentNumber;
    }

    /**
     * Quote a column name for use in a statement. The double quote is the
     * identifier quote used throughout this class; an embedded double quote is
     * escaped by doubling it.
     *
     * @param string $identifier
     * @return string
     */
    protected function quoteIdentifier(string $identifier): string
    {
        return '"' . str_replace('"', '""', $identifier) . '"';
    }

    /**
     * Best-effort rollback used by the error paths of the writing methods.
     *
     * @return void
     */
    protected function rollbackTransaction(): void
    {
        try {
            $this->connection->exec('ROLLBACK');
        } catch (\PDOException $exception) {
            // Deliberately ignored: the caller re-throws the error that led here,
            // which is the one worth reporting.
        }
    }

    /**
     * Write (REPLACE INTO) the row of the given object in "fulltext_index".
     *
     * @param array $fulltext array (keys are h1,h2,h3,h4,h5,h6,text) - all keys optional
     * @param string $identifier
     */
    protected function insertOrUpdateFulltextToIndex(array $fulltext, string $identifier): void
    {
        $preparedStatement = $this->connection->prepare('REPLACE INTO "fulltext_index" ("__identifier__", "h1", "h2", "h3", "h4", "h5", "h6", "text") VALUES (:identifier, :h1, :h2, :h3, :h4, :h5, :h6, :text);');
        $preparedStatement->bindValue(':identifier', $identifier);
        $this->bindFulltextParametersToStatement($preparedStatement, $fulltext);
        $preparedStatement->execute();
    }

    /**
     * Append the given texts (separated by a space) to the existing fulltext
     * row of the given object. No row is created if none exists.
     *
     * @param array $fulltext array (keys are h1,h2,h3,h4,h5,h6,text) - all keys optional
     * @param string $identifier
     */
    public function addToFulltext(array $fulltext, string $identifier): void
    {
        $preparedStatement = $this->connection->prepare('UPDATE IGNORE "fulltext_index" SET "h1" = CONCAT("h1", \' \', :h1), "h2" = CONCAT("h2", \' \', :h2), "h3" = CONCAT("h3", \' \', :h3), "h4" = CONCAT("h4", \' \', :h4), "h5" = CONCAT("h5", \' \', :h5), "h6" = CONCAT("h6", \' \', :h6), "text" = CONCAT("text", \' \', :text) WHERE "__identifier__" = :identifier');
        $preparedStatement->bindValue(':identifier', $identifier);
        $this->bindFulltextParametersToStatement($preparedStatement, $fulltext);
        $preparedStatement->execute();
    }

    /**
     * Binds fulltext parameters to a prepared statement as this happens in multiple places.
     * Missing keys are bound as empty string.
     *
     * @param \PDOStatement $preparedStatement
     * @param array $fulltext array (keys are h1,h2,h3,h4,h5,h6,text) - all keys optional
     */
    protected function bindFulltextParametersToStatement(\PDOStatement $preparedStatement, array $fulltext): void
    {
        foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'text'] as $bucket) {
            $preparedStatement->bindValue(':' . $bucket, $fulltext[$bucket] ?? '');
        }
    }

    /**
     * Returns the "fulltext_objects" row for the given identifier as associative
     * array, or FALSE if there is no such row.
     *
     * @param string $identifier
     * @return array|false
     */
    public function findOneByIdentifier(string $identifier)
    {
        $statement = $this->connection->prepare('SELECT * FROM "fulltext_objects" WHERE "__identifier__" = :identifier LIMIT 1');
        $statement->bindValue(':identifier', $identifier);

        if ($statement->execute()) {
            return $statement->fetch(\PDO::FETCH_ASSOC);
        }

        return false;
    }

    /**
     * Execute a prepared statement and return all result rows as associative arrays.
     *
     * @param string $statementQuery The statement query
     * @param array $parameters The statement parameters as map (placeholder => value)
     * @return array
     */
    public function executeStatement(string $statementQuery, array $parameters): array
    {
        $statement = $this->connection->prepare($statementQuery);
        foreach ($parameters as $parameterName => $parameterValue) {
            $statement->bindValue($parameterName, $parameterValue);
        }

        if ($statement->execute()) {
            return $statement->fetchAll(\PDO::FETCH_ASSOC);
        }

        return [];
    }

    /**
     * @return string
     */
    public function getIndexName(): string
    {
        return $this->indexName;
    }

    /**
     * Completely empties the index by dropping and re-creating both tables.
     */
    public function flush(): void
    {
        // IF EXISTS: flushing must also work if a table has gone missing
        $this->connection->exec('DROP TABLE IF EXISTS "fulltext_objects"');
        $this->connection->exec('DROP TABLE IF EXISTS "fulltext_index"');
        $this->createIndexTables();
        // Same sequence as in connect(), so that the cached column list matches
        // the freshly created table.
        $this->loadAvailablePropertyFields();
    }

    /**
     * Intentionally a no-op in this implementation.
     */
    public function optimize(): void
    {
    }

    /**
     * Create the two index tables unless they already exist.
     *
     * @return void
     */
    protected function createIndexTables(): void
    {
        $result = $this->connection->query('SHOW TABLES');
        $tables = $result->fetchAll(\PDO::FETCH_COLUMN);

        if (!in_array('fulltext_objects', $tables, true)) {
            $this->connection->exec('CREATE TABLE "fulltext_objects" (
                "__identifier__" VARCHAR(40),
                PRIMARY KEY ("__identifier__")
            ) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci ENGINE = InnoDB');
            $this->propertyFieldsAvailable = [];
        }

        if (!in_array('fulltext_index', $tables, true)) {
            $this->connection->exec('CREATE TABLE "fulltext_index" (
                "__identifier__" VARCHAR(40),
                "h1" MEDIUMTEXT,
                "h2" MEDIUMTEXT,
                "h3" MEDIUMTEXT,
                "h4" MEDIUMTEXT,
                "h5" MEDIUMTEXT,
                "h6" MEDIUMTEXT,
                "text" MEDIUMTEXT,
                PRIMARY KEY ("__identifier__"),
                FULLTEXT nodeindex ("h1",
                    "h2",
                    "h3",
                    "h4",
                    "h5",
                    "h6",
                    "text")
            ) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci ENGINE = InnoDB'
            );
        }
    }

    /**
     * Read the column names of "fulltext_objects" into $propertyFieldsAvailable.
     *
     * @return void
     */
    protected function loadAvailablePropertyFields(): void
    {
        $result = $this->connection->query('DESCRIBE fulltext_objects');
        // First column of DESCRIBE output is the field name
        $this->propertyFieldsAvailable = $result->fetchAll(\PDO::FETCH_COLUMN);
    }

    /**
     * Add a nullable MEDIUMTEXT column for the given property to "fulltext_objects".
     *
     * @param string $propertyName
     */
    protected function addPropertyToIndex(string $propertyName): void
    {
        $this->connection->exec('ALTER TABLE "fulltext_objects" ADD COLUMN ' . $this->quoteIdentifier($propertyName) . ' MEDIUMTEXT DEFAULT NULL');
        $this->propertyFieldsAvailable[] = $propertyName;
    }

    /**
     * Make sure a column exists for every given property name.
     *
     * @param array $propertyNames
     * @return void
     */
    protected function adjustIndexToGivenProperties(array $propertyNames): void
    {
        foreach ($propertyNames as $propertyName) {
            // array_keys() yields integers for numeric keys; column names are strings
            $propertyName = (string)$propertyName;
            if (!in_array($propertyName, $this->propertyFieldsAvailable, true)) {
                $this->addPropertyToIndex($propertyName);
            }
        }
    }
}

0 commit comments

Comments
 (0)