Skip to content

Commit e008593

Browse files
Issue #2926733 by drunken monkey, borisson_: Fixed indexing of leading/trailing whitespace in fulltext tokens on database backend.
1 parent 87cb223 commit e008593

File tree

3 files changed

+44
-0
lines changed

3 files changed

+44
-0
lines changed

Diff for: CHANGELOG.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
Search API 1.x, dev (xxxx-xx-xx):
22
---------------------------------
3+
- #2926733 by drunken monkey, borisson_: Fixed indexing of leading/trailing
4+
whitespace in fulltext tokens on database backend.
35
- #2922024 by drunken monkey, borisson_: Fixed Stemmer incorrectly processing
46
non-English searches.
57
- #2931730 by drunken monkey: Adapted tests to changes in drupal_set_message().

Diff for: modules/search_api_db/src/Plugin/search_api/backend/Database.php

+6
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,12 @@ protected function indexItem(IndexInterface $index, ItemInterface $item) {
12341234
$word = $token->getText();
12351235
$score = $token->getBoost();
12361236

1237+
// In rare cases, tokens with leading or trailing whitespace can
1238+
// slip through. Since this can lead to errors when such tokens are
1239+
// part of a primary key (as in this case), we trim such whitespace
1240+
// here.
1241+
$word = trim($word);
1242+
12371243
// Store the first 30 characters of the string as the denormalized
12381244
// value.
12391245
if (Unicode::strlen($denormalized_value) < 30) {

Diff for: modules/search_api_db/tests/src/Kernel/BackendTest.php

+36
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44

55
use Drupal\Component\Render\FormattableMarkup;
66
use Drupal\search_api\Entity\Server;
7+
use Drupal\search_api\Plugin\search_api\data_type\value\TextToken;
8+
use Drupal\search_api\Plugin\search_api\data_type\value\TextValue;
79
use Drupal\search_api\Query\QueryInterface;
810
use Drupal\search_api\SearchApiException;
11+
use Drupal\search_api\Utility\Utility;
912
use Drupal\search_api_db\Plugin\search_api\backend\Database;
1013
use Drupal\search_api_db\Tests\DatabaseTestsTrait;
1114
use Drupal\Tests\search_api\Kernel\BackendTestBase;
@@ -80,6 +83,7 @@ protected function backendSpecificRegressionTests() {
8083
$this->regressionTest2557291();
8184
$this->regressionTest2511860();
8285
$this->regressionTest2846932();
86+
$this->regressionTest2926733();
8387
}
8488

8589
/**
@@ -476,6 +480,38 @@ protected function regressionTest2846932() {
476480
$index->save();
477481
}
478482

483+
/**
484+
* Tests indexing of text tokens with leading/trailing whitespace.
485+
*
* Builds an item whose "body" field holds hand-made text tokens, some of
* which start and/or end with a space, and passes it straight to the
* server's indexItems() method. The method contains no explicit assertion;
* presumably the test fails if indexing raises an error - TODO confirm.
* Afterwards the item is marked as updated and re-indexed normally.
*
486+
* @see https://www.drupal.org/node/2926733
487+
*/
488+
protected function regressionTest2926733() {
489+
$index = $this->getIndex();
490+
$item_id = $this->getItemIds([1])[0];
491+
$fields_helper = \Drupal::getContainer()
492+
->get('search_api.fields_helper');
493+
$item = $fields_helper->createItem($index, $item_id);
494+
// Clone the field so that setValues() below is applied to a copy rather
// than to the field object returned by the index.
$field = clone $index->getField('body');
495+
$value = new TextValue('test');
496+
$tokens = [];
497+
// One token per whitespace variant: none, leading, trailing and both.
// NOTE(review): ' test' is listed twice; possibly a second variant (e.g.,
// with more than one space) was collapsed when this page was extracted -
// verify against the repository.
foreach (['test', ' test', ' test', 'test ', ' test '] as $token) {
498+
$tokens[] = new TextToken($token);
499+
}
500+
$value->setTokens($tokens);
501+
$field->setValues([$value]);
502+
$item->setFields([
503+
'body' => $field,
504+
]);
505+
// Presumably marks the fields as already extracted so that the hand-made
// tokens are not replaced by freshly extracted values - TODO confirm.
$item->setFieldsExtracted(TRUE);
506+
// Hand the prepared item directly to the server for indexing.
$index->getServerInstance()->indexItems($index, [$item_id => $item]);
507+
508+
// Make sure to re-index the proper version of the item to avoid confusing
509+
// the other tests.
510+
list($datasource_id, $raw_id) = Utility::splitCombinedId($item_id);
511+
$index->trackItemsUpdated($datasource_id, [$raw_id]);
512+
$this->indexItems($index->id());
513+
}
514+
479515
/**
480516
* {@inheritdoc}
481517
*/

0 commit comments

Comments
 (0)