Skip to content

Commit 36e8d9a

Browse files
aamCommit Queue
authored and
Commit Queue
committed
[vm/compare] Speed up internal one/two-byte strings comparisons.
Ensure padding around data and length fields for these strings is zeroed-out, allowing for comparison to be done in word-sized chunks. For configurations where the hash is not in the header, swap the length and the hash in string class so that the length is adjacent to the data. x64 === LongStringCompare.2.3000reps 109.2% LongStringCompare.32.1000reps 526.4% LongStringCompare.1024.30reps 670.1% === ia32 === LongStringCompare.2.3000reps 90.68% LongStringCompare.32.1000reps 274.5% LongStringCompare.1024.30reps 211.4% === arm64 === LongStringCompare.2.3000reps 89.28% LongStringCompare.32.1000reps 454.5% LongStringCompare.1024.30reps 677.7% === arm === LongStringCompare.2.3000reps 68.76% LongStringCompare.32.1000reps 330.9% LongStringCompare.1024.30reps 426.8% === BUG=#50190 TEST=string_equals_test Change-Id: Ia4ab9bcb4daca5d4f403e0ece364bb6aafb68577 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/269600 Commit-Queue: Alexander Aprelev <[email protected]> Reviewed-by: Ryan Macnak <[email protected]>
1 parent fc4d9c6 commit 36e8d9a

13 files changed

+359
-128
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
//
5+
// Verifies that string equal implementation correctly handles strings of
6+
// various lengths.
7+
8+
import 'dart:math';
9+
10+
import "package:expect/expect.dart";
11+
12+
compare(List<int> ints, String s_piece, String t_piece, bool expectedEquality) {
13+
final s = String.fromCharCodes(ints);
14+
String s_mid = s + s_piece + s;
15+
String t_mid = s + t_piece + s;
16+
Expect.isFalse(identical(s_mid, t_mid));
17+
Expect.equals(s_mid == t_mid, expectedEquality);
18+
String s_tail = s + s_piece;
19+
String t_tail = s + t_piece;
20+
Expect.isFalse(identical(s_tail, t_tail));
21+
Expect.equals(s_tail == t_tail, expectedEquality);
22+
String s_head = s_piece + s;
23+
String t_head = t_piece + s;
24+
Expect.isFalse(identical(s_head, t_head));
25+
Expect.equals(s_head == t_head, expectedEquality);
26+
}
27+
28+
main() {
29+
const int maxStringLength = 128;
30+
// OneByteString
31+
for (int i = 0; i < maxStringLength; i++) {
32+
final l = List.generate(i, (n) => (Random().nextInt(30) + 40));
33+
compare(l, 'a', 'b', false);
34+
compare(l, 'a', 'a', true);
35+
}
36+
// TwoByteString
37+
for (int i = 0; i < maxStringLength; i++) {
38+
final l = List.generate(i, (n) => (Random().nextInt(1024) + 1024));
39+
compare(l, String.fromCharCodes(<int>[1042]),
40+
String.fromCharCodes(<int>[1043]), false);
41+
compare(l, String.fromCharCodes(<int>[1042]),
42+
String.fromCharCodes(<int>[1042]), true);
43+
}
44+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
//
5+
// @dart = 2.9
6+
//
7+
// Verifies that string equal implementation correctly handles strings of
8+
// various lengths.
9+
10+
import 'dart:math';
11+
12+
import "package:expect/expect.dart";
13+
14+
compare(List<int> ints, String s_piece, String t_piece, bool expectedEquality) {
15+
final s = String.fromCharCodes(ints);
16+
String s_mid = s + s_piece + s;
17+
String t_mid = s + t_piece + s;
18+
Expect.isFalse(identical(s_mid, t_mid));
19+
Expect.equals(s_mid == t_mid, expectedEquality);
20+
String s_tail = s + s_piece;
21+
String t_tail = s + t_piece;
22+
Expect.isFalse(identical(s_tail, t_tail));
23+
Expect.equals(s_tail == t_tail, expectedEquality);
24+
String s_head = s_piece + s;
25+
String t_head = t_piece + s;
26+
Expect.isFalse(identical(s_head, t_head));
27+
Expect.equals(s_head == t_head, expectedEquality);
28+
}
29+
30+
main() {
31+
const int maxStringLength = 128;
32+
// OneByteString
33+
for (int i = 0; i < maxStringLength; i++) {
34+
final l = List.generate(i, (n) => (Random().nextInt(30) + 40));
35+
compare(l, 'a', 'b', false);
36+
compare(l, 'a', 'a', true);
37+
}
38+
// TwoByteString
39+
for (int i = 0; i < maxStringLength; i++) {
40+
final l = List.generate(i, (n) => (Random().nextInt(1024) + 1024));
41+
compare(l, String.fromCharCodes(<int>[1042]),
42+
String.fromCharCodes(<int>[1043]), false);
43+
compare(l, String.fromCharCodes(<int>[1042]),
44+
String.fromCharCodes(<int>[1042]), true);
45+
}
46+
}

runtime/vm/app_snapshot.cc

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5977,16 +5977,41 @@ class StringDeserializationCluster
59775977
const intptr_t encoded = d.ReadUnsigned();
59785978
intptr_t cid = 0;
59795979
const intptr_t length = DecodeLengthAndCid(encoded, &cid);
5980-
Deserializer::InitializeHeader(str, cid, InstanceSize(length, cid),
5980+
const intptr_t instance_size = InstanceSize(length, cid);
5981+
// Clean up last two words of the string object to simplify future
5982+
// string comparisons.
5983+
// Objects are rounded up to two-word size boundary.
5984+
*reinterpret_cast<word*>(reinterpret_cast<uint8_t*>(str->untag()) +
5985+
instance_size - 1 * kWordSize) = 0;
5986+
*reinterpret_cast<word*>(reinterpret_cast<uint8_t*>(str->untag()) +
5987+
instance_size - 2 * kWordSize) = 0;
5988+
Deserializer::InitializeHeader(str, cid, instance_size,
59815989
primary && is_canonical());
5990+
#if DART_COMPRESSED_POINTERS
5991+
// Gap caused by less-than-a-word length_ smi sitting before data_.
5992+
const intptr_t length_offset =
5993+
reinterpret_cast<intptr_t>(&str->untag()->length_);
5994+
const intptr_t data_offset =
5995+
cid == kOneByteStringCid
5996+
? reinterpret_cast<intptr_t>(
5997+
static_cast<OneByteStringPtr>(str)->untag()->data())
5998+
: reinterpret_cast<intptr_t>(
5999+
static_cast<TwoByteStringPtr>(str)->untag()->data());
6000+
const intptr_t length_with_gap = data_offset - length_offset;
6001+
ASSERT(length_with_gap > kCompressedWordSize);
6002+
ASSERT(length_with_gap == kWordSize);
6003+
memset(reinterpret_cast<void*>(length_offset), 0, length_with_gap);
6004+
#endif
59826005
str->untag()->length_ = Smi::New(length);
6006+
59836007
StringHasher hasher;
59846008
if (cid == kOneByteStringCid) {
59856009
for (intptr_t j = 0; j < length; j++) {
59866010
uint8_t code_unit = d.Read<uint8_t>();
59876011
static_cast<OneByteStringPtr>(str)->untag()->data()[j] = code_unit;
59886012
hasher.Add(code_unit);
59896013
}
6014+
59906015
} else {
59916016
for (intptr_t j = 0; j < length; j++) {
59926017
uint16_t code_unit = d.Read<uint8_t>();

runtime/vm/compiler/asm_intrinsifier_arm.cc

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,12 @@ static void TryAllocateString(Assembler* assembler,
15721572
// next object start and initialize the object.
15731573
__ str(R1, Address(THR, target::Thread::top_offset()));
15741574
__ AddImmediate(R0, kHeapObjectTag);
1575+
// Clear last double word to ensure string comparison doesn't need to
1576+
// specially handle remainder of strings with lengths not factors of double
1577+
// offsets.
1578+
__ LoadImmediate(TMP, 0);
1579+
__ str(TMP, Address(R1, -1 * target::kWordSize));
1580+
__ str(TMP, Address(R1, -2 * target::kWordSize));
15751581

15761582
// Initialize the tags.
15771583
// R0: new object start as a tagged pointer.
@@ -1726,7 +1732,7 @@ static void StringEquality(Assembler* assembler,
17261732
__ cmp(R0, Operand(R1));
17271733
__ b(&is_true, EQ);
17281734

1729-
// Is other OneByteString?
1735+
// Is other same kind of string?
17301736
__ tst(R1, Operand(kSmiTagMask));
17311737
__ b(normal_ir_body, EQ);
17321738
__ CompareClassId(R1, string_cid, R2);
@@ -1742,29 +1748,30 @@ static void StringEquality(Assembler* assembler,
17421748
// TODO(zra): try out other sequences.
17431749
ASSERT((string_cid == kOneByteStringCid) ||
17441750
(string_cid == kTwoByteStringCid));
1745-
const intptr_t offset = (string_cid == kOneByteStringCid)
1746-
? target::OneByteString::data_offset()
1747-
: target::TwoByteString::data_offset();
1748-
__ AddImmediate(R0, offset - kHeapObjectTag);
1749-
__ AddImmediate(R1, offset - kHeapObjectTag);
1750-
__ SmiUntag(R2);
1751+
if (string_cid == kOneByteStringCid) {
1752+
__ SmiUntag(R2);
1753+
}
1754+
// R2 is length of data in bytes.
1755+
// Round up number of bytes to compare to word boundary since we
1756+
// are doing comparison in word chunks.
1757+
__ AddImmediate(R2, target::kWordSize - 1);
1758+
__ LsrImmediate(R2, R2, target::kWordSizeLog2);
1759+
ASSERT(target::OneByteString::data_offset() ==
1760+
target::String::length_offset() + target::kWordSize);
1761+
ASSERT(target::TwoByteString::data_offset() ==
1762+
target::String::length_offset() + target::kWordSize);
1763+
COMPILE_ASSERT(target::kWordSize == 4);
1764+
__ AddImmediate(
1765+
R0, target::String::length_offset() + target::kWordSize - kHeapObjectTag);
1766+
__ AddImmediate(
1767+
R1, target::String::length_offset() + target::kWordSize - kHeapObjectTag);
1768+
17511769
__ Bind(&loop);
17521770
__ AddImmediate(R2, -1);
17531771
__ cmp(R2, Operand(0));
17541772
__ b(&is_true, LT);
1755-
if (string_cid == kOneByteStringCid) {
1756-
__ ldrb(R3, Address(R0));
1757-
__ ldrb(R4, Address(R1));
1758-
__ AddImmediate(R0, 1);
1759-
__ AddImmediate(R1, 1);
1760-
} else if (string_cid == kTwoByteStringCid) {
1761-
__ ldrh(R3, Address(R0));
1762-
__ ldrh(R4, Address(R1));
1763-
__ AddImmediate(R0, 2);
1764-
__ AddImmediate(R1, 2);
1765-
} else {
1766-
UNIMPLEMENTED();
1767-
}
1773+
__ ldr(R3, Address(R0, 4, Address::PostIndex));
1774+
__ ldr(R4, Address(R1, 4, Address::PostIndex));
17681775
__ cmp(R3, Operand(R4));
17691776
__ b(&is_false, NE);
17701777
__ b(&loop);

runtime/vm/compiler/asm_intrinsifier_arm64.cc

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,6 +1789,10 @@ static void TryAllocateString(Assembler* assembler,
17891789
// next object start and initialize the object.
17901790
__ str(R1, Address(THR, target::Thread::top_offset()));
17911791
__ AddImmediate(R0, kHeapObjectTag);
1792+
// Clear last double word to ensure string comparison doesn't need to
1793+
// specially handle remainder of strings with lengths not factors of double
1794+
// offsets.
1795+
__ stp(ZR, ZR, Address(R1, -2 * target::kWordSize, Address::PairOffset));
17921796

17931797
// Initialize the tags.
17941798
// R0: new object start as a tagged pointer.
@@ -1812,6 +1816,10 @@ static void TryAllocateString(Assembler* assembler,
18121816
__ str(R2, FieldAddress(R0, target::Object::tags_offset())); // Store tags.
18131817
}
18141818

1819+
#if DART_COMPRESSED_POINTERS
1820+
// Clear out padding caused by alignment gap between length and data.
1821+
__ str(ZR, FieldAddress(R0, target::String::length_offset()));
1822+
#endif
18151823
// Set the length field using the saved length (R6).
18161824
__ StoreCompressedIntoObjectNoBarrier(
18171825
R0, FieldAddress(R0, target::String::length_offset()), R6);
@@ -1953,44 +1961,43 @@ static void StringEquality(Assembler* assembler,
19531961
__ CompareObjectRegisters(R0, R1);
19541962
__ b(&is_true, EQ);
19551963

1956-
// Is other OneByteString?
1964+
// Is other same kind of string?
19571965
__ BranchIfSmi(R1, normal_ir_body);
19581966
__ CompareClassId(R1, string_cid);
19591967
__ b(normal_ir_body, NE);
19601968

19611969
// Have same length?
1962-
__ LoadCompressedSmi(R2, FieldAddress(R0, target::String::length_offset()));
1963-
__ LoadCompressedSmi(R3, FieldAddress(R1, target::String::length_offset()));
1964-
__ CompareObjectRegisters(R2, R3);
1970+
__ ldr(R2, FieldAddress(R0, target::String::length_offset()));
1971+
__ ldr(R3, FieldAddress(R1, target::String::length_offset()));
1972+
__ CompareRegisters(R2, R3);
19651973
__ b(&is_false, NE);
19661974

1967-
// Check contents, no fall-through possible.
1968-
// TODO(zra): try out other sequences.
19691975
ASSERT((string_cid == kOneByteStringCid) ||
19701976
(string_cid == kTwoByteStringCid));
1971-
const intptr_t offset = (string_cid == kOneByteStringCid)
1972-
? target::OneByteString::data_offset()
1973-
: target::TwoByteString::data_offset();
1974-
__ AddImmediate(R0, offset - kHeapObjectTag);
1975-
__ AddImmediate(R1, offset - kHeapObjectTag);
1976-
__ SmiUntag(R2);
1977+
if (string_cid == kOneByteStringCid) {
1978+
__ SmiUntag(R2);
1979+
}
1980+
// R2 is length of data in bytes.
1981+
// Round up number of bytes to compare to word boundary since we
1982+
// are doing comparison in word chunks.
1983+
__ AddImmediate(R2, target::kWordSize - 1);
1984+
__ LsrImmediate(R2, R2, target::kWordSizeLog2);
1985+
ASSERT(target::OneByteString::data_offset() ==
1986+
target::String::length_offset() + target::kWordSize);
1987+
ASSERT(target::TwoByteString::data_offset() ==
1988+
target::String::length_offset() + target::kWordSize);
1989+
COMPILE_ASSERT(target::kWordSize == 8);
1990+
__ AddImmediate(
1991+
R0, target::String::length_offset() + target::kWordSize - kHeapObjectTag);
1992+
__ AddImmediate(
1993+
R1, target::String::length_offset() + target::kWordSize - kHeapObjectTag);
1994+
19771995
__ Bind(&loop);
19781996
__ AddImmediate(R2, -1);
19791997
__ CompareRegisters(R2, ZR);
19801998
__ b(&is_true, LT);
1981-
if (string_cid == kOneByteStringCid) {
1982-
__ ldr(R3, Address(R0), kUnsignedByte);
1983-
__ ldr(R4, Address(R1), kUnsignedByte);
1984-
__ AddImmediate(R0, 1);
1985-
__ AddImmediate(R1, 1);
1986-
} else if (string_cid == kTwoByteStringCid) {
1987-
__ ldr(R3, Address(R0), kUnsignedTwoBytes);
1988-
__ ldr(R4, Address(R1), kUnsignedTwoBytes);
1989-
__ AddImmediate(R0, 2);
1990-
__ AddImmediate(R1, 2);
1991-
} else {
1992-
UNIMPLEMENTED();
1993-
}
1999+
__ ldr(R3, Address(R0, 8, Address::PostIndex));
2000+
__ ldr(R4, Address(R1, 8, Address::PostIndex));
19942001
__ cmp(R3, Operand(R4));
19952002
__ b(&is_false, NE);
19962003
__ b(&loop);

runtime/vm/compiler/asm_intrinsifier_ia32.cc

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,12 @@ static void TryAllocateString(Assembler* assembler,
16271627
// next object start and initialize the object.
16281628
__ movl(Address(THR, target::Thread::top_offset()), EBX);
16291629
__ addl(EAX, Immediate(kHeapObjectTag));
1630-
1630+
// Clear last double word to ensure string comparison doesn't need to
1631+
// specially handle remainder of strings with lengths not factors of double
1632+
// offsets.
1633+
ASSERT(target::kWordSize == 4);
1634+
__ movl(Address(EBX, -1 * target::kWordSize), Immediate(0));
1635+
__ movl(Address(EBX, -2 * target::kWordSize), Immediate(0));
16311636
// Initialize the tags.
16321637
// EAX: new object start as a tagged pointer.
16331638
// EBX: new object end address.
@@ -1777,7 +1782,7 @@ static void StringEquality(Assembler* assembler,
17771782
__ cmpl(EAX, EBX);
17781783
__ j(EQUAL, &is_true, Assembler::kNearJump);
17791784

1780-
// Is other OneByteString?
1785+
// Is other same kind of string?
17811786
__ testl(EBX, Immediate(kSmiTagMask));
17821787
__ j(ZERO, &is_false); // Smi
17831788
__ CompareClassId(EBX, string_cid, EDI);
@@ -1788,27 +1793,28 @@ static void StringEquality(Assembler* assembler,
17881793
__ cmpl(EDI, FieldAddress(EBX, target::String::length_offset()));
17891794
__ j(NOT_EQUAL, &is_false, Assembler::kNearJump);
17901795

1791-
// Check contents, no fall-through possible.
1792-
// TODO(srdjan): write a faster check.
1793-
__ SmiUntag(EDI);
1796+
if (string_cid == kOneByteStringCid) {
1797+
__ SmiUntag(EDI);
1798+
}
1799+
1800+
// Round up number of bytes to compare to word boundary since we
1801+
// are doing comparison in word chunks.
1802+
__ addl(EDI, Immediate(target::kWordSize - 1));
1803+
__ sarl(EDI, Immediate(target::kWordSizeLog2));
17941804
__ Bind(&loop);
17951805
__ decl(EDI);
1796-
__ cmpl(EDI, Immediate(0));
17971806
__ j(LESS, &is_true, Assembler::kNearJump);
1798-
if (string_cid == kOneByteStringCid) {
1799-
__ movzxb(ECX, FieldAddress(EAX, EDI, TIMES_1,
1800-
target::OneByteString::data_offset()));
1801-
__ movzxb(EDX, FieldAddress(EBX, EDI, TIMES_1,
1802-
target::OneByteString::data_offset()));
1803-
} else if (string_cid == kTwoByteStringCid) {
1804-
__ movzxw(ECX, FieldAddress(EAX, EDI, TIMES_2,
1805-
target::TwoByteString::data_offset()));
1806-
__ movzxw(EDX, FieldAddress(EBX, EDI, TIMES_2,
1807-
target::TwoByteString::data_offset()));
1808-
} else {
1809-
UNIMPLEMENTED();
1810-
}
1811-
__ cmpl(ECX, EDX);
1807+
ASSERT(target::OneByteString::data_offset() ==
1808+
target::String::length_offset() + target::kWordSize);
1809+
ASSERT(target::TwoByteString::data_offset() ==
1810+
target::String::length_offset() + target::kWordSize);
1811+
COMPILE_ASSERT(target::kWordSize == 4);
1812+
__ movl(ECX, FieldAddress(EAX, EDI, TIMES_4,
1813+
target::String::length_offset() +
1814+
target::kWordSize)); // word with length itself
1815+
__ cmpl(ECX, FieldAddress(EBX, EDI, TIMES_4,
1816+
target::String::length_offset() +
1817+
target::kWordSize)); // word with length itself
18121818
__ j(NOT_EQUAL, &is_false, Assembler::kNearJump);
18131819
__ jmp(&loop, Assembler::kNearJump);
18141820

0 commit comments

Comments
 (0)