Skip to content

Commit c4a0f49

Browse files
committed
Add ARRAY_UNIQUE_IDENTICAL option
1 parent 9e6588b commit c4a0f49

16 files changed

+943
-2
lines changed

Diff for: Zend/zend_portability.h

+53
Original file line numberDiff line numberDiff line change
@@ -701,4 +701,57 @@ extern "C++" {
701701
# define ZEND_INDIRECT_RETURN
702702
#endif
703703

704+
// bswap compiler checks copied from https://github.com/google/cityhash/blob/8af9b8c2b889d80c22d6bc26ba0df1afb79a30db/src/city.cc#L50
705+
//
706+
// Copyright (c) 2011 Google, Inc.
707+
//
708+
// Permission is hereby granted, free of charge, to any person obtaining a copy
709+
// of this software and associated documentation files (the "Software"), to deal
710+
// in the Software without restriction, including without limitation the rights
711+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
712+
// copies of the Software, and to permit persons to whom the Software is
713+
// furnished to do so, subject to the following conditions:
714+
//
715+
// The above copyright notice and this permission notice shall be included in
716+
// all copies or substantial portions of the Software.
717+
//
718+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
719+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
720+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
721+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
722+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
723+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
724+
// THE SOFTWARE.
725+
/*
 * ZEND_BSWAP_32(x) / ZEND_BSWAP_64(x): reverse the byte order of a 32-bit /
 * 64-bit unsigned integer using whichever primitive the platform offers.
 */
#ifdef _MSC_VER
# include <stdlib.h>
# define ZEND_BSWAP_32(x) _byteswap_ulong(x)
# define ZEND_BSWAP_64(x) _byteswap_uint64(x)
#elif defined(__APPLE__)
// Mac OS X / Darwin features
# include <libkern/OSByteOrder.h>
# define ZEND_BSWAP_32(x) OSSwapInt32(x)
# define ZEND_BSWAP_64(x) OSSwapInt64(x)
#elif defined(__sun) || defined(sun)
# include <sys/byteorder.h>
# define ZEND_BSWAP_32(x) BSWAP_32(x)
# define ZEND_BSWAP_64(x) BSWAP_64(x)
#elif defined(__FreeBSD__)
# include <sys/endian.h>
# define ZEND_BSWAP_32(x) bswap32(x)
# define ZEND_BSWAP_64(x) bswap64(x)
#elif defined(__OpenBSD__)
# include <sys/types.h>
# define ZEND_BSWAP_32(x) swap32(x)
# define ZEND_BSWAP_64(x) swap64(x)
#elif defined(__NetBSD__)
# include <sys/types.h>
# include <machine/bswap.h>
# define ZEND_BSWAP_32(x) bswap32(x)
# define ZEND_BSWAP_64(x) bswap64(x)
#elif defined(__GNUC__) || defined(__clang__)
/* The compiler builtins need no header at all, so use them for every other
 * GCC/Clang target instead of depending on the non-standard <byteswap.h>. */
# define ZEND_BSWAP_32(x) __builtin_bswap32(x)
# define ZEND_BSWAP_64(x) __builtin_bswap64(x)
#else
/* Last resort for other compilers: assumes the libc ships <byteswap.h>. */
# include <byteswap.h>
# define ZEND_BSWAP_32(x) bswap_32(x)
# define ZEND_BSWAP_64(x) bswap_64(x)
#endif
756+
704757
#endif /* ZEND_PORTABILITY_H */

Diff for: Zend/zend_stricthash.c

+185
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
/*
2+
Copyright (c) 2021 Tyson Andre
3+
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
9+
- Redistributions of source code must retain the above copyright notice, this
10+
list of conditions and the following disclaimer.
11+
12+
- Redistributions in binary form must reproduce the above copyright notice,
13+
this list of conditions and the following disclaimer in the documentation
14+
and/or other materials provided with the distribution.
15+
16+
- Neither the name of the 'teds' nor the names of its contributors may
17+
be used to endorse or promote products derived from this software without
18+
specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30+
THE POSSIBILITY OF SUCH DAMAGE.
31+
*/
32+
33+
#include "php.h"
34+
#include "php_ini.h"
35+
36+
#include "Zend/zend_strictmap.h"
37+
38+
/* Raw (pre-mixing) hash values returned for null, false, true and the empty array. */
#define ZEND_STRICTHASH_HASH_NULL        8310
#define ZEND_STRICTHASH_HASH_FALSE       8311
#define ZEND_STRICTHASH_HASH_TRUE        8312
#define ZEND_STRICTHASH_HASH_EMPTY_ARRAY 8313

/*
 * Raw value returned for every NaN double.
 *
 * See https://en.wikipedia.org/wiki/NaN
 * For NaN, the 12 most significant bits are:
 * - 1 sign bit (0 or 1)
 * - 11 exponent bits (all set)
 * (and at least one of the significand bits must be non-zero)
 *
 * Here, 0xff is the most significant byte, holding the sign and part of the
 * exponent, and 0xf8 is the second most significant byte, holding the rest of
 * the exponent and the start of the significand.
 *
 * The constant is an arbitrary choice of 0xff f_, with the bytes in reverse order.
 */
#define ZEND_STRICTHASH_HASH_NAN 0xf8ff

/* Offsets added to the raw value of a double, an object handle and a resource handle. */
#define ZEND_STRICTHASH_HASH_OFFSET_DOUBLE   8315
#define ZEND_STRICTHASH_HASH_OFFSET_OBJECT   31415926
#define ZEND_STRICTHASH_HASH_OFFSET_RESOURCE 27182818
59+
60+
typedef struct _array_rec_prot_node {
61+
const zend_array *ht;
62+
const struct _array_rec_prot_node *prev;
63+
} array_rec_prot_node;
64+
65+
static zend_long zend_stricthash_array(HashTable *const ht, const array_rec_prot_node *const node);
66+
static uint64_t zend_convert_double_to_uint64_t(double value);
67+
static zend_always_inline zend_long zend_stricthash_inner(zval *value, array_rec_prot_node *node);
68+
69+
/*
 * Mix a 64-bit value: multiply it by a fixed odd constant (wrapping modulo
 * 2^64), then reverse the byte order of the product.
 */
static zend_always_inline uint64_t zend_inline_hash_of_uint64(uint64_t orig) {
	/* Copied from code written for igbinary. Works best when data that
	 * frequently differs is in the least significant bits of data. */
	const uint64_t multiplier = 0x5e2d58d8b3bce8d9;
	uint64_t product = orig * multiplier;

	return ZEND_BSWAP_64(product);
}
75+
76+
/*
 * Public entry point: hash a zval.
 *
 * The raw per-type value comes from zend_stricthash_inner(), called with no
 * enclosing-array chain (NULL), and is then mixed by zend_inline_hash_of_uint64().
 */
zend_long zend_stricthash_hash(zval *value) {
	const uint64_t unmixed = (uint64_t) zend_stricthash_inner(value, NULL);

	return (zend_long) zend_inline_hash_of_uint64(unmixed);
}
80+
81+
/*
 * Raw (unmixed) hash of one zval, chosen by its type.
 *
 * References and INDIRECT slots are followed to the value they point at.
 * `node` is only passed on to zend_stricthash_array() for array values.
 */
static zend_always_inline zend_long zend_stricthash_inner(zval *value, array_rec_prot_node *node) {
	/* Unwrap until the zval is neither a reference nor an INDIRECT slot. */
	for (;;) {
		if (Z_TYPE_P(value) == IS_REFERENCE) {
			value = Z_REFVAL_P(value);
		} else if (Z_TYPE_P(value) == IS_INDIRECT) {
			value = Z_INDIRECT_P(value);
		} else {
			break;
		}
	}

	switch (Z_TYPE_P(value)) {
		/* Types with a single possible value map to fixed constants. */
		case IS_NULL:
			return ZEND_STRICTHASH_HASH_NULL;
		case IS_FALSE:
			return ZEND_STRICTHASH_HASH_FALSE;
		case IS_TRUE:
			return ZEND_STRICTHASH_HASH_TRUE;

		/* Scalars: integers hash to themselves, strings to their string hash. */
		case IS_LONG:
			return Z_LVAL_P(value);
		case IS_DOUBLE:
			return zend_convert_double_to_uint64_t(Z_DVAL_P(value)) + ZEND_STRICTHASH_HASH_OFFSET_DOUBLE;
		case IS_STRING:
			return ZSTR_HASH(Z_STR_P(value));

		/* Arrays are hashed from their keys and values. */
		case IS_ARRAY:
			return zend_stricthash_array(Z_ARR_P(value), node);

		/* Objects and resources are hashed by handle, not by contents. */
		case IS_OBJECT:
			return Z_OBJ_HANDLE_P(value) + ZEND_STRICTHASH_HASH_OFFSET_OBJECT;
		case IS_RESOURCE:
			return Z_RES_HANDLE_P(value) + ZEND_STRICTHASH_HASH_OFFSET_RESOURCE;

		EMPTY_SWITCH_DEFAULT_CASE();
	}
}
111+
112+
inline static uint64_t zend_convert_double_to_uint64_t(double value) {
113+
if (value == 0) {
114+
/* Signed positive and negative 0 have different bits. However, $signedZero === $signedNegativeZero in php and many other languages. */
115+
return 0;
116+
}
117+
if (UNEXPECTED(isnan(value))) {
118+
return ZEND_STRICTHASH_HASH_NAN;
119+
}
120+
uint8_t *data = (uint8_t *)&value;
121+
#ifndef WORDS_BIGENDIAN
122+
return
123+
(((uint64_t)data[0]) << 56) |
124+
(((uint64_t)data[1]) << 48) |
125+
(((uint64_t)data[2]) << 40) |
126+
(((uint64_t)data[3]) << 32) |
127+
(((uint64_t)data[4]) << 24) |
128+
(((uint64_t)data[5]) << 16) |
129+
(((uint64_t)data[6]) << 8) |
130+
(((uint64_t)data[7]));
131+
#else
132+
return
133+
(((uint64_t)data[7]) << 56) |
134+
(((uint64_t)data[6]) << 48) |
135+
(((uint64_t)data[5]) << 40) |
136+
(((uint64_t)data[4]) << 32) |
137+
(((uint64_t)data[3]) << 24) |
138+
(((uint64_t)data[2]) << 16) |
139+
(((uint64_t)data[1]) << 8) |
140+
(((uint64_t)data[0]));
141+
#endif
142+
}
143+
144+
/*
 * Raw hash of an array, combining every key with the raw hash of its value.
 *
 * `node` is the chain of arrays whose hashing is currently in progress (NULL
 * at the top level). Meeting `ht` again inside that chain raises a fatal
 * "Nesting level too deep" error instead of recursing forever.
 *
 * An array with no elements returns ZEND_STRICTHASH_HASH_EMPTY_ARRAY.
 */
static zend_long zend_stricthash_array(HashTable *const ht, const array_rec_prot_node *const node) {
	if (zend_hash_num_elements(ht) == 0) {
		return ZEND_STRICTHASH_HASH_EMPTY_ARRAY;
	}

	array_rec_prot_node frame;
	array_rec_prot_node *frame_ptr = NULL;
	bool owns_protection = false;

	/* Immutable arrays get no chain node and are never flagged. */
	if ((GC_FLAGS(ht) & GC_IMMUTABLE) == 0) {
		frame.ht = ht;
		frame.prev = node;
		frame_ptr = &frame;

		if (!UNEXPECTED(GC_IS_RECURSIVE(ht))) {
			/* Not flagged yet: flag it here and clear the flag before returning. */
			GC_PROTECT_RECURSION(ht);
			owns_protection = true;
		} else {
			/* Already flagged. It is only an error if this very array is one
			 * of the arrays this hash computation is currently inside of. */
			const array_rec_prot_node *ancestor = node;
			while (ancestor != NULL) {
				if (ancestor->ht == ht) {
					zend_error_noreturn(E_ERROR, "Nesting level too deep - recursive dependency?");
				}
				ancestor = ancestor->prev;
			}
		}
	}

	uint64_t acc = 1;
	zend_long idx;
	zend_string *key;
	zval *entry;
	ZEND_HASH_FOREACH_KEY_VAL(ht, idx, key, entry) {
		/* key is in a hash table, meaning that the hash was already computed. */
		const zend_ulong key_hash = key ? ZSTR_H(key) : (zend_ulong) idx;
		acc += key_hash;

		const zend_long entry_hash = zend_stricthash_inner(entry, frame_ptr);
		acc += (entry_hash + (acc << 7));

		/* Re-mix after every element so that element order affects the result. */
		acc = zend_inline_hash_of_uint64(acc);
	} ZEND_HASH_FOREACH_END();

	if (owns_protection) {
		GC_UNPROTECT_RECURSION(ht);
	}
	return acc;
}

Diff for: Zend/zend_stricthash.h

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
Copyright (c) 2021 Tyson Andre
3+
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
9+
- Redistributions of source code must retain the above copyright notice, this
10+
list of conditions and the following disclaimer.
11+
12+
- Redistributions in binary form must reproduce the above copyright notice,
13+
this list of conditions and the following disclaimer in the documentation
14+
and/or other materials provided with the distribution.
15+
16+
- Neither the name of the 'teds' nor the names of its contributors may
17+
be used to endorse or promote products derived from this software without
18+
specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30+
THE POSSIBILITY OF SUCH DAMAGE.
31+
*/
32+
33+
#ifndef ZEND_STRICTHASH_H
34+
#define ZEND_STRICTHASH_H
35+
36+
#include "Zend/zend_types.h"
37+
38+
/*
 * Returns a hash of `value` derived from its type and contents (implemented in
 * zend_stricthash.c). References are followed and arrays are hashed recursively
 * from their keys and values. An array that contains itself triggers a fatal
 * "Nesting level too deep - recursive dependency?" error.
 */
zend_long zend_stricthash_hash(zval *value);
39+
40+
/*
 * Same hash as zend_stricthash_hash(), truncated to its low 32 bits.
 */
static zend_always_inline uint32_t zend_stricthash_hash_uint32_t(zval *value) {
	const zend_ulong full_hash = (zend_ulong) zend_stricthash_hash(value);

	return (uint32_t) full_hash;
}
43+
44+
#endif /* ZEND_STRICTHASH_H */

0 commit comments

Comments
 (0)