Skip to content

Add ARRAY_UNIQUE_IDENTICAL option #9804

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion ext/standard/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,56 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke
}
/* }}} */

static int php_zval_compare_strict_function(zval *z1, zval *z2)
{
ZVAL_DEREF(z1);
ZVAL_DEREF(z2);

zend_uchar t1 = Z_TYPE_P(z1);
zend_uchar t2 = Z_TYPE_P(z2);
if (t1 != t2) {
return (t1 < t2) ? -1 : 1;
}

switch (t1) {
case IS_NULL:
case IS_FALSE:
case IS_TRUE:
return 0;
case IS_LONG:
return ZEND_THREEWAY_COMPARE(Z_LVAL_P(z1), Z_LVAL_P(z2));
case IS_DOUBLE: {
double d1 = Z_DVAL_P(z1);
double d2 = Z_DVAL_P(z2);
if (isnan(d1)) {
return -1;
} else if (isnan(d2)) {
return 1;
} else {
return ZEND_THREEWAY_COMPARE(d1, d2);
}
}
case IS_STRING:
return zend_binary_zval_strcmp(z1, z2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separately from my previous comment: This can be optimized, the sort order is internal and not user-visible.

So if any algorithms using sorting internally (where the sort order is not user-visible) are added to php, they can avoid string comparisons entirely in the common cases (comparing hash first):

I haven't benchmarked this, though

(for the worst case of long strings with common prefixes, or the normal case of short strings with common prefixes and a mix of lengths)

  • compare ZSTR_HASH(s1) to ZSTR_HASH(s2) first (compute if not already computed) - these should almost always be different for typical inputs
  • Then check if they're the same pointer
  • Then by length
  • Then by memcmp
#define ZSTR_H(zstr)    (zstr)->h
#define ZSTR_HASH(zstr) zend_string_hash_val(zstr) // gets hash value - computes the hash if it isn't already computed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, let's do that!

case IS_ARRAY:
return zend_hash_compare(
Z_ARRVAL_P(z1), Z_ARRVAL_P(z2),
(compare_func_t) php_zval_compare_strict_function,
/* ordered */ true
);
case IS_OBJECT:
return ZEND_THREEWAY_COMPARE(Z_OBJ_P(z1)->handle, Z_OBJ_P(z2)->handle);
case IS_RESOURCE:
return ZEND_THREEWAY_COMPARE(Z_RES_HANDLE_P(z1), Z_RES_HANDLE_P(z2));
EMPTY_SWITCH_DEFAULT_CASE()
}
}

static zend_always_inline int php_array_data_compare_strict_unstable_i(Bucket *f, Bucket *s)
{
return php_zval_compare_strict_function(&f->val, &s->val);
}

static zend_always_inline int php_array_data_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */
{
return numeric_compare_function(&f->val, &s->val);
Expand Down Expand Up @@ -357,6 +407,11 @@ DEFINE_SORT_VARIANTS(data_compare_string_locale);
DEFINE_SORT_VARIANTS(natural_compare);
DEFINE_SORT_VARIANTS(natural_case_compare);

// Declare without macro since we don't need the other variants
static zend_never_inline int php_array_data_compare_strict_unstable(Bucket *a, Bucket *b) {
return php_array_data_compare_strict_unstable_i(a, b);
}

static bucket_compare_func_t php_get_key_compare_func(zend_long sort_type, int reverse) /* {{{ */
{
switch (sort_type & ~PHP_SORT_FLAG_CASE) {
Expand Down Expand Up @@ -4559,7 +4614,9 @@ PHP_FUNCTION(array_unique)
return;
}

cmp = php_get_data_compare_func_unstable(sort_type, 0);
cmp = sort_type == PHP_ARRAY_UNIQUE_IDENTICAL
? php_array_data_compare_strict_unstable
: php_get_data_compare_func_unstable(sort_type, 0);

RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(array)));

Expand Down
5 changes: 5 additions & 0 deletions ext/standard/basic_functions.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@
* @cvalue PHP_SORT_FLAG_CASE
*/
const SORT_FLAG_CASE = UNKNOWN;
/**
* @var int
* @cvalue PHP_ARRAY_UNIQUE_IDENTICAL
*/
const ARRAY_UNIQUE_IDENTICAL = UNKNOWN;

/**
* @var int
Expand Down
3 changes: 2 additions & 1 deletion ext/standard/basic_functions_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions ext/standard/php_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,12 @@ PHPAPI bool php_array_pick_keys(const php_random_algo *algo, php_random_status *
#define PHP_SORT_ASC 4
#define PHP_SORT_LOCALE_STRING 5
#define PHP_SORT_NATURAL 6
//#define PHP_DONT_USE 7
#define PHP_SORT_FLAG_CASE 8

// Must not clash with the PHP_SORT_ flags
#define PHP_ARRAY_UNIQUE_IDENTICAL 7

#define PHP_COUNT_NORMAL 0
#define PHP_COUNT_RECURSIVE 1

Expand Down
75 changes: 75 additions & 0 deletions ext/standard/tests/array/array_unique_identical.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
--TEST--
Test array_unique() with ARRAY_UNIQUE_IDENTICAL
--FILE--
<?php

var_dump(array_unique(['1234', '1234'], ARRAY_UNIQUE_IDENTICAL));
var_dump(array_unique(['1234', 1234], ARRAY_UNIQUE_IDENTICAL));
var_dump(array_unique([0, '0', 0.0, '0.0', '', null, null], ARRAY_UNIQUE_IDENTICAL));

$a = (object)[];
$b = (object)[];
$a2 = [$a];
$b2 = [$b];
$a3 = (object)['foo' => $a];
$b3 = (object)['foo' => $b];
var_dump(array_unique([$a, $b, $a2, $b2, $a3, $b3], ARRAY_UNIQUE_IDENTICAL));

?>
--EXPECT--
array(1) {
[0]=>
string(4) "1234"
}
array(2) {
[0]=>
string(4) "1234"
[1]=>
int(1234)
}
array(6) {
[0]=>
int(0)
[1]=>
string(1) "0"
[2]=>
float(0)
[3]=>
string(3) "0.0"
[4]=>
string(0) ""
[5]=>
NULL
}
array(6) {
[0]=>
object(stdClass)#1 (0) {
}
[1]=>
object(stdClass)#2 (0) {
}
[2]=>
array(1) {
[0]=>
object(stdClass)#1 (0) {
}
}
[3]=>
array(1) {
[0]=>
object(stdClass)#2 (0) {
}
}
[4]=>
object(stdClass)#3 (1) {
["foo"]=>
object(stdClass)#1 (0) {
}
}
[5]=>
object(stdClass)#4 (1) {
["foo"]=>
object(stdClass)#2 (0) {
}
}
}
83 changes: 83 additions & 0 deletions ext/standard/tests/array/array_unique_identical_enums.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
--TEST--
array_unique() with ARRAY_UNIQUE_IDENTICAL and enums
--FILE--
<?php

enum Foo {
case Bar;
case Baz;
}

$bar = Foo::Bar;
$bar2 = Foo::Bar;

var_dump(array_unique([
Foo::Bar,
Foo::Baz,
Foo::Baz,
Foo::Bar,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
Foo::Bar,
Foo::Bar,
Foo::Baz,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
'a' => Foo::Bar,
'b' => Foo::Baz,
'c' => Foo::Bar,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
&$bar,
Foo::Bar,
&$bar2,
Foo::Baz,
], ARRAY_UNIQUE_IDENTICAL));

$value2 = "hello";
$value3 = 0;
$value4 = &$value2;
var_dump(array_unique([
0,
&$value4,
&$value2,
"hello",
&$value3,
$value4
], ARRAY_UNIQUE_IDENTICAL));

?>
--EXPECT--
array(2) {
[0]=>
enum(Foo::Bar)
[1]=>
enum(Foo::Baz)
}
array(2) {
[0]=>
enum(Foo::Bar)
[2]=>
enum(Foo::Baz)
}
array(2) {
["a"]=>
enum(Foo::Bar)
["b"]=>
enum(Foo::Baz)
}
array(2) {
[0]=>
&enum(Foo::Bar)
[3]=>
enum(Foo::Baz)
}
array(2) {
[0]=>
int(0)
[1]=>
&string(5) "hello"
}
77 changes: 77 additions & 0 deletions ext/standard/tests/array/array_unique_identical_nan.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
--TEST--
array_unique() with ARRAY_UNIQUE_IDENTICAL preserves NAN
--FILE--
<?php

var_dump(array_unique([
NAN,
NAN,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
'foo' => NAN,
'bar' => NAN,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
[NAN],
1.0,
[NAN],
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
1.0,
NAN,
1.0,
NAN,
1.0,
], ARRAY_UNIQUE_IDENTICAL));

var_dump(array_unique([
1.0,
NAN,
1.0,
], ARRAY_UNIQUE_IDENTICAL));

?>
--EXPECT--
array(2) {
[0]=>
float(NAN)
[1]=>
float(NAN)
}
array(2) {
["foo"]=>
float(NAN)
["bar"]=>
float(NAN)
}
array(3) {
[0]=>
array(1) {
[0]=>
float(NAN)
}
[1]=>
float(1)
[2]=>
array(1) {
[0]=>
float(NAN)
}
}
array(3) {
[0]=>
float(1)
[1]=>
float(NAN)
[3]=>
float(NAN)
}
array(2) {
[0]=>
float(1)
[1]=>
float(NAN)
}