-
Notifications
You must be signed in to change notification settings - Fork 100
/
Copy pathpcg_random.hpp
1751 lines (1470 loc) · 62.3 KB
/
pcg_random.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* PCG Random Number Generation for C++
*
* Copyright 2014 Melissa O'Neill <[email protected]>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For additional information about the PCG random number generation scheme,
* including its license and other licensing options, visit
*
* http://www.pcg-random.org
*/
/*
* This code provides the reference implementation of the PCG family of
* random number generators. The code is complex because it implements
*
* - several members of the PCG family, specifically members corresponding
* to the output functions:
* - XSH RR (good for 64-bit state, 32-bit output)
* - XSH RS (good for 64-bit state, 32-bit output)
* - XSL RR (good for 128-bit state, 64-bit output)
* - RXS M XS (statistically most powerful generator)
* - XSL RR RR (good for 128-bit state, 128-bit output)
* - and RXS, RXS M, XSH, XSL (mostly for testing)
* - at potentially *arbitrary* bit sizes
* - with four different techniques for random streams (MCG, one-stream
* LCG, settable-stream LCG, unique-stream LCG)
* - and the extended generation schemes allowing arbitrary periods
* - with all features of C++11 random number generation (and more),
* some of which are somewhat painful, including
* - initializing with a SeedSequence which writes 32-bit values
* to memory, even though the state of the generator may not
* use 32-bit values (it might use smaller or larger integers)
* - I/O for RNGs and a prescribed format, which needs to handle
* the issue that 8-bit and 128-bit integers don't have working
* I/O routines (e.g., normally 8-bit = char, not integer)
* - equality and inequality for RNGs
* - and a number of convenience typedefs to mask all the complexity
*
* The code employes a fairly heavy level of abstraction, and has to deal
* with various C++ minutia. If you're looking to learn about how the PCG
* scheme works, you're probably best of starting with one of the other
* codebases (see www.pcg-random.org). But if you're curious about the
* constants for the various output functions used in those other, simpler,
* codebases, this code shows how they are calculated.
*
* On the positive side, at least there are convenience typedefs so that you
* can say
*
* pcg32 myRNG;
*
* rather than:
*
* pcg_detail::engine<
* uint32_t, // Output Type
* uint64_t, // State Type
* pcg_detail::xsh_rr_mixin<uint32_t, uint64_t>, true, // Output Func
* pcg_detail::specific_stream<uint64_t>, // Stream Kind
* pcg_detail::default_multiplier<uint64_t> // LCG Mult
* > myRNG;
*
*/
#ifndef PCG_RAND_HPP_INCLUDED
#define PCG_RAND_HPP_INCLUDED 1
#include <cinttypes>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <limits>
#include <iosfwd>
#include <type_traits>
#include <utility>
#include <locale>
#include <new>
#include <stdexcept>
/*
* The pcg_extras namespace contains some support code that is likley to
* be useful for a variety of RNGs, including:
* - 128-bit int support for platforms where it isn't available natively
* - bit twiddling operations
* - I/O of 128-bit and 8-bit integers
* - Handling the evilness of SeedSeq
* - Support for efficiently producing random numbers less than a given
* bound
*/
#include "pcg_extras.hpp"
namespace pcg_detail {
using namespace pcg_extras;
/*
* The LCG generators need some constants to function. This code lets you
* look up the constant by *type*. For example
*
* default_multiplier<uint32_t>::multiplier()
*
* gives you the default multipler for 32-bit integers. We use the name
* of the constant and not a generic word like value to allow these classes
* to be used as mixins.
*/
template <typename T>
struct default_multiplier {
// Not defined for an arbitrary type
};
template <typename T>
struct default_increment {
// Not defined for an arbitrary type
};
#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \
template <> \
struct what ## _ ## kind<type> { \
static constexpr type kind() { \
return constant; \
} \
};
PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U)
PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U)
PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U)
PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U)
PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U)
PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U)
PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL)
PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL)
PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier,
PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL))
PCG_DEFINE_CONSTANT(pcg128_t, default, increment,
PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL))
/*
* Each PCG generator is available in four variants, based on how it applies
* the additive constant for its underlying LCG; the variations are:
*
* single stream - all instances use the same fixed constant, thus
* the RNG always somewhere in same sequence
* mcg - adds zero, resulting in a single stream and reduced
* period
* specific stream - the constant can be changed at any time, selecting
* a different random sequence
* unique stream - the constant is based on the memory addresss of the
* object, thus every RNG has its own unique sequence
*
* This variation is provided though mixin classes which define a function
* value called increment() that returns the nesessary additive constant.
*/
/*
* unique stream
*/
template <typename itype>
class unique_stream {
protected:
static constexpr bool is_mcg = false;
// Is never called, but is provided for symmetry with specific_stream
void set_stream(...)
{
abort();
}
public:
typedef itype state_type;
constexpr itype increment() const {
return itype(reinterpret_cast<unsigned long>(this) | 1);
}
constexpr itype stream() const
{
return increment() >> 1;
}
static constexpr bool can_specify_stream = false;
static constexpr size_t streams_pow2()
{
return (sizeof(itype) < sizeof(size_t) ? sizeof(itype)
: sizeof(size_t))*8 - 1u;
}
protected:
constexpr unique_stream() = default;
};
/*
* no stream (mcg)
*/
template <typename itype>
class no_stream {
protected:
static constexpr bool is_mcg = true;
// Is never called, but is provided for symmetry with specific_stream
void set_stream(...)
{
abort();
}
public:
typedef itype state_type;
static constexpr itype increment() {
return 0;
}
static constexpr bool can_specify_stream = false;
static constexpr size_t streams_pow2()
{
return 0u;
}
protected:
constexpr no_stream() = default;
};
/*
* single stream/sequence (oneseq)
*/
template <typename itype>
class oneseq_stream : public default_increment<itype> {
protected:
static constexpr bool is_mcg = false;
// Is never called, but is provided for symmetry with specific_stream
void set_stream(...)
{
abort();
}
public:
typedef itype state_type;
static constexpr itype stream()
{
return default_increment<itype>::increment() >> 1;
}
static constexpr bool can_specify_stream = false;
static constexpr size_t streams_pow2()
{
return 0u;
}
protected:
constexpr oneseq_stream() = default;
};
/*
* specific stream
*/
template <typename itype>
class specific_stream {
protected:
static constexpr bool is_mcg = false;
itype inc_ = default_increment<itype>::increment();
public:
typedef itype state_type;
typedef itype stream_state;
constexpr itype increment() const {
return inc_;
}
itype stream()
{
return inc_ >> 1;
}
void set_stream(itype specific_seq)
{
inc_ = (specific_seq << 1) | 1;
}
static constexpr bool can_specify_stream = true;
static constexpr size_t streams_pow2()
{
return (sizeof(itype)*8) - 1u;
}
protected:
specific_stream() = default;
specific_stream(itype specific_seq)
: inc_((specific_seq << 1) | itype(1U))
{
// Nothing (else) to do.
}
};
/*
* This is where it all comes together. This function joins together three
* mixin classes which define
* - the LCG additive constant (the stream)
* - the LCG multiplier
* - the output function
* in addition, we specify the type of the LCG state, and the result type,
* and whether to use the pre-advance version of the state for the output
* (increasing instruction-level parallelism) or the post-advance version
* (reducing register pressure).
*
* Given the high level of parameterization, the code has to use some
* template-metaprogramming tricks to handle some of the suble variations
* involved.
*/
template <typename xtype, typename itype,
typename output_mixin,
bool output_previous = true,
typename stream_mixin = oneseq_stream<itype>,
typename multiplier_mixin = default_multiplier<itype> >
class engine : protected output_mixin,
public stream_mixin,
protected multiplier_mixin {
protected:
itype state_;
struct can_specify_stream_tag {};
struct no_specifiable_stream_tag {};
using stream_mixin::increment;
using multiplier_mixin::multiplier;
public:
typedef xtype result_type;
typedef itype state_type;
static constexpr size_t period_pow2()
{
return sizeof(state_type)*8 - 2*stream_mixin::is_mcg;
}
// It would be nice to use std::numeric_limits for these, but
// we can't be sure that it'd be defined for the 128-bit types.
static constexpr result_type min()
{
return result_type(0UL);
}
static constexpr result_type max()
{
return ~result_type(0UL);
}
protected:
itype bump(itype state)
{
return state * multiplier() + increment();
}
itype base_generate()
{
return state_ = bump(state_);
}
itype base_generate0()
{
itype old_state = state_;
state_ = bump(state_);
return old_state;
}
public:
result_type operator()()
{
if (output_previous)
return this->output(base_generate0());
else
return this->output(base_generate());
}
result_type operator()(result_type upper_bound)
{
return bounded_rand(*this, upper_bound);
}
protected:
static itype advance(itype state, itype delta,
itype cur_mult, itype cur_plus);
static itype distance(itype cur_state, itype newstate, itype cur_mult,
itype cur_plus, itype mask = ~itype(0U));
itype distance(itype newstate, itype mask = ~itype(0U)) const
{
return distance(state_, newstate, multiplier(), increment(), mask);
}
public:
void advance(itype delta)
{
state_ = advance(state_, delta, this->multiplier(), this->increment());
}
void backstep(itype delta)
{
advance(-delta);
}
void discard(itype delta)
{
advance(delta);
}
bool wrapped()
{
if (stream_mixin::is_mcg) {
// For MCGs, the low order two bits never change. In this
// implementation, we keep them fixed at 3 to make this test
// easier.
return state_ == 3;
} else {
return state_ == 0;
}
}
engine(itype state = itype(0xcafef00dd15ea5e5ULL))
: state_(this->is_mcg ? state|state_type(3U)
: bump(state + this->increment()))
{
// Nothing else to do.
}
// This function may or may not exist. It thus has to be a template
// to use SFINAE; users don't have to worry about its template-ness.
template <typename sm = stream_mixin>
engine(itype state, typename sm::stream_state stream_seed)
: stream_mixin(stream_seed),
state_(this->is_mcg ? state|state_type(3U)
: bump(state + this->increment()))
{
// Nothing else to do.
}
template<typename SeedSeq>
engine(SeedSeq&& seedSeq, typename std::enable_if<
!stream_mixin::can_specify_stream
&& !std::is_convertible<SeedSeq, itype>::value
&& !std::is_convertible<SeedSeq, engine>::value,
no_specifiable_stream_tag>::type = {})
: engine(generate_one<itype>(std::forward<SeedSeq>(seedSeq)))
{
// Nothing else to do.
}
template<typename SeedSeq>
engine(SeedSeq&& seedSeq, typename std::enable_if<
stream_mixin::can_specify_stream
&& !std::is_convertible<SeedSeq, itype>::value
&& !std::is_convertible<SeedSeq, engine>::value,
can_specify_stream_tag>::type = {})
: engine(generate_one<itype,1,2>(seedSeq),
generate_one<itype,0,2>(seedSeq))
{
// Nothing else to do.
}
template<typename... Args>
void seed(Args&&... args)
{
new (this) engine(std::forward<Args>(args)...);
}
template <typename xtype1, typename itype1,
typename output_mixin1, bool output_previous1,
typename stream_mixin_lhs, typename multiplier_mixin_lhs,
typename stream_mixin_rhs, typename multiplier_mixin_rhs>
friend bool operator==(const engine<xtype1,itype1,
output_mixin1,output_previous1,
stream_mixin_lhs, multiplier_mixin_lhs>&,
const engine<xtype1,itype1,
output_mixin1,output_previous1,
stream_mixin_rhs, multiplier_mixin_rhs>&);
template <typename xtype1, typename itype1,
typename output_mixin1, bool output_previous1,
typename stream_mixin_lhs, typename multiplier_mixin_lhs,
typename stream_mixin_rhs, typename multiplier_mixin_rhs>
friend itype1 operator-(const engine<xtype1,itype1,
output_mixin1,output_previous1,
stream_mixin_lhs, multiplier_mixin_lhs>&,
const engine<xtype1,itype1,
output_mixin1,output_previous1,
stream_mixin_rhs, multiplier_mixin_rhs>&);
template <typename CharT, typename Traits,
typename xtype1, typename itype1,
typename output_mixin1, bool output_previous1,
typename stream_mixin1, typename multiplier_mixin1>
friend std::basic_ostream<CharT,Traits>&
operator<<(std::basic_ostream<CharT,Traits>& out,
const engine<xtype1,itype1,
output_mixin1,output_previous1,
stream_mixin1, multiplier_mixin1>&);
template <typename CharT, typename Traits,
typename xtype1, typename itype1,
typename output_mixin1, bool output_previous1,
typename stream_mixin1, typename multiplier_mixin1>
friend std::basic_istream<CharT,Traits>&
operator>>(std::basic_istream<CharT,Traits>& in,
engine<xtype1, itype1,
output_mixin1, output_previous1,
stream_mixin1, multiplier_mixin1>& rng);
};
template <typename CharT, typename Traits,
typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin, typename multiplier_mixin>
std::basic_ostream<CharT,Traits>&
operator<<(std::basic_ostream<CharT,Traits>& out,
const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin, multiplier_mixin>& rng)
{
auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left);
auto space = out.widen(' ');
auto orig_fill = out.fill();
out << rng.multiplier() << space
<< rng.increment() << space
<< rng.state_;
out.flags(orig_flags);
out.fill(orig_fill);
return out;
}
template <typename CharT, typename Traits,
typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin, typename multiplier_mixin>
std::basic_istream<CharT,Traits>&
operator>>(std::basic_istream<CharT,Traits>& in,
engine<xtype,itype,
output_mixin,output_previous,
stream_mixin, multiplier_mixin>& rng)
{
auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws);
itype multiplier, increment, state;
in >> multiplier >> increment >> state;
if (!in.fail()) {
bool good = true;
if (multiplier != rng.multiplier()) {
good = false;
} else if (rng.can_specify_stream) {
rng.set_stream(increment >> 1);
} else if (increment != rng.increment()) {
good = false;
}
if (good) {
rng.state_ = state;
} else {
in.clear(std::ios::failbit);
}
}
in.flags(orig_flags);
return in;
}
template <typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin, typename multiplier_mixin>
itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
multiplier_mixin>::advance(
itype state, itype delta, itype cur_mult, itype cur_plus)
{
// The method used here is based on Brown, "Random Number Generation
// with Arbitrary Stride,", Transactions of the American Nuclear
// Society (Nov. 1994). The algorithm is very similar to fast
// exponentiation.
//
// Even though delta is an unsigned integer, we can pass a
// signed integer to go backwards, it just goes "the long way round".
constexpr itype ZERO = 0u; // itype may be a non-trivial types, so
constexpr itype ONE = 1u; // we define some ugly constants.
itype acc_mult = 1;
itype acc_plus = 0;
while (delta > ZERO) {
if (delta & ONE) {
acc_mult *= cur_mult;
acc_plus = acc_plus*cur_mult + cur_plus;
}
cur_plus = (cur_mult+ONE)*cur_plus;
cur_mult *= cur_mult;
delta >>= 1;
}
return acc_mult * state + acc_plus;
}
template <typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin, typename multiplier_mixin>
itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
multiplier_mixin>::distance(
itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask)
{
constexpr itype ONE = 1u; // itype could be weird, so use constant
itype the_bit = stream_mixin::is_mcg ? itype(4u) : itype(1u);
itype distance = 0u;
while ((cur_state & mask) != (newstate & mask)) {
if ((cur_state & the_bit) != (newstate & the_bit)) {
cur_state = cur_state * cur_mult + cur_plus;
distance |= the_bit;
}
assert((cur_state & the_bit) == (newstate & the_bit));
the_bit <<= 1;
cur_plus = (cur_mult+ONE)*cur_plus;
cur_mult *= cur_mult;
}
return stream_mixin::is_mcg ? distance >> 2 : distance;
}
template <typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin_lhs, typename multiplier_mixin_lhs,
typename stream_mixin_rhs, typename multiplier_mixin_rhs>
itype operator-(const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
{
if (lhs.multiplier() != rhs.multiplier()
|| lhs.increment() != rhs.increment())
throw std::logic_error("incomparable generators");
return rhs.distance(lhs.state_);
}
template <typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin_lhs, typename multiplier_mixin_lhs,
typename stream_mixin_rhs, typename multiplier_mixin_rhs>
bool operator==(const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
{
return (lhs.multiplier() == rhs.multiplier())
&& (lhs.increment() == rhs.increment())
&& (lhs.state_ == rhs.state_);
}
template <typename xtype, typename itype,
typename output_mixin, bool output_previous,
typename stream_mixin_lhs, typename multiplier_mixin_lhs,
typename stream_mixin_rhs, typename multiplier_mixin_rhs>
inline bool operator!=(const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
const engine<xtype,itype,
output_mixin,output_previous,
stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
{
return !operator==(lhs,rhs);
}
template <typename xtype, typename itype,
template<typename XT,typename IT> class output_mixin,
bool output_previous = (sizeof(itype) <= 8)>
using oneseq_base = engine<xtype, itype,
output_mixin<xtype, itype>, output_previous,
oneseq_stream<itype> >;
template <typename xtype, typename itype,
template<typename XT,typename IT> class output_mixin,
bool output_previous = (sizeof(itype) <= 8)>
using unique_base = engine<xtype, itype,
output_mixin<xtype, itype>, output_previous,
unique_stream<itype> >;
template <typename xtype, typename itype,
template<typename XT,typename IT> class output_mixin,
bool output_previous = (sizeof(itype) <= 8)>
using setseq_base = engine<xtype, itype,
output_mixin<xtype, itype>, output_previous,
specific_stream<itype> >;
template <typename xtype, typename itype,
template<typename XT,typename IT> class output_mixin,
bool output_previous = (sizeof(itype) <= 8)>
using mcg_base = engine<xtype, itype,
output_mixin<xtype, itype>, output_previous,
no_stream<itype> >;
/*
* OUTPUT FUNCTIONS.
*
* These are the core of the PCG generation scheme. They specify how to
* turn the base LCG's internal state into the output value of the final
* generator.
*
* They're implemented as mixin classes.
*
* All of the classes have code that is written to allow it to be applied
* at *arbitrary* bit sizes, although in practice they'll only be used at
* standard sizes supported by C++.
*/
/*
* XSH RS -- high xorshift, followed by a random shift
*
* Fast. A good performer.
*/
template <typename xtype, typename itype>
struct xsh_rs_mixin {
static xtype output(itype internal)
{
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
constexpr bitcount_t sparebits = bits - xtypebits;
constexpr bitcount_t opbits =
sparebits-5 >= 64 ? 5
: sparebits-4 >= 32 ? 4
: sparebits-3 >= 16 ? 3
: sparebits-2 >= 4 ? 2
: sparebits-1 >= 1 ? 1
: 0;
constexpr bitcount_t mask = (1 << opbits) - 1;
constexpr bitcount_t maxrandshift = mask;
constexpr bitcount_t topspare = opbits;
constexpr bitcount_t bottomspare = sparebits - topspare;
constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2;
bitcount_t rshift =
opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
internal ^= internal >> xshift;
xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift));
return result;
}
};
/*
* XSH RR -- high xorshift, followed by a random rotate
*
* Fast. A good performer. Slightly better statistically than XSH RS.
*/
template <typename xtype, typename itype>
struct xsh_rr_mixin {
static xtype output(itype internal)
{
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8);
constexpr bitcount_t sparebits = bits - xtypebits;
constexpr bitcount_t wantedopbits =
xtypebits >= 128 ? 7
: xtypebits >= 64 ? 6
: xtypebits >= 32 ? 5
: xtypebits >= 16 ? 4
: 3;
constexpr bitcount_t opbits =
sparebits >= wantedopbits ? wantedopbits
: sparebits;
constexpr bitcount_t amplifier = wantedopbits - opbits;
constexpr bitcount_t mask = (1 << opbits) - 1;
constexpr bitcount_t topspare = opbits;
constexpr bitcount_t bottomspare = sparebits - topspare;
constexpr bitcount_t xshift = (topspare + xtypebits)/2;
bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask
: 0;
bitcount_t amprot = (rot << amplifier) & mask;
internal ^= internal >> xshift;
xtype result = xtype(internal >> bottomspare);
result = rotr(result, amprot);
return result;
}
};
/*
* RXS -- random xorshift
*/
template <typename xtype, typename itype>
struct rxs_mixin {
static xtype output_rxs(itype internal)
{
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8);
constexpr bitcount_t shift = bits - xtypebits;
constexpr bitcount_t extrashift = (xtypebits - shift)/2;
bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63
: shift > 32+4 ? (internal >> (bits - 5)) & 31
: shift > 16+2 ? (internal >> (bits - 4)) & 15
: shift > 8+1 ? (internal >> (bits - 3)) & 7
: shift > 4+1 ? (internal >> (bits - 2)) & 3
: shift > 2+1 ? (internal >> (bits - 1)) & 1
: 0;
internal ^= internal >> (shift + extrashift - rshift);
xtype result = internal >> rshift;
return result;
}
};
/*
* RXS M XS -- random xorshift, mcg multiply, fixed xorshift
*
* The most statistically powerful generator, but all those steps
* make it slower than some of the others. We give it the rottenest jobs.
*
* Because it's usually used in contexts where the state type and the
* result type are the same, it is a permutation and is thus invertable.
* We thus provide a function to invert it. This function is used to
* for the "inside out" generator used by the extended generator.
*/
/* Defined type-based concepts for the multiplication step. They're actually
* all derived by truncating the 128-bit, which was computed to be a good
* "universal" constant.
*/
template <typename T>
struct mcg_multiplier {
// Not defined for an arbitrary type
};
template <typename T>
struct mcg_unmultiplier {
// Not defined for an arbitrary type
};
PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U)
PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U)
PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U)
PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U)
PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U)
PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U)
PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL)
PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL)
PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier,
PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL))
PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier,
PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL))
template <typename xtype, typename itype>
struct rxs_m_xs_mixin {
static xtype output(itype internal)
{
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t opbits = xtypebits >= 128 ? 6
: xtypebits >= 64 ? 5
: xtypebits >= 32 ? 4
: xtypebits >= 16 ? 3
: 2;
constexpr bitcount_t shift = bits - xtypebits;
constexpr bitcount_t mask = (1 << opbits) - 1;
bitcount_t rshift =
opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
internal ^= internal >> (opbits + rshift);
internal *= mcg_multiplier<itype>::multiplier();
xtype result = internal >> shift;
result ^= result >> ((2U*xtypebits+2U)/3U);
return result;
}
static itype unoutput(itype internal)
{
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t opbits = bits >= 128 ? 6
: bits >= 64 ? 5
: bits >= 32 ? 4
: bits >= 16 ? 3
: 2;
constexpr bitcount_t mask = (1 << opbits) - 1;
internal = unxorshift(internal, bits, (2U*bits+2U)/3U);
internal *= mcg_unmultiplier<itype>::unmultiplier();
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
internal = unxorshift(internal, bits, opbits + rshift);
return internal;
}
};
/*
* RXS M -- random xorshift, mcg multiply
*/
template <typename xtype, typename itype>
struct rxs_m_mixin {
static xtype output(itype internal)
{
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t opbits = xtypebits >= 128 ? 6
: xtypebits >= 64 ? 5
: xtypebits >= 32 ? 4
: xtypebits >= 16 ? 3
: 2;
constexpr bitcount_t shift = bits - xtypebits;
constexpr bitcount_t mask = (1 << opbits) - 1;
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
internal ^= internal >> (opbits + rshift);
internal *= mcg_multiplier<itype>::multiplier();
xtype result = internal >> shift;
return result;
}
};
/*
* XSL RR -- fixed xorshift (to low bits), random rotate
*
* Useful for 128-bit types that are split across two CPU registers.
*/
template <typename xtype, typename itype>
struct xsl_rr_mixin {
static xtype output(itype internal)
{
constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
constexpr bitcount_t sparebits = bits - xtypebits;
constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7
: xtypebits >= 64 ? 6
: xtypebits >= 32 ? 5
: xtypebits >= 16 ? 4
: 3;
constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits
: sparebits;
constexpr bitcount_t amplifier = wantedopbits - opbits;
constexpr bitcount_t mask = (1 << opbits) - 1;
constexpr bitcount_t topspare = sparebits;
constexpr bitcount_t bottomspare = sparebits - topspare;
constexpr bitcount_t xshift = (topspare + xtypebits) / 2;
bitcount_t rot =
opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
bitcount_t amprot = (rot << amplifier) & mask;
internal ^= internal >> xshift;
xtype result = xtype(internal >> bottomspare);
result = rotr(result, amprot);
return result;
}
};