|
| 1 | +// Copyright 2016-2020 Bruce Hoult [email protected] |
| 2 | +// |
| 3 | +// Permission is hereby granted, free of charge, to any person obtaining a copy of |
| 4 | +// this software and associated documentation files (the "Software"), to deal in |
| 5 | +// the Software without restriction, including without limitation the rights to |
| 6 | +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| 7 | +// of the Software, and to permit persons to whom the Software is furnished to do |
| 8 | +// so, subject to the following conditions: |
| 9 | +// |
| 10 | +// The above copyright notice and this permission notice shall be included in all |
| 11 | +// copies or substantial portions of the Software. |
| 12 | +// |
| 13 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
| 14 | +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A |
| 15 | +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 16 | +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| 17 | +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 18 | +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 19 | +// |
| 20 | +// Program to count primes. I wanted something that could run in 16 KB but took enough |
| 21 | +// time to measure on a modern x86 and is not susceptible to optimizer tricks. |
| 22 | +// Code size is for just the countPrimes() function with gcc -O. |
| 23 | +// |
| 24 | +// Original x86&ARM data 2016, received user contributions 2019&2020 from eevblog members. |
| 25 | +// |
| 26 | +// SZ = 1000 -> 3713160 primes, all primes up to 7919^2 = 62710561 |
| 27 | +// 2.735 sec i7 8650U @ 4.2 GHz 242 bytes 11.5 billion clocks |
| 28 | +// 2.795 sec Mac Mini M1 @ 3.2 GHz 212 bytes 8.9 billion clocks |
| 29 | +// 2.810 sec Mac Mini M1 arm64 Ubuntu in VM 280 bytes 9.0 billion clocks |
| 30 | +// 2.872 sec i7 6700K @ 4.2 GHz 240 bytes 12.1 billion clocks |
| 31 | +// 2.925 sec Mac Mini M1 @ 3.2 GHz Rosetta 208 bytes 9.4 billion clocks |
| 32 | +// 3.448 sec Ryzen 5 4500U @ 4.0 GHz WSL2 242 bytes 13.8 billion clocks |
| 33 | +// 3.505 sec Xeon Plat 8151 @ 4.0 GHz (AWS z1d) 244 bytes 14.0 billion clocks |
| 34 | +// 3.515 sec Threadripper 2990WX @ 4.2 GHz 242 bytes 14.8 billion clocks |
| 35 | +// 3.836 sec i7 4700MQ @ 3.4 GHz 258 bytes 13.0 billion clocks |
| 36 | +// 3.972 sec i7 8650U @ 4.2 GHz webasm 277 bytes 16.7 billion clocks |
| 37 | +// 4.868 sec i7 3770 @ 3.9 GHz 240 bytes 19.0 billion clocks |
| 38 | +// 6.377 sec AWS C6g graviton2 A64 @ 2.5 GHz 276 bytes 15.9 billion clocks |
| 39 | +// 6.757 sec M1 Mini, qemu-riscv64 in UbuntuVM 216 bytes 23.0 billion clocks |
| 40 | +// 8.538 sec NXP LX2160A A72 @ 2 GHz 260 bytes 17.1 billion clocks |
| 41 | +// 9.692 sec RISC-V Fedora in qemu in VM on M1 208 bytes 31.0 billion clocks |
| 42 | +// 9.740 sec i7 6700K qemu-riscv32 178 bytes 40.9 billion clocks |
| 43 | +// 10.046 sec i7 8650U @ 4.2 GHz qemu-riscv32 190 bytes 42.2 billion clocks |
| 44 | +// 11.190 sec Pi4 Cortex A72 @ 1.5 GHz T32 232 bytes 16.8 billion clocks |
| 45 | +// 11.445 sec Odroid XU4 A15 @ 2 GHz 204 bytes 22.9 billion clocks |
| 46 | +// 12.115 sec Pi4 Cortex A72 @ 1.5 GHz A64 300 bytes 18.2 billion clocks |
| 47 | +// 12.605 sec Pi4 Cortex A72 @ 1.5 GHz A32 300 bytes 18.9 billion clocks |
| 48 | +// 13.721 sec RISC-V Fedora in qemu on 2990wx 208 bytes 57.6 billion clocks |
| 49 | +// 17.394 sec RISCV U74 @1500 MHz (est vs FPGA) 228 bytes 26.1 billion clocks |
| 50 | +// 19.500 sec Odroid C2 A53 @ 1.536 GHz A64 276 bytes 30.0 billion clocks |
| 51 | +// 23.940 sec Odroid C2 A53 @ 1.536 GHz T32 204 bytes 36.8 billion clocks |
| 52 | +// 24.636 sec i7 6700K qemu-arm 204 bytes 103.5 billion clocks |
| 53 | +// 25.060 sec i7 6700K qemu-aarch64 276 bytes 105.3 billion clocks |
| 54 | +// 27.196 sec Teensy 4.0 Cortex M7 @ 960 MHz 228 bytes 26.1 billion clocks |
| 55 | +// 27.480 sec HiFive Unl RISCV U54 @ 1.45 GHz 228 bytes 39.8 billion clocks |
| 56 | +// 30.420 sec Pi3 Cortex A53 @ 1.2 GHz T32 204 bytes 36.5 billion clocks |
| 57 | +// 39.840 sec HiFive Unl RISCV U54 @ 1.0 GHz 228 bytes 39.8 billion clocks |
| 58 | +// 43.516 sec Teensy 4.0 Cortex M7 @ 600 MHz 228 bytes 26.1 billion clocks |
| 59 | +// 47.910 sec Pi2 Cortex A7 @ 900 MHz T32 204 bytes 42.1 billion clocks |
| 60 | +// 48.206 sec Zynq-7010 Cortex A9 @ 650MHz 248 bytes 31.3 billion clocks |
| 61 | +// 112.163 sec HiFive1 RISCV E31 @ 320 MHz 178 bytes 35.9 billion clocks |
| 62 | +// 260.907 sec RISCV U74 @ 100 MHz FPGA 228 bytes 26.1 billion clocks |
| 63 | +// 261.068 sec esp32/Arduino @ 240 MHz ??? bytes 62.7 billion clocks |
| 64 | +// 294.749 sec chipKIT Pro MZ pic32 @ 200 MHz ??? bytes 58.9 billion clocks |
| 65 | +// 306.988 sec esp8266 @ 160 MHz ??? bytes 49.1 billion clocks |
| 66 | +// 309.251 sec BlackPill Cortex M4F @ 168 MHz 228 bytes 52.0 billion clocks |
| 67 | +// 927.547 sec BluePill Cortex M3 @ 72 MHz 228 bytes 66.8 billion clocks |
| 68 | +// 13449.513 sec AVR ATmega2560 @ 20 MHz 318 bytes 269.0 billion clocks |
| 69 | + |
| 70 | +#include <stdio.h> |
| 71 | +#include <time.h> |
| 72 | +#include <stdint.h> |
| 73 | + |
// Capacity of the stored-prime table used by countPrimes().
#define SZ 1000

// primes[i] is the i-th prime saved by countPrimes().
int32_t primes[SZ];

// sieve[i] is a running multiple of primes[i]; countPrimes() advances it
// until it reaches or passes the number currently being tested.
int32_t sieve[SZ];

// Number of entries of primes[] / sieve[] currently in use.
int nSieve = 0;
| 77 | + |
| 78 | +int32_t countPrimes(){ |
| 79 | + primes[0] = 2; sieve[0] = 4; ++nSieve; |
| 80 | + int32_t nPrimes = 1, trial = 3, sqr=2; |
| 81 | + while (1){ |
| 82 | + while (sqr*sqr <= trial) ++sqr; |
| 83 | + --sqr; |
| 84 | + for (int i=0; i<nSieve; ++i){ |
| 85 | + if (primes[i] > sqr) goto found_prime; |
| 86 | + while (sieve[i] < trial) sieve[i] += primes[i]; |
| 87 | + if (sieve[i] == trial) goto try_next; |
| 88 | + } |
| 89 | + break; |
| 90 | + found_prime: |
| 91 | + if (nSieve < SZ){ |
| 92 | + primes[nSieve] = trial; |
| 93 | + sieve[nSieve] = trial*trial; |
| 94 | + ++nSieve; |
| 95 | + // printf("Saved %d: %d\n", nSieve, trial); |
| 96 | + } |
| 97 | + ++nPrimes; |
| 98 | + try_next: |
| 99 | + trial+=1; |
| 100 | + } |
| 101 | + return nPrimes; |
| 102 | +} |
| 103 | + |
// Benchmark driver: runs countPrimes() once, then prints the prime count,
// the elapsed processor time in milliseconds as measured by clock(), and an
// estimate of countPrimes()'s code size.
//
// The values are kept in int32_t / long rather than int so that the count
// and the millisecond figure are not truncated on targets where int is
// 16 bits wide.
int main(void){
  printf("Starting run\n");
  clock_t start = clock();
  int32_t res = countPrimes();
  // Convert clock ticks to milliseconds, rounding to nearest.
  long ms = (long)((clock() - start) / (CLOCKS_PER_SEC / 1000.0) + 0.5);
  // Size calculation does not work if opt >1 or if compiler or linker
  // otherwise reorders functions in the binary. It relies on main() being
  // placed immediately after countPrimes() and on function pointers being
  // convertible to char*, neither of which the C standard guarantees.
  long codeSz = (long)((char*)main - (char*)countPrimes);
  printf("%ld primes found in %ld ms\n", (long)res, ms);
  printf("%ld bytes of code in countPrimes()\n", codeSz);
  return 0;
}
0 commit comments