|
| 1 | +# |
| 2 | +# Beginning of user configuration |
| 3 | +# |
| 4 | + |
| 5 | +# This library's version |
| 6 | +VERSION = 0.3.6.dev |
| 7 | + |
| 8 | +# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a |
| 9 | +# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library |
| 10 | +# is libopenblas_$(LIBNAMESUFFIX).so.0. |
| 11 | +# LIBNAMESUFFIX = omp |
| 12 | + |
| 13 | +# You can specify the target architecture, otherwise it's |
| 14 | +# automatically detected. |
| 15 | +# TARGET = PENRYN |
| 16 | + |
| 17 | +# If you want to support multiple architectures in one binary |
| 18 | +# DYNAMIC_ARCH = 1 |
| 19 | + |
| 20 | +# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH |
| 21 | +# mode (including individual optimized codes for PENRYN, DUNNINGTON, OPTERON, |
| 22 | +# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures) |
| 23 | +# DYNAMIC_OLDER = 1 |
| 24 | + |
| 25 | +# C compiler including binary type(32bit / 64bit). Default is gcc. |
| 26 | +# Don't use Intel Compiler or PGI, it won't generate right codes as I expect. |
| 27 | +# CC = gcc |
| 28 | + |
| 29 | +# Fortran compiler. Default is g77. |
| 30 | +# FC = gfortran |
| 31 | + |
| 32 | +# You can even specify a cross compiler. In that case, please also set HOSTCC. |
| 33 | + |
| 34 | +# cross compiler for Windows |
| 35 | +# CC = x86_64-w64-mingw32-gcc |
| 36 | +# FC = x86_64-w64-mingw32-gfortran |
| 37 | + |
| 38 | +# cross compiler for 32bit ARM |
| 39 | +# CC = arm-linux-gnueabihf-gcc |
| 40 | +# FC = arm-linux-gnueabihf-gfortran |
| 41 | + |
| 42 | +# cross compiler for 64bit ARM |
| 43 | +# CC = aarch64-linux-gnu-gcc |
| 44 | +# FC = aarch64-linux-gnu-gfortran |
| 45 | + |
| 46 | + |
| 47 | +# If you use the cross compiler, please set this host compiler. |
| 48 | +# HOSTCC = gcc |
| 49 | + |
| 50 | +# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 |
| 51 | +BINARY=32 |
| 52 | + |
| 53 | +# About threaded BLAS. It will be automatically detected if you don't |
| 54 | +# specify it. |
| 55 | +# For force setting for single threaded, specify USE_THREAD = 0 |
| 56 | +# For force setting for multi threaded, specify USE_THREAD = 1 |
| 57 | +# USE_THREAD = 0 |
| 58 | + |
| 59 | +# If you're going to use this library with OpenMP, please comment it in. |
| 60 | +# This flag is always set for POWER8. Don't modify the flag |
| 61 | +# USE_OPENMP = 1 |
| 62 | + |
| 63 | +# The OpenMP scheduler to use - by default this is "static" and you |
| 64 | +# will normally not want to change this unless you know that your main |
| 65 | +# workload will involve tasks that have highly unbalanced running times |
| 66 | +# for individual threads. Changing away from "static" may also adversely |
| 67 | +# affect memory access locality in NUMA systems. Setting to "runtime" will |
| 68 | +# allow you to select the scheduler from the environment variable OMP_SCHEDULE |
| 69 | +# CCOMMON_OPT += -DOMP_SCHED=dynamic |
| 70 | + |
| 71 | +# You can define maximum number of threads. Basically it should be |
| 72 | +# less than actual number of cores. If you don't specify one, it's |
| 73 | +# automatically detected by the script. |
| 74 | +# NUM_THREADS = 24 |
| 75 | + |
| 76 | +# If you have enabled USE_OPENMP and your application would call |
| 77 | +# OpenBLAS's calculation API from multi threads, please comment it in. |
| 78 | +# This flag defines how many instances of OpenBLAS's calculation API can |
| 79 | +# actually run in parallel. If more threads call OpenBLAS's calculation API, |
| 80 | +# they need to wait for the preceding API calls to finish or risk data corruption. |
| 81 | +# NUM_PARALLEL = 2 |
| 82 | + |
| 83 | +# if you don't need to install the static library, please comment it in. |
| 84 | +# NO_STATIC = 1 |
| 85 | + |
| 86 | +# If you don't need to generate the shared library, please comment it in. |
| 87 | +# NO_SHARED = 1 |
| 88 | + |
| 89 | +# If you don't need CBLAS interface, please comment it in. |
| 90 | +# NO_CBLAS = 1 |
| 91 | + |
| 92 | +# If you only want CBLAS interface without installing Fortran compiler, |
| 93 | +# please comment it in. |
| 94 | +# ONLY_CBLAS = 1 |
| 95 | + |
| 96 | +# If you don't need LAPACK, please comment it in. |
| 97 | +# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1. |
| 98 | +# NO_LAPACK = 1 |
| 99 | + |
| 100 | +# If you don't need LAPACKE (C Interface to LAPACK), please comment it in. |
| 101 | +# NO_LAPACKE = 1 |
| 102 | + |
| 103 | +# Build LAPACK Deprecated functions since LAPACK 3.6.0 |
| 104 | +BUILD_LAPACK_DEPRECATED = 1 |
| 105 | + |
| 106 | +# Build RecursiveLAPACK on top of LAPACK |
| 107 | +# BUILD_RELAPACK = 1 |
| 108 | + |
| 109 | +# If you want to use legacy threaded Level 3 implementation. |
| 110 | +# USE_SIMPLE_THREADED_LEVEL3 = 1 |
| 111 | + |
| 112 | +# If you want to use the new, still somewhat experimental code that uses |
| 113 | +# thread-local storage instead of a central memory buffer in memory.c |
| 114 | +# Note that if your system uses GLIBC, it needs to have at least glibc 2.21 |
| 115 | +# for this to work. |
| 116 | +# USE_TLS = 1 |
| 117 | + |
| 118 | +# If you want to drive whole 64bit region by BLAS. Not all Fortran |
| 119 | +# compilers support this. It's safe to keep it commented out if you |
| 120 | +# are not sure (equivalent to "-i8" option). |
| 121 | +# INTERFACE64 = 1 |
| 122 | + |
| 123 | +# Unfortunately most of kernel won't give us high quality buffer. |
| 124 | +# BLAS tries to find the best region before entering main function, |
| 125 | +# but it will consume time. If you don't like it, you can disable one. |
| 126 | +NO_WARMUP = 1 |
| 127 | + |
| 128 | +# If you want to disable CPU/Memory affinity on Linux. |
| 129 | +NO_AFFINITY = 1 |
| 130 | + |
| 131 | +# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus |
| 132 | +# BIGNUMA = 1 |
| 133 | + |
| 134 | +# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers |
| 135 | +# and OS. However, the performance is low. |
| 136 | +# NO_AVX = 1 |
| 137 | + |
| 138 | +# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6) |
| 139 | +# NO_AVX2 = 1 |
| 140 | + |
| 141 | +# Don't use parallel make. |
| 142 | +# NO_PARALLEL_MAKE = 1 |
| 143 | + |
| 144 | +# Force number of make jobs. The default is the number of logical CPU of the host. |
| 145 | +# This is particularly useful when using distcc. |
| 146 | +# A negative value will disable adding a -j flag to make, allowing to use a parent |
| 147 | +# make -j value. This is useful to call OpenBLAS make from another project |
| 148 | +# makefile |
| 149 | +# MAKE_NB_JOBS = 2 |
| 150 | + |
| 151 | +# If you would like to know minute performance report of GotoBLAS. |
| 152 | +# FUNCTION_PROFILE = 1 |
| 153 | + |
| 154 | +# Support for IEEE quad precision(it's *real* REAL*16)( under testing) |
| 155 | +# This option should not be used - it is a holdover from unfinished code present |
| 156 | +# in the original GotoBLAS2 library that may be usable as a starting point but |
| 157 | +# is not even expected to compile in its present form. |
| 158 | +# QUAD_PRECISION = 1 |
| 159 | + |
| 160 | +# Threads are still working for a while after finishing BLAS operation |
| 161 | +# to reduce thread activate/deactivate overhead. You can determine |
| 162 | +# time out to improve performance. This number should be from 4 to 30 |
| 163 | +# which corresponds to (1 << n) cycles. For example, if you set to 26, |
| 164 | +# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz |
| 165 | +# system). Also you can control this number by THREAD_TIMEOUT |
| 166 | +# CCOMMON_OPT += -DTHREAD_TIMEOUT=26 |
| 167 | + |
| 168 | +# Using special device driver for mapping physically contiguous memory |
| 169 | +# to the user space. If bigphysarea is enabled, it will use it. |
| 170 | +# DEVICEDRIVER_ALLOCATION = 1 |
| 171 | + |
| 172 | +# If you need to synchronize FP CSR between threads (for x86/x86_64 only). |
| 173 | +# CONSISTENT_FPCSR = 1 |
| 174 | + |
| 175 | +# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed |
| 176 | +# with single thread. (Actually in recent versions this is a factor proportional to the |
| 177 | +# number of floating point operations necessary for the given problem size, no longer |
| 178 | +# an individual dimension). You can use this setting to avoid the overhead of multi- |
| 179 | +# threading in small matrix sizes. The default value is 4, but values as high as 50 have |
| 180 | +# been reported to be optimal for certain workloads (50 is the recommended value for Julia). |
| 181 | +# GEMM_MULTITHREAD_THRESHOLD = 4 |
| 182 | + |
| 183 | +# If you need a sanity check by comparing against reference BLAS. It'll be very |
| 184 | +# slow (Not implemented yet). |
| 185 | +# SANITY_CHECK = 1 |
| 186 | + |
| 187 | +# The installation directory. |
| 188 | +# PREFIX = /opt/OpenBLAS |
| 189 | + |
| 190 | +# Common Optimization Flag; |
| 191 | +# The default -O2 is enough. |
| 192 | +# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT |
| 193 | +# COMMON_OPT = -O2 |
| 194 | + |
| 195 | +# gfortran option for LAPACK to improve thread-safety |
| 196 | +# It is enabled by default in Makefile.system for gfortran |
| 197 | +# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT |
| 198 | +# FCOMMON_OPT = -frecursive |
| 199 | + |
| 200 | +# Profiling flags |
| 201 | +COMMON_PROF = -pg |
| 202 | + |
| 203 | +# Build Debug version |
| 204 | +# DEBUG = 1 |
| 205 | + |
| 206 | +# Set maximum stack allocation. |
| 207 | +# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV |
| 208 | +# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482 |
| 209 | +# |
| 210 | +# MAX_STACK_ALLOC = 0 |
| 211 | + |
| 212 | +# Add a prefix or suffix to all exported symbol names in the shared library. |
| 213 | +# Avoid conflicts with other BLAS libraries, especially when using |
| 214 | +# 64 bit integer interfaces in OpenBLAS. |
| 215 | +# For details, https://github.com/xianyi/OpenBLAS/pull/459 |
| 216 | +# |
| 217 | +# The same prefix and suffix are also added to the library name, |
| 218 | +# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas |
| 219 | +# |
| 220 | +# SYMBOLPREFIX= |
| 221 | +# SYMBOLSUFFIX= |
| 222 | + |
| 223 | +# |
| 224 | +# End of user configuration |
| 225 | +# |
0 commit comments