Increase Native build warning level to W3 and fix build warnings (#3370)

glebuk · web-flow · commit 1e166fba21b9 · 2019-04-19T12:26:08.000-07:00
* Reverted to Warning level 3 for unmanaged builds
* Fixed three W3 warnings that were flagged as security-critical but turned out to be false positives
* Added safety checks for array allocations on 32-bit platforms in LDA Native. Prior code did not check array allocation sizes correctly on 32-bit platforms.
* Fixed signed/unsigned warnings
* Fixed warnings in alias_multinomial_rng_int.hpp
* Removed error handling as the exception is not cross-platform.
* Updated all spacing and formatting to <spaces>
* update SSE.cpp comments to reflect correct limits of a variable
-- checked for one more overflow condition
-- ensure all exceptions are thrown
* Added one more allocation check
- standardized to numeric_limits of int32_t
* fixed a clang-only warning
* fix size comments
diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt
@@ -32,7 +32,6 @@ if(WIN32)
     add_compile_options(/FC) # use full pathnames in diagnostics
     add_compile_options(/DEBUG)
     add_compile_options(/GS)
-    add_compile_options(/W1)
     add_compile_options(/Zc:inline)
     add_compile_options(/fp:precise)
     add_compile_options(/EHsc)
diff --git a/src/Native/CpuMathNative/Sse.cpp b/src/Native/CpuMathNative/Sse.cpp
@@ -158,7 +158,7 @@ EXPORT_API(void) MatMulTran(_In_ const float * pmat, _In_ const float * psrc, _I
     }
 
     pm += 3 * crow;
-    
+
     for (; ps < psLim; ps += 4)
     {
         __m128 x01 = _mm_load_ps(ps);
@@ -219,9 +219,9 @@ EXPORT_API(void) Scale(float a, _Inout_ float * pd, int c)
     {
         switch (c)
         {
-            case 3: pd[2] *= a;
-            case 2: pd[1] *= a;
-            case 1: pd[0] *= a;
+        case 3: pd[2] *= a;
+        case 2: pd[1] *= a;
+        case 1: pd[0] *= a;
         }
         return;
     }
@@ -266,7 +266,8 @@ EXPORT_API(void) Scale(float a, _Inout_ float * pd, int c)
             _mm_storeu_ps(pd, result);
 
             pd += misalignment;
-            c -= misalignment;
+            // safe to downcast as misalignment <= 128.
+            c -= static_cast<int>(misalignment);
         }
 
         if (c > 3)
@@ -489,9 +490,9 @@ EXPORT_API(float) Sum(const float* pValues, int length)
 
         switch (length)
         {
-            case 3: result += pValues[2];
-            case 2: result += pValues[1];
-            case 1: result += pValues[0];
+        case 3: result += pValues[2];
+        case 2: result += pValues[1];
+        case 1: result += pValues[0];
         }
 
         return result;
@@ -532,7 +533,8 @@ EXPORT_API(float) Sum(const float* pValues, int length)
             result = _mm_add_ps(result, temp);
 
             pValues += misalignment;
-            length -= misalignment;
+            // safe to downcast as misalignment < 16.
+            length -= static_cast<int>(misalignment);
         }
 
         if (length > 3)
@@ -882,4 +884,4 @@ EXPORT_API(void) SdcaL1UpdateSU(float primalUpdate, _In_ const float * ps, _In_
         float d1 = pd1[i];
         pd2[i] = std::abs(d1) > threshold ? (d1 > 0 ? d1 - threshold : d1 + threshold) : 0;
     }
-}
+}
diff --git a/src/Native/LdaNative/alias_multinomial_rng_int.cpp b/src/Native/LdaNative/alias_multinomial_rng_int.cpp
@@ -23,13 +23,13 @@ namespace wood
             delete[]internal_memory_;
         }
     }
-    
+
     int32_t AliasMultinomialRNGInt::Next(xorshift_rng& rng, std::vector<alias_k_v>& alias_kv)
     {
         // NOTE: stl uniform_real_distribution generates the highest quality random numbers
         // yet, the other two are much faster
         auto sample = rng.rand();
-        
+
         // NOTE: use std::floor is too slow
         // here we guarantee sample * n_ is nonnegative, this makes cast work
         int idx = sample / a_int_;
diff --git a/src/Native/LdaNative/alias_multinomial_rng_int.hpp b/src/Native/LdaNative/alias_multinomial_rng_int.hpp
@@ -14,7 +14,7 @@
 #include <iostream>
 #include <assert.h>
 /*
-Algorithm described in 
+Algorithm described in
 https://www.jstatsoft.org/v11/i03/paper
 George Marsaglia
 Fast generation of discrete random variables
@@ -108,7 +108,12 @@ namespace wood
             int32_t H_head = 0;
             int32_t H_tail = 0;
 
-            for (auto i = 0; i < proportion_int_.size(); ++i)
+
+            // note that i must fit into int32_t of L_[L_tail].first
+            if (static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) < proportion_int_.size() )
+                throw std::bad_alloc();
+            int32_t size = static_cast<int32_t>(proportion_int_.size());
+            for (int32_t i = 0; i < size; ++i)
             {
                 auto val = proportion_int_[i];
                 if (val < a_int_)
@@ -154,7 +159,7 @@ namespace wood
                 auto first = L_[L_head].first;
                 auto second = L_[L_head].second;
                 alias_kv[first].k_ = first;
-                alias_kv[first].v_ = first  * a_int_ + second;
+                alias_kv[first].v_ = first * a_int_ + second;
                 ++L_head;
             }
             while (H_head != H_tail)
@@ -227,7 +232,7 @@ namespace wood
                 *p = i;  p++;
                 *p = (i + 1) * a_int_;
             }
-            
+
             int32_t L_head = 0;
             int32_t L_tail = 0;
 
@@ -295,8 +300,8 @@ namespace wood
                 *p = first; p++;
                 *p = first * a_int_ + second;
                 ++H_head;
-            }    
-            memcpy(memory, internal_memory_, sizeof(int32_t)* 2 * n_);
+            }
+            memcpy(memory, internal_memory_, sizeof(int32_t) * 2 * n_);
         }
 
         inline void SetProportionMass(std::vector<float> &proportion,
diff --git a/src/Native/LdaNative/data_block.cpp b/src/Native/LdaNative/data_block.cpp
@@ -3,17 +3,21 @@
 // See the LICENSE file in the project root for more information.
 
 #include <iostream>
+#include <stdexcept>
+#include <limits>
 #include "data_block.h"
 #include "lda_document.h"
 
 namespace lda
 {
-    LDADataBlock::LDADataBlock(int32_t num_threads) : 
+    using namespace std;
+
+    LDADataBlock::LDADataBlock(int32_t num_threads) :
         num_threads_(num_threads), has_read_(false), index_document_(0), documents_buffer_(nullptr), offset_buffer_(nullptr)
     {
     }
 
-    LDADataBlock::~LDADataBlock() 
+    LDADataBlock::~LDADataBlock()
     {
         if (has_read_)
         {
@@ -46,7 +50,13 @@ namespace lda
     void LDADataBlock::Allocate(const int32_t num_document, const int64_t corpus_size)
     {
         num_documents_ = num_document;
-        corpus_size_ = corpus_size;
+
+        if (corpus_size < 0 || static_cast<uint64_t>(corpus_size) > numeric_limits<size_t>::max())
+            throw bad_alloc();
+        corpus_size_ = static_cast<size_t>(corpus_size);
+
+        if (num_documents_ < 0 || static_cast<uint64_t>(num_documents_) >(numeric_limits<size_t>::max() - 1))
+            throw bad_alloc();
 
         offset_buffer_ = new int64_t[num_documents_ + 1]; // +1: one for the end of last document,
         documents_buffer_ = new int32_t[corpus_size_];
@@ -86,7 +96,7 @@ namespace lda
     int LDADataBlock::AddDense(int32_t* term_freq, int32_t term_num)
     {
         int64_t data_length = 1;
-        
+
         int64_t idx = offset_buffer_[index_document_] + 1;
         for (int i = 0; i < term_num; ++i)
         {
diff --git a/src/Native/LdaNative/data_block.h b/src/Native/LdaNative/data_block.h
@@ -17,18 +17,18 @@ namespace lda
     public:
         explicit LDADataBlock(int32_t num_threads);
         ~LDADataBlock();
-        
+
         void Clear();
         //in data feedin scenario
-        void Allocate(const int32_t num_document, const int64_t corpus_size);        
+        void Allocate(const int32_t num_document, const int64_t corpus_size);
         //port the data from external process, for example, c#
         int AddDense(int32_t* term_freq, int32_t term_num);
         int Add(int32_t* term_id, int32_t* term_freq, int32_t term_num);
         std::shared_ptr<LDADocument> GetOneDoc(int32_t index) const;
 
         inline int32_t num_documents() const;
         // Return the first document for thread thread_id
-        inline int32_t Begin(int32_t thread_id) const;        
+        inline int32_t Begin(int32_t thread_id) const;
         // Return the next to last document for thread thread_i
         inline int32_t End(int32_t thread_id) const;
 
@@ -43,8 +43,8 @@ namespace lda
         int32_t index_document_;
         int64_t used_size_;
 
-        int32_t num_documents_; 
-        int64_t corpus_size_;
+        int32_t num_documents_;
+        size_t corpus_size_;
 
         int64_t* offset_buffer_;    // offset_buffer_ size = num_document_ + 1
         int32_t* documents_buffer_; // documents_buffer_ size = corpus_size_;
diff --git a/src/Native/LdaNative/lda_engine.cpp b/src/Native/LdaNative/lda_engine.cpp
@@ -377,7 +377,7 @@ namespace lda {
         std::vector<std::pair<int, double>> llcontainer;
         // Set core affinity which helps performance improvement
 #ifdef _MSC_VER
-        long long maskLL = 0;
+        DWORD maskLL = 0;
         maskLL |= (1LL << (thread_id));
         DWORD_PTR mask = maskLL;
         SetThreadAffinityMask(GetCurrentThread(), mask);
@@ -542,7 +542,7 @@ namespace lda {
         if (thread_id == 0)
         {
             //output the ll once
-            for (int i = 0; i < llcontainer.size(); i++)
+            for (size_t i = 0; i < llcontainer.size(); i++)
             {
                 printf("loglikelihood @iter%04d = %f\n", llcontainer[i].first, llcontainer[i].second);
             }
@@ -560,7 +560,7 @@ namespace lda {
 
         // Set core affinity which helps performance improvement
 #ifdef _MSC_VER
-        long long maskLL = 0;
+        DWORD maskLL = 0;
         maskLL |= (1LL << (thread_id));
         DWORD_PTR mask = maskLL;
         SetThreadAffinityMask(GetCurrentThread(), mask);
diff --git a/src/Native/LdaNative/model_block.cpp b/src/Native/LdaNative/model_block.cpp
diff --git a/src/Native/LdaNative/model_block.h b/src/Native/LdaNative/model_block.h
diff --git a/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp b/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp

Original file line number	Diff line number	Diff line change
`@@ -158,7 +158,7 @@ EXPORT_API(void) MatMulTran(_In_ const float * pmat, _In_ const float * psrc, _I`
`158`	`158`	`}`
`159`	`159`
`160`	`160`	`pm += 3 * crow;`
`161`		`-`
	`161`	`+`
`162`	`162`	`for (; ps < psLim; ps += 4)`
`163`	`163`	`{`
`164`	`164`	`__m128 x01 = _mm_load_ps(ps);`
`@@ -219,9 +219,9 @@ EXPORT_API(void) Scale(float a, _Inout_ float * pd, int c)`
`219`	`219`	`{`
`220`	`220`	`switch (c)`
`221`	`221`	`{`
`222`		`- case 3: pd[2] *= a;`
`223`		`- case 2: pd[1] *= a;`
`224`		`- case 1: pd[0] *= a;`
	`222`	`+ case 3: pd[2] *= a;`
	`223`	`+ case 2: pd[1] *= a;`
	`224`	`+ case 1: pd[0] *= a;`
`225`	`225`	`}`
`226`	`226`	`return;`
`227`	`227`	`}`
`@@ -266,7 +266,8 @@ EXPORT_API(void) Scale(float a, _Inout_ float * pd, int c)`
`266`	`266`	`_mm_storeu_ps(pd, result);`
`267`	`267`
`268`	`268`	`pd += misalignment;`
`269`		`- c -= misalignment;`
	`269`	`+ // safe to downcast as misalignment <= 128.`
	`270`	`+ c -= static_cast<int>(misalignment);`
`270`	`271`	`}`
`271`	`272`
`272`	`273`	`if (c > 3)`
`@@ -489,9 +490,9 @@ EXPORT_API(float) Sum(const float* pValues, int length)`
`489`	`490`
`490`	`491`	`switch (length)`
`491`	`492`	`{`
`492`		`- case 3: result += pValues[2];`
`493`		`- case 2: result += pValues[1];`
`494`		`- case 1: result += pValues[0];`
	`493`	`+ case 3: result += pValues[2];`
	`494`	`+ case 2: result += pValues[1];`
	`495`	`+ case 1: result += pValues[0];`
`495`	`496`	`}`
`496`	`497`
`497`	`498`	`return result;`
`@@ -532,7 +533,8 @@ EXPORT_API(float) Sum(const float* pValues, int length)`
`532`	`533`	`result = _mm_add_ps(result, temp);`
`533`	`534`
`534`	`535`	`pValues += misalignment;`
`535`		`- length -= misalignment;`
	`536`	`+ // safe to downcast as misalignment < 16.`
	`537`	`+ length -= static_cast<int>(misalignment);`
`536`	`538`	`}`
`537`	`539`
`538`	`540`	`if (length > 3)`
`@@ -882,4 +884,4 @@ EXPORT_API(void) SdcaL1UpdateSU(float primalUpdate, _In_ const float * ps, _In_`
`882`	`884`	`float d1 = pd1[i];`
`883`	`885`	`pd2[i] = std::abs(d1) > threshold ? (d1 > 0 ? d1 - threshold : d1 + threshold) : 0;`
`884`	`886`	`}`
`885`		`-}`
	`887`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -23,13 +23,13 @@ namespace wood`
`23`	`23`	`delete[]internal_memory_;`
`24`	`24`	`}`
`25`	`25`	`}`
`26`		`-`
	`26`	`+`
`27`	`27`	`int32_t AliasMultinomialRNGInt::Next(xorshift_rng& rng, std::vector<alias_k_v>& alias_kv)`
`28`	`28`	`{`
`29`	`29`	`// NOTE: stl uniform_real_distribution generates the highest quality random numbers`
`30`	`30`	`// yet, the other two are much faster`
`31`	`31`	`auto sample = rng.rand();`
`32`		`-`
	`32`	`+`
`33`	`33`	`// NOTE: use std::floor is too slow`
`34`	`34`	`// here we guarantee sample * n_ is nonnegative, this makes cast work`
`35`	`35`	`int idx = sample / a_int_;`
Original file line number	Diff line number	Diff line change
`@@ -3,17 +3,21 @@`
`3`	`3`	`// See the LICENSE file in the project root for more information.`
`4`	`4`
`5`	`5`	`#include <iostream>`
	`6`	`+#include <stdexcept>`
	`7`	`+#include <limits>`
`6`	`8`	`#include "data_block.h"`
`7`	`9`	`#include "lda_document.h"`
`8`	`10`
`9`	`11`	`namespace lda`
`10`	`12`	`{`
`11`		`- LDADataBlock::LDADataBlock(int32_t num_threads) :`
	`13`	`+ using namespace std;`
	`14`	`+`
	`15`	`+ LDADataBlock::LDADataBlock(int32_t num_threads) :`
`12`	`16`	`num_threads_(num_threads), has_read_(false), index_document_(0), documents_buffer_(nullptr), offset_buffer_(nullptr)`
`13`	`17`	`{`
`14`	`18`	`}`
`15`	`19`
`16`		`- LDADataBlock::~LDADataBlock()`
	`20`	`+ LDADataBlock::~LDADataBlock()`
`17`	`21`	`{`
`18`	`22`	`if (has_read_)`
`19`	`23`	`{`
`@@ -46,7 +50,13 @@ namespace lda`
`46`	`50`	`void LDADataBlock::Allocate(const int32_t num_document, const int64_t corpus_size)`
`47`	`51`	`{`
`48`	`52`	`num_documents_ = num_document;`
`49`		`- corpus_size_ = corpus_size;`
	`53`	`+`
	`54`	`+ if (corpus_size < 0 \|\| static_cast<uint64_t>(corpus_size) > numeric_limits<size_t>::max())`
	`55`	`+ throw bad_alloc();`
	`56`	`+ corpus_size_ = static_cast<size_t>(corpus_size);`
	`57`	`+`
	`58`	`+ if (num_documents_ < 0 \|\| static_cast<uint64_t>(num_documents_) >(numeric_limits<size_t>::max() - 1))`
	`59`	`+ throw bad_alloc();`
`50`	`60`
`51`	`61`	`offset_buffer_ = new int64_t[num_documents_ + 1]; // +1: one for the end of last document,`
`52`	`62`	`documents_buffer_ = new int32_t[corpus_size_];`
`@@ -86,7 +96,7 @@ namespace lda`
`86`	`96`	`int LDADataBlock::AddDense(int32_t* term_freq, int32_t term_num)`
`87`	`97`	`{`
`88`	`98`	`int64_t data_length = 1;`
`89`		`-`
	`99`	`+`
`90`	`100`	`int64_t idx = offset_buffer_[index_document_] + 1;`
`91`	`101`	`for (int i = 0; i < term_num; ++i)`
`92`	`102`	`{`