
Commit 85401cd

[Code] Proxy matrix tmp-matrix leak fixes:
- Fix a lost release call for the tmp matrix in the commitCache() method of the core matrix class
- Minor fixes to the required CMake version
- Update the Python package
1 parent 7a5a5d8 commit 85401cd

File tree

12 files changed: +56, −50 lines

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 # CuBool library Cmake config file
 # Add this file as sub-directory to your project to use library functionality

-cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
 project(cubool LANGUAGES CXX)

 # Exposed to the user build options

README.md

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@ These steps are required if you want to build library for your specific platform
 ### Requirements

 - Linux Ubuntu (tested on 20.04)
-- CMake Version 3.17 or higher
+- CMake Version 3.15 or higher
 - CUDA Compatible GPU device
 - GCC Compiler
 - NVIDIA CUDA toolkit

cubool/CMakeLists.txt

Lines changed: 9 additions & 9 deletions

@@ -146,15 +146,15 @@ if (CUBOOL_WITH_CUDA)
     set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

     # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
-    target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-        # todo: fix this flag later -arch=sm_30 ?
-        # todo: can we omit arch flag?
-        -gencode=arch=compute_30,code=sm_30
-        -gencode=arch=compute_50,code=sm_50
-        -gencode=arch=compute_52,code=sm_52
-        -gencode=arch=compute_60,code=sm_60
-        -gencode=arch=compute_61,code=sm_61
-        -gencode=arch=compute_61,code=compute_61>)
+    #target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+    #    # todo: fix this flag later -arch=sm_30 ?
+    #    # todo: can we omit arch flag?
+    #    -gencode=arch=compute_30,code=sm_30
+    #    -gencode=arch=compute_50,code=sm_50
+    #    -gencode=arch=compute_52,code=sm_52
+    #    -gencode=arch=compute_60,code=sm_60
+    #    -gencode=arch=compute_61,code=sm_61
+    #    -gencode=arch=compute_61,code=compute_61>)

     target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -use_fast_math -Xptxas -O2>)


cubool/sources/core/matrix.cpp

Lines changed: 5 additions & 5 deletions

@@ -364,17 +364,17 @@ namespace cubool {
         bool isSorted = false;
         bool noDuplicates = false;

-        // We will have to join old and new values
         if (mHnd->getNvals() > 0) {
-            // Build tmp matrix with new values
+            // We will have to join old and new values
+            // Create tmp matrix and merge values
+
             MatrixBase* tmp = mProvider->createMatrix(getNrows(), getNcols());
             tmp->build(mCachedI.data(), mCachedJ.data(), cachedNvals, isSorted, noDuplicates);
-
-            // Add new values to current matrix content
             mHnd->eWiseAdd(*mHnd, *tmp, false);
+            mProvider->releaseMatrix(tmp);
         }
-        // Otherwise, new values are used to build matrix content
         else {
+            // Otherwise, new values are used to build matrix content
             mHnd->build(mCachedI.data(), mCachedJ.data(), cachedNvals, isSorted, noDuplicates);
         }

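Note: the fix pairs createMatrix() with an explicit releaseMatrix() once the merge is done, which plugs the leak on the normal path; if eWiseAdd() threw, the release would still be skipped. A minimal sketch of a scope guard that releases the tmp matrix on every exit path, assuming only the createMatrix/releaseMatrix interface shown in this diff (the guard type is hypothetical, not part of cuBool):

    // Hypothetical RAII guard (not cuBool code): returns a provider-owned
    // tmp matrix to its provider when the scope exits, even on exceptions.
    template <typename Provider, typename Matrix>
    struct TmpMatrixGuard {
        Provider* provider;
        Matrix*   matrix;

        TmpMatrixGuard(Provider* p, Matrix* m) : provider(p), matrix(m) {}
        ~TmpMatrixGuard() { provider->releaseMatrix(matrix); }

        // Non-copyable: exactly one guard owns the release.
        TmpMatrixGuard(const TmpMatrixGuard&) = delete;
        TmpMatrixGuard& operator=(const TmpMatrixGuard&) = delete;
    };

With such a guard wrapping the result of mProvider->createMatrix(...), the explicit releaseMatrix(tmp) call could be dropped and the release would happen automatically at the end of the block.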

cubool/sources/cuda/cuda_backend.cu

Lines changed: 12 additions & 0 deletions

@@ -24,6 +24,8 @@

 #include <cuda/cuda_backend.hpp>
 #include <cuda/matrix_csr.hpp>
+#include <core/library.hpp>
+#include <io/logger.hpp>

 namespace cubool {

@@ -36,6 +38,14 @@ namespace cubool {
     }

     void CudaBackend::finalize() {
+        assert(mMatCount == 0);
+
+        if (mMatCount > 0) {
+            LogStream stream(*Library::getLogger());
+            stream << Logger::Level::Error
+                   << "Lost some (" << mMatCount << ") matrix objects" << LogStream::cmt;
+        }
+
         if (mInstance) {
             delete mInstance;
             mInstance = nullptr;
@@ -47,10 +57,12 @@ namespace cubool {
     }

     MatrixBase *CudaBackend::createMatrix(size_t nrows, size_t ncols) {
+        mMatCount++;
         return new MatrixCsr(nrows, ncols, getInstance());
     }

     void CudaBackend::releaseMatrix(MatrixBase *matrixBase) {
+        mMatCount--;
         delete matrixBase;
     }

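Note: the backend now counts live matrices — createMatrix() increments, releaseMatrix() decrements, and finalize() checks that the count returned to zero, asserting in debug builds and logging an error otherwise. A standalone sketch of this leak-detection pattern (names hypothetical, not the cuBool API):

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    struct Object {};

    class Backend {
    public:
        Object* create() {
            mLive++;                  // one more object handed out
            return new Object();
        }

        void release(Object* obj) {
            mLive--;                  // one fewer outstanding object
            delete obj;
        }

        void finalize() {
            assert(mLive == 0);       // debug builds: fail fast on a leak
            if (mLive > 0)            // release builds: at least report it
                std::fprintf(stderr, "Lost some (%zu) objects\n", mLive);
        }

    private:
        std::size_t mLive = 0;
    };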

cubool/sources/cuda/cuda_backend.hpp

Lines changed: 1 addition & 0 deletions

@@ -49,6 +49,7 @@ namespace cubool {

     private:
         Instance* mInstance;
+        size_t mMatCount = 0;
     };

 }

cubool/sources/cuda/instance.cpp

Lines changed: 2 additions & 24 deletions

@@ -44,35 +44,13 @@ namespace cubool {
     void Instance::allocate(void* &ptr, size_t size) const {
         ptr = malloc(size);
         CHECK_RAISE_ERROR(ptr != nullptr, MemOpFailed, "Failed to allocate memory on the CPU");
+        mHostAllocCount++;
     }

     void Instance::deallocate(void* ptr) const {
         CHECK_RAISE_ERROR(ptr != nullptr, InvalidArgument, "Passed null ptr to free");
         free(ptr);
-    }
-
-    void Instance::printDeviceCapabilities() const {
-        static const size_t BUFFER_SIZE = 1024;
-
-        cuBool_DeviceCaps deviceCaps;
-        queryDeviceCapabilities(deviceCaps);
-
-        char deviceInfo[BUFFER_SIZE];
-        snprintf(deviceInfo, BUFFER_SIZE, "Device name: %s version: %i.%i",
-                 deviceCaps.name, deviceCaps.major, deviceCaps.major);
-
-        char memoryInfo[BUFFER_SIZE];
-        snprintf(memoryInfo, BUFFER_SIZE, "Global memory: %llu KiB",
-                 (unsigned long long) deviceCaps.globalMemoryKiBs);
-
-        char sharedMemoryInfo[BUFFER_SIZE];
-        snprintf(sharedMemoryInfo, BUFFER_SIZE, "Shared memory: multi-proc %llu KiB block %llu KiB",
-                 (unsigned long long) deviceCaps.sharedMemoryPerMultiProcKiBs, (unsigned long long) deviceCaps.sharedMemoryPerBlockKiBs);
-
-        char structInfo[BUFFER_SIZE];
-        snprintf(structInfo, BUFFER_SIZE, "Kernel: warp %llu", (unsigned long long) deviceCaps.warp);
-
-        // todo
+        mHostAllocCount--;
     }

     Instance& Instance::getInstanceRef() {
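Note: allocate() and deallocate() are const member functions, yet they mutate the counter; this compiles because the counters are declared mutable in instance.hpp (see that diff below). A minimal standalone illustration of the idiom:

    #include <cstddef>

    class Tracker {
    public:
        // Logically const: the observable state does not change,
        // only the internal bookkeeping counter is updated.
        void allocate() const   { mAllocCount++; }
        void deallocate() const { mAllocCount--; }

    private:
        mutable std::size_t mAllocCount = 0; // mutable permits writes in const methods
    };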

cubool/sources/cuda/instance.cu

Lines changed: 6 additions & 7 deletions

@@ -31,6 +31,9 @@
 namespace cubool {

     Instance::~Instance() {
+        assert(mHostAllocCount == 0);
+        assert(mDeviceAllocCount == 0);
+
         gInstance = nullptr;
     }

@@ -53,13 +56,7 @@ namespace cubool {
             RAISE_ERROR(MemOpFailed, message);
         }

-#if 0
-        {
-            char buffer[2000];
-            snprintf(buffer, 2000, "============> allocate on gpu %llu", (long long unsigned) size);
-            this->sendMessage(CUBOOL_STATUS_SUCCESS, buffer);
-        }
-#endif
+        mDeviceAllocCount++;
     }

     void Instance::deallocateOnGpu(void* ptr) const {
@@ -69,6 +66,8 @@ namespace cubool {
             std::string message = std::string{"Failed to deallocate Gpu memory: "} + cudaGetErrorString(error);
             RAISE_ERROR(MemOpFailed, message);
         }
+
+        mDeviceAllocCount--;
     }

     void Instance::syncHostDevice() const {
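Note: unlike the backends' finalize(), the Instance destructor relies on assert() alone, and assert() compiles to nothing when NDEBUG is defined (the usual release configuration), so these checks fire only in debug builds. A hedged sketch of a check that also reports in release builds, mirroring the finalize() pattern above (the helper name is hypothetical):

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    // Hypothetical helper: abort in debug builds, log in release builds.
    static void checkBalanced(std::size_t count, const char* what) {
        assert(count == 0);           // no-op when NDEBUG is defined
        if (count > 0)                // still visible in release builds
            std::fprintf(stderr, "Lost some (%zu) %s\n", count, what);
    }

    // Usage in a destructor like Instance::~Instance():
    //     checkBalanced(mHostAllocCount, "host allocations");
    //     checkBalanced(mDeviceAllocCount, "device allocations");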

cubool/sources/cuda/instance.hpp

Lines changed: 2 additions & 1 deletion

@@ -52,7 +52,6 @@ namespace cubool {
         void deallocateOnGpu(void* ptr) const;

         void syncHostDevice() const;
-        void printDeviceCapabilities() const;

         static bool isCudaDeviceSupported();
         static void queryDeviceCapabilities(cuBool_DeviceCaps& deviceCaps);
@@ -62,6 +61,8 @@ namespace cubool {

     private:
         MemType mMemoryType = Default;
+        mutable size_t mHostAllocCount = 0;
+        mutable size_t mDeviceAllocCount = 0;

         static volatile Instance* gInstance;
     };

cubool/sources/sequential/sq_backend.cpp

Lines changed: 13 additions & 1 deletion

@@ -24,6 +24,10 @@

 #include <sequential/sq_backend.hpp>
 #include <sequential/sq_matrix.hpp>
+#include <core/library.hpp>
+#include <io/logger.hpp>
+#include <cassert>
+

 namespace cubool {

@@ -32,18 +36,26 @@ namespace cubool {
     }

     void SqBackend::finalize() {
-        // No special actions
+        assert(mMatCount == 0);
+
+        if (mMatCount > 0) {
+            LogStream stream(*Library::getLogger());
+            stream << Logger::Level::Error
+                   << "Lost some (" << mMatCount << ") matrix objects" << LogStream::cmt;
+        }
     }

     bool SqBackend::isInitialized() const {
         return true;
     }

     MatrixBase *SqBackend::createMatrix(size_t nrows, size_t ncols) {
+        mMatCount++;
         return new SqMatrix(nrows, ncols);
     }

     void SqBackend::releaseMatrix(MatrixBase *matrixBase) {
+        mMatCount--;
         delete matrixBase;
     }


cubool/sources/sequential/sq_backend.hpp

Lines changed: 3 additions & 0 deletions

@@ -44,6 +44,9 @@ namespace cubool {
         void releaseMatrix(MatrixBase *matrixBase) override;

         void queryCapabilities(cuBool_DeviceCaps& caps) override;
+
+    private:
+        size_t mMatCount = 0;
     };

 }

deps/nsparse-um/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.17)
+cmake_minimum_required(VERSION 3.15)
 project(nsparse_um LANGUAGES CXX CUDA)

 add_library(nsparse_um INTERFACE)
