Skip to content

Commit 2a4c873

Browse files
authored
[tfjs-node] fixed summary writer memory leak (#7490)
BUG * Fixed a summary-writer memory leak caused by the step Int64Scalar's internal tensor not being deleted after use. * Also fixed a similar issue in saved-model execution.
1 parent e1fd1f1 commit 2a4c873

File tree

2 files changed

+47
-14
lines changed

2 files changed

+47
-14
lines changed

tfjs-node/src/nodejs_kernel_backend.ts

+28-14
Original file line numberDiff line numberDiff line change
@@ -459,32 +459,41 @@ export class NodeJSKernelBackend extends KernelBackend {
459459
private getMappedInputTensorIds(
460460
inputs: Tensor[], inputTensorInfos: ModelTensorInfo[]) {
461461
const tensorIds = this.getInputTensorIds(inputs);
462+
const newTensors = [];
462463
for (let i = 0; i < inputs.length; i++) {
463464
if (inputTensorInfos[i] != null) {
464465
if (inputTensorInfos[i].tfDtype === 'DT_UINT8') {
465466
const data = Uint8Array.from(inputs[i].dataSync());
466467
const inputTensorId = this.binding.createTensor(
467468
inputs[i].shape, this.binding.TF_UINT8, data);
468469
tensorIds[i] = inputTensorId;
470+
newTensors.push(i);
469471
} else if (inputTensorInfos[i].tfDtype === 'DT_INT64') {
470472
const data =
471473
encodeInt32ArrayAsInt64(inputs[i].dataSync() as Int32Array);
472474
const inputTensorId = this.binding.createTensor(
473475
inputs[i].shape, this.binding.TF_INT64, data);
474476
tensorIds[i] = inputTensorId;
477+
newTensors.push(i);
475478
}
476479
}
477480
}
478-
return tensorIds;
481+
return {tensorIds, newTensors};
479482
}
480483

481484
runSavedModel(
482485
id: number, inputs: Tensor[], inputTensorInfos: ModelTensorInfo[],
483486
outputOpNames: string[]): Tensor[] {
487+
const {tensorIds, newTensors} =
488+
this.getMappedInputTensorIds(inputs, inputTensorInfos);
484489
const outputMetadata = this.binding.runSavedModel(
485-
id, this.getMappedInputTensorIds(inputs, inputTensorInfos),
486-
inputTensorInfos.map(info => info.name).join(','),
490+
id, tensorIds, inputTensorInfos.map(info => info.name).join(','),
487491
outputOpNames.join(','));
492+
for (let i = 0; i < tensorIds.length; i++) {
493+
if (newTensors.includes(i)) {
494+
this.binding.deleteTensor(tensorIds[i]);
495+
}
496+
}
488497
return outputMetadata.map(m => this.createOutputTensor(m));
489498
}
490499

@@ -542,9 +551,10 @@ export class NodeJSKernelBackend extends KernelBackend {
542551
}
543552
const opAttrs: TFEOpAttr[] =
544553
[{name: 'T', type: this.binding.TF_ATTR_TYPE, value: typeAttr}];
545-
546-
this.binding.executeOp(
547-
'WriteScalarSummary', opAttrs, this.getInputTensorIds(inputArgs), 0);
554+
const ids = this.getInputTensorIds(inputArgs);
555+
this.binding.executeOp('WriteScalarSummary', opAttrs, ids, 0);
556+
// release the tensorflow tensor for Int64Scalar value of step
557+
this.binding.deleteTensor(ids[1]);
548558
});
549559
}
550560

@@ -561,9 +571,10 @@ export class NodeJSKernelBackend extends KernelBackend {
561571
// and places the values in 30 buckets, while WriteSummary expects a
562572
// tensor which already describes the bucket widths and counts.
563573
//
564-
// If we were to use WriteHistogramSummary, we wouldn't have to implement
565-
// the "bucketization" of the input tensor, but we also wouldn't have
566-
// control over the number of buckets, or the description of the graph.
574+
// If we were to use WriteHistogramSummary, we wouldn't have to
575+
// implement the "bucketization" of the input tensor, but we also
576+
// wouldn't have control over the number of buckets, or the description
577+
// of the graph.
567578
//
568579
// Therefore, we instead use WriteSummary, which makes it possible to
569580
// support these features. However, the trade-off is that we have to
@@ -594,8 +605,10 @@ export class NodeJSKernelBackend extends KernelBackend {
594605
const typeAttr = this.typeAttributeFromTensor(buckets);
595606
const opAttrs: TFEOpAttr[] =
596607
[{name: 'T', type: this.binding.TF_ATTR_TYPE, value: typeAttr}];
597-
this.binding.executeOp(
598-
'WriteSummary', opAttrs, this.getInputTensorIds(inputArgs), 0);
608+
const ids = this.getInputTensorIds(inputArgs);
609+
this.binding.executeOp('WriteSummary', opAttrs, ids, 0);
610+
// release the tensorflow tensor for Int64Scalar value of step
611+
this.binding.deleteTensor(ids[1]);
599612
});
600613
}
601614

@@ -609,9 +622,10 @@ export class NodeJSKernelBackend extends KernelBackend {
609622
*
610623
* @param data A `Tensor` of any shape. Must be castable to `float32`
611624
* @param bucketCount Optional positive `number`
612-
* @returns A `Tensor` of shape `[k, 3]` and type `float32`. The `i`th row is
613-
* a triple `[leftEdge, rightEdge, count]` for a single bucket. The value of
614-
* `k` is either `bucketCount`, `1` or `0`.
625+
* @returns A `Tensor` of shape `[k, 3]` and type `float32`. The `i`th row
626+
* is
627+
* a triple `[leftEdge, rightEdge, count]` for a single bucket. The value
628+
* of `k` is either `bucketCount`, `1` or `0`.
615629
*/
616630
private buckets(data: Tensor, bucketCount?: number): Tensor<tf.Rank> {
617631
if (data.size === 0) {

tfjs-node/src/tensorboard_test.ts

+19
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,16 @@ describe('tensorboard', () => {
9090
expect(fileNames.length).toEqual(1);
9191
});
9292

93+
it('Writing tf.Scalar no memory leak', () => {
94+
const writer = tfn.node.summaryFileWriter(tmpLogDir);
95+
const beforeNumTFTensors = writer.backend.getNumOfTFTensors();
96+
const value = scalar(42);
97+
writer.scalar('foo', value, 0);
98+
writer.flush();
99+
value.dispose();
100+
expect(writer.backend.getNumOfTFTensors()).toBe(beforeNumTFTensors);
101+
});
102+
93103
it('No crosstalk between two summary writers', () => {
94104
const logDir1 = path.join(tmpLogDir, '1');
95105
const writer1 = tfn.node.summaryFileWriter(logDir1);
@@ -180,6 +190,15 @@ describe('tensorboard', () => {
180190
expect(fileSize2 - fileSize1).toEqual(2 * incrementPerScalar);
181191
});
182192

193+
it('summaryFileWriter no memory leak', () => {
194+
const writer = tfn.node.summaryFileWriter(tmpLogDir);
195+
const beforeNumTFTensors = writer.backend.getNumOfTFTensors();
196+
const value = tensor1d([1, 2, 3, 4, 5], 'int32');
197+
writer.histogram('foo', value, 0, 5);
198+
writer.flush();
199+
value.dispose();
200+
expect(writer.backend.getNumOfTFTensors()).toBe(beforeNumTFTensors);
201+
});
183202
it('Can create multiple normal distribution', () => {
184203
const writer = tfn.node.summaryFileWriter(tmpLogDir);
185204
tf.tidy(() => {

0 commit comments

Comments
 (0)