how to retrain the image classify model incrementally? #5355

thicktao · 2020-08-20T04:44:04Z

System information

OS version/distro:Windows 10 Enterprise
.NET Version (eg., dotnet --info): .netcore 3.1 console

Issue

What did you do?
I want to retrain the image classify model incrementally
What happened?
when i predict it after my second training, it report an error,as below

What did you expect?
after my retraining model ,it can works well

Source code / logs

using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace Test
{
  public class ContinueGraphTrainingTest
    {
        private static readonly string PrePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "train");
        private static readonly string TrainModelPath2 = Path.Combine(PrePath, "data2.zip");
        private static readonly string PreDataPath = Path.Combine(PrePath, "preData.zip");
        private static readonly string DataModelPath = Path.Combine(PrePath, "data.zip");
        private static readonly string InceptionPb = Path.Combine(PrePath, "tensorflow_inception_graph.pb");
        private static readonly string FirstScanDir = Path.Combine(PrePath, "TrainImage1");
        private static readonly string SecondScanDir = Path.Combine(PrePath, "TrainImage2");
        private static readonly string PredictImgs = Path.Combine(PrePath, "PredictImgs/111.png");
        private static readonly MLContext MlContext = new MLContext(1);

        public static void SaveRetrainModel()
        {

            List<ImageData> list1 = new List<ImageData>();
            ScanPic(list1, FirstScanDir);
            var fulldata1 = MlContext.Data.LoadFromEnumerable(list1);
            var trainTestData1 = MlContext.Data.TrainTestSplit(fulldata1);
            var trainingDataView1 = trainTestData1.TrainSet;

            var pipeline = MlContext.Transforms.Conversion.MapValueToKey("Label")
                .Append(MlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageSettings.ImageWidth, imageHeight: ImageSettings.ImageHeight, inputColumnName: "Image"))
                .Append(MlContext.Transforms.ExtractPixels(outputColumnName: "input", interleavePixelColors: ImageSettings.ChannelsLast, offsetImage: ImageSettings.Mean))
                .Append(MlContext.Model.LoadTensorFlowModel(InceptionPb).ScoreTensorFlowModel(outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }, addBatchDimensionInput: true))
                .AppendCacheCheckpoint(MlContext);

            var trainingPipeline = pipeline.Append(MlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(labelColumnName: "Label", featureColumnName: "softmax2_pre_activation"));

            var dataPiple = trainingPipeline.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabelValue", "PredictedLabel"));

            var preDataTransform = trainingPipeline.Fit(trainingDataView1);
            MlContext.Model.Save(preDataTransform, trainingDataView1.Schema, PreDataPath);

            ITransformer dataTransform = dataPiple.Fit(trainingDataView1);
            MlContext.Model.Save(dataTransform, trainingDataView1.Schema, DataModelPath);

            PredictScore();

        }

        public static void SecondTrainAndPredit()
        {
            var list2 = new List<ImageData>();
            ScanPic(list2, SecondScanDir);
            var fulldata2 = MlContext.Data.LoadFromEnumerable(list2);
            var trainTestData2 = MlContext.Data.TrainTestSplit(fulldata2);
            var trainingDataView2 = trainTestData2.TrainSet;


            var preDataModel = MlContext.Model.Load(PreDataPath, out DataViewSchema modelInputSchema2);
            var originalModelParameters = (preDataModel as TransformerChain<ITransformer>)?.LastTransformer as MulticlassPredictionTransformer<MaximumEntropyModelParameters>;

            ITransformer dataPrepPipeline = MlContext.Model.Load(DataModelPath, out var dataPrepPipelineSchema);
            IDataView newDataForm = dataPrepPipeline.Transform(trainingDataView2);
            var _keyToValueModel = MlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(labelColumnName: "Label", featureColumnName: "softmax2_pre_activation").Fit(newDataForm, originalModelParameters.Model);

            MlContext.Model.Save(_keyToValueModel, trainingDataView2.Schema, TrainModelPath2);

            PredictScore(TrainModelPath2);
        }

        public static void PredictScore(string dataModelPath = "")
        {
            if (string.IsNullOrEmpty(dataModelPath))
            {
                dataModelPath = DataModelPath;
            }
            var loadedModel = MlContext.Model.Load(dataModelPath, out var modelInputSchema);
            var predictor = MlContext.Model.CreatePredictionEngine<ImageData, ImagePrediction>(loadedModel);
            var imageData = new ImageData() { Image = (Bitmap)Image.FromFile(PredictImgs) };
            var result = predictor.Predict(imageData);
            Console.WriteLine(result.Score.Max());
        }

        private static void ScanPic(List<ImageData> list, string directory)
        {
            var files = Directory.GetFiles(directory, "*.*", SearchOption.AllDirectories);
            StringBuilder imgTags = new StringBuilder();
            foreach (var filePath in files)
            {
                if (!filePath.EndsWith(".jpg") && !filePath.EndsWith(".png"))
                {
                    continue;
                }
                var deviceModel = Directory.GetParent(filePath).Name;
                string imgPath = $"{deviceModel}/{Path.GetFileName(filePath)}";
                imgTags.AppendLine($"{imgPath}\t{deviceModel}");
                list.Add(new ImageData()
                {



                    Label = deviceModel,
                    Image = (Bitmap)Image.FromFile(filePath)
                });
            }
        }
    }

public class ImagePrediction
    {
        public float[] Score;

        public string PredictedLabelValue;
    }

 public class ImageData
    {
        //[LoadColumn(0)]
        //public string ImagePath;
        [ImageType(227, 227)]
        [LoadColumn(0)]
        public Bitmap Image;

        [LoadColumn(1)]
        public string Label;
    }
}

train.zip

and file "tensorflow_inception_graph.pb" is too big to upload

frank-dong-ms-zz · 2020-08-20T06:54:47Z

@thicktao thanks for using ML.NET, could you please upload the model file tensorflow_inception_graph.pb to some cloud storage and share the link so we can investigate what is going on here?

thicktao · 2020-08-21T01:02:45Z

@frank-dong-ms Ok，I created a repository,you can download it from https://github.com/thicktao/issuefiles/blob/master/tensorflow_inception_graph.zip

frank-dong-ms-zz · 2020-08-21T01:56:04Z

@thicktao seems we still need definition of class "ImageSettings", could you please share a repro solution and repro steps which will be much easier?

thicktao · 2020-08-21T05:57:03Z

@frank-dong-ms https://github.com/thicktao/issuefiles.git you can Reproduce this error by this repository

frank-dong-ms-zz · 2020-08-22T00:43:55Z

@thicktao Thanks for providing that.

I can see several issue in your sample code:

You missed MapKeyToValue convertor in your retrain method
second model saved at file system doesn't have complete transformer train thus the input and output is different from the first model
looks like missing data from TrainImage2 folder, I manually add some image to this folder

I have made a working version based on your sample code like below (I put code in a single method but you can get the idea), please take a look and let me know if you have any further questions:

` public static void SaveRetrainModel2()
{

        List<ImageData> list1 = new List<ImageData>();
        ScanPic(list1, FirstScanDir);
        var fulldata1 = MlContext.Data.LoadFromEnumerable(list1);
        var trainTestData1 = MlContext.Data.TrainTestSplit(fulldata1);
        var trainingDataView1 = trainTestData1.TrainSet;

        var list2 = new List<ImageData>();
        ScanPic(list2, SecondScanDir);
        var fulldata2 = MlContext.Data.LoadFromEnumerable(list2);
        var trainTestData2 = MlContext.Data.TrainTestSplit(fulldata2);
        var trainingDataView2 = trainTestData2.TrainSet;

        var pipeline = MlContext.Transforms.Conversion.MapValueToKey("Label")
            .Append(MlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageSettings.ImageWidth, imageHeight: ImageSettings.ImageHeight, inputColumnName: "Image"))
            .Append(MlContext.Transforms.ExtractPixels(outputColumnName: "input", interleavePixelColors: ImageSettings.ChannelsLast, offsetImage: ImageSettings.Mean))
            .Append(MlContext.Model.LoadTensorFlowModel(InceptionPb).ScoreTensorFlowModel(outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }, addBatchDimensionInput: true))
            .AppendCacheCheckpoint(MlContext);
        var trainer = MlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(labelColumnName: "Label", featureColumnName: "softmax2_pre_activation");
        var trainingPipeline = pipeline.Append(trainer);

        var convertor = MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabelValue", "PredictedLabel");
        var dataPiple = trainingPipeline.Append(convertor);

        var preDataTransform = trainingPipeline.Fit(trainingDataView1);
        MlContext.Model.Save(preDataTransform, trainingDataView1.Schema, PreDataPath);

        ITransformer dataTransform = dataPiple.Fit(trainingDataView1);
        MlContext.Model.Save(dataTransform, trainingDataView1.Schema, DataModelPath);

        PredictScore();


        //retrain
        var featureModel = pipeline.Fit(trainingDataView2);
        var transformedDataView = featureModel.Transform(trainingDataView2);
        var model = trainer.Fit(transformedDataView);
        var secondModel = trainer.Fit(transformedDataView, model.Model);
        var convertModel = convertor.Fit(secondModel.Transform(transformedDataView));
        var completeModel = featureModel.Append(secondModel).Append(convertModel);
        MlContext.Model.Save(completeModel, trainingDataView2.Schema, TrainModelPath2);

        PredictScore(TrainModelPath2);
    }`

thicktao · 2020-08-24T01:14:23Z

@frank-dong-ms Thank you so much! As I tested the code you replied to，It wokrs well! But I have another question, I find that it's not incremental training.
I specify the Predicted Image which comes from label 2 sets, and after the first time training ,the result is my expection,it's label 2 ; but after the second training ,the predict result is the label 3.

private static void PredictScore(string dataModelPath = "")
        {
            if (string.IsNullOrEmpty(dataModelPath))
            {
                dataModelPath = DataModelPath;
            }
            var loadedModel = MlContext.Model.Load(dataModelPath, out var modelInputSchema);
            var predictor = MlContext.Model.CreatePredictionEngine<ImageData, ImagePrediction>(loadedModel);
            var imageData = new ImageData() { Image = (Bitmap)Image.FromFile(PredictImgs) };
            var result = predictor.Predict(imageData);
            if (result.PredictedLabelValue == "2")
            {
                Console.WriteLine($"It is my expectation and PredictedLabelValue:{result.PredictedLabelValue} and score is {result.Score.Max()}");
            }
            else
            {
                Console.WriteLine($"It is not my expectation,PredictedLabelValue:{result.PredictedLabelValue} and score is {result.Score.Max()}");
            }
        }

frank-dong-ms-zz · 2020-08-24T20:05:21Z

@thicktao I checked the training data, the image used to predict(PredictImgs/111.png) is the same one as the only image marked as label 3 at train2 folder(\train\TrainImage2\3\111.png) so I would not surprise that this image is marked as label 3.

thicktao · 2020-08-25T01:21:19Z

@frank-dong-ms sorry，i forget to tell you that the repository was updated, and i changed the trainImage1、trainImage2 files, as below :

The first training set is more than 111.png in the label 2 of the second training set. if the second training result can be recognized as label 2, it means that the incremental training, because it inherited the results of the first training, but in fact the second training model did not recognize 111.png,it was recognized as label 3; Or my idea is wrong, the real scenario is like this , I have a physical machine memory of only 16G, but my training picture collection may be more than 16G, such as 60G. In order to complete the training of the entire model, so I hope to be able to train several times,maybe can be divided into 6 times,each time 10G, and this can be avoid the running out of memory。

frank-dong-ms-zz · 2020-08-25T04:44:40Z

@thicktao I see what you are trying to do now. Looks like you are not retraining the image classification model (only loads through MlContext.Model.LoadTensorFlowModel(InceptionPb).ScoreTensorFlowModel). There used to be a method called RetrainDnnModel serves this purpose but this method has been marked as internal due to not fully tested, see related issues and PRs below:
#4362
#5215
#4520

If you need this functionality, please describe your scenarios in detail and other necessary infos and I can tag this issue as feature request and we can prioritize this.

thicktao · 2020-08-25T07:21:51Z

@frank-dong-ms Thank you very much for your answer. I need a picture classification model, i want to constantly train to optimize this model. My training material is constantly produced by users, but my physical machine is only 16G, in the future the size of images collection will be much larger than 16G, I can not load all the pictures into memory, and then train it. So I need a solution of ml.net. ML.net is a good machine learning framework, I like .net very much, and I'm not willing to try other languages to achieve the above requirements, so can ml.net add this functionality?

frank-dong-ms-zz · 2020-08-25T17:16:22Z

@thicktao thanks for your support for ML.NET, I will mark this issue as feature request and will prioritize this later.

pardont · 2020-11-10T02:03:40Z

Is there any update? I need this feature too.

bigofraggle · 2021-07-02T09:42:27Z

Any update? I would need this feature as well

frank-dong-ms-zz added the image Bugs related image datatype tasks label Aug 20, 2020

frank-dong-ms-zz self-assigned this Aug 22, 2020

frank-dong-ms-zz added enhancement New feature or request P2 Priority of the issue for triage purpose: Needs to be fixed at some point. labels Aug 25, 2020

frank-dong-ms-zz removed their assignment Aug 25, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

how to retrain the image classify model incrementally? #5355

how to retrain the image classify model incrementally? #5355

thicktao commented Aug 20, 2020 •

edited

Loading

frank-dong-ms-zz commented Aug 20, 2020

thicktao commented Aug 21, 2020 •

edited

Loading

frank-dong-ms-zz commented Aug 21, 2020

thicktao commented Aug 21, 2020

frank-dong-ms-zz commented Aug 22, 2020 •

edited

Loading

thicktao commented Aug 24, 2020

frank-dong-ms-zz commented Aug 24, 2020

thicktao commented Aug 25, 2020

frank-dong-ms-zz commented Aug 25, 2020

thicktao commented Aug 25, 2020

frank-dong-ms-zz commented Aug 25, 2020

pardont commented Nov 10, 2020

bigofraggle commented Jul 2, 2021

how to retrain the image classify model incrementally? #5355

how to retrain the image classify model incrementally? #5355

Comments

thicktao commented Aug 20, 2020 • edited Loading

System information

Issue

Source code / logs

frank-dong-ms-zz commented Aug 20, 2020

thicktao commented Aug 21, 2020 • edited Loading

frank-dong-ms-zz commented Aug 21, 2020

thicktao commented Aug 21, 2020

frank-dong-ms-zz commented Aug 22, 2020 • edited Loading

thicktao commented Aug 24, 2020

frank-dong-ms-zz commented Aug 24, 2020

thicktao commented Aug 25, 2020

frank-dong-ms-zz commented Aug 25, 2020

thicktao commented Aug 25, 2020

frank-dong-ms-zz commented Aug 25, 2020

pardont commented Nov 10, 2020

bigofraggle commented Jul 2, 2021

thicktao commented Aug 20, 2020 •

edited

Loading

thicktao commented Aug 21, 2020 •

edited

Loading

frank-dong-ms-zz commented Aug 22, 2020 •

edited

Loading