Text Recognition: Add script to evaluate text recognition by ICDAR2003 (#71)

Charles-258 · web-flow · commit 370f75a916fb · 2022-09-07T19:49:40.000+08:00
* update readme

* add another script

* revise details for this pr
diff --git a/models/text_recognition_crnn/README.md b/models/text_recognition_crnn/README.md
@@ -2,11 +2,24 @@
 
 An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition
 
+Results of accuracy evaluation with [tools/eval](../../tools/eval) on different text recognition datasets.
+
+| Model name   | ICDAR03(%) | IIIT5k(%) | CUTE80(%) |
+|--------------|------------|-----------|-----------|
+| CRNN_EN      | 81.66      | 74.33     | 52.78     |
+| CRNN_EN_FP16 | 82.01      | 74.93     | 52.34     |
+| CRNN_CH      | 71.28      | 80.90     | 67.36     |
+| CRNN_CH_FP16 | 78.63      | 80.93     | 67.01     |
+
+\*: 'FP16' stands for 'model quantized into FP16'.
+
 Note:
 - Model source:
     - `text_recognition_CRNN_EN_2021sep.onnx`: https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html (CRNN_VGG_BiLSTM_CTC.onnx)
+    - `text_recognition_CRNN_CH_2021sep.onnx`: https://docs.opencv.org/4.x/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs.onnx)
     - `text_recognition_CRNN_CN_2021nov.onnx`: https://docs.opencv.org/4.5.2/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs_CN.onnx)
 - `text_recognition_CRNN_EN_2021sep.onnx` can detect digits (0\~9) and letters (return lowercase letters a\~z) (view `charset_36_EN.txt` for details).
+- `text_recognition_CRNN_CH_2021sep.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), and some special characters (view `charset_94_CH.txt` for details).
 - `text_recognition_CRNN_CN_2021nov.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), some Chinese characters and some special characters (view `charset_3944_CN.txt` for details).
 - For details on training this model series, please visit https://github.com/zihaomu/deep-text-recognition-benchmark.
 
@@ -16,6 +29,7 @@ Note:
 - This demo uses [text_detection_db](../text_detection_db) as text detector.
 - Selected model must match with the charset:
     - Try `text_recognition_CRNN_EN_2021sep.onnx` with `charset_36_EN.txt`.
+    - Try `text_recognition_CRNN_CH_2021sep.onnx` with `charset_94_CH.txt`.
     - Try `text_recognition_CRNN_CN_2021sep.onnx` with `charset_3944_CN.txt`.
 
 Run the demo detecting English:
diff --git a/models/text_recognition_crnn/charset_94_CH.txt b/models/text_recognition_crnn/charset_94_CH.txt
@@ -0,0 +1,94 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+`
+{
+|
+}
+~
diff --git a/models/text_recognition_crnn/crnn.py b/models/text_recognition_crnn/crnn.py
@@ -54,7 +54,9 @@ def _preprocess(self, image, rbbox):
         rotationMatrix = cv.getPerspectiveTransform(vertices, self._targetVertices)
         cropped = cv.warpPerspective(image, rotationMatrix, self._inputSize)
 
-        if 'CN' in self._model_path:
+        # 'CN' can detect digits (0~9), upper/lower-case letters (a~z and A~Z), some Chinese characters and some special characters
+        # 'CH' can detect digits (0~9), upper/lower-case letters (a~z and A~Z), and some special characters
+        if 'CN' in self._model_path or 'CH' in self._model_path:
             pass
         else:
             cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
diff --git a/models/text_recognition_crnn/text_recognition_CRNN_CH_2021sep.onnx b/models/text_recognition_crnn/text_recognition_CRNN_CH_2021sep.onnx
diff --git a/tools/eval/README.md b/tools/eval/README.md
@@ -19,6 +19,8 @@ Supported datasets:
 - [ImageNet](#imagenet)
 - [WIDERFace](#widerface)
 - [LFW](#lfw)
+- [ICDAR2003](#icdar2003)
+- [IIIT5K](#iiit5k)
 
 ## ImageNet
 
@@ -137,4 +139,55 @@ Run evaluation with the following command:
 
 ```shell
 python eval.py -m sface -d lfw -dr /path/to/lfw
+```
+
+## ICDAR2003
+
+### Prepare data
+
+Please visit http://iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions to download the ICDAR2003 dataset and the labels. 
+
+```shell
+$ tree -L 3 /path/to/icdar
+.
+├── word
+│   ├── 1
+│   │   ├── self
+│   │   ├── ...
+│   │   └── willcooks
+│   ├── ...
+│   └── 12
+└── word.xml
+    
+```
+
+### Evaluation
+
+Run evaluation with the following command:
+
+```shell
+python eval.py -m crnn -d icdar -dr /path/to/icdar
+```
+
+### Example
+
+```shell
+download zip file from http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTrain/word.zip
+unzip file to /path/to/icdar
+python eval.py -m crnn -d icdar -dr /path/to/icdar
+```
+
+## IIIT5K
+
+### Prepare data
+
+Please visit https://github.com/cv-small-snails/Text-Recognition-Material to download the IIIT5K dataset and the labels.
+
+### Evaluation
+
+Any dataset in LMDB format can be evaluated by this script.<br>
+Run evaluation with the following command:
+
+```shell
+python eval.py -m crnn -d iiit5k -dr /path/to/iiit5k
 ```
diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py
@@ -1,6 +1,8 @@
 from .imagenet import ImageNet
 from .widerface import WIDERFace
 from .lfw import LFW
+from .icdar import ICDAR
+from .iiit5k import IIIT5K
 
 class Registery:
     def __init__(self, name):
@@ -16,4 +18,6 @@ def register(self, item):
 DATASETS = Registery("Datasets")
 DATASETS.register(ImageNet)
 DATASETS.register(WIDERFace)
-DATASETS.register(LFW)
+DATASETS.register(LFW)
+DATASETS.register(ICDAR)
+DATASETS.register(IIIT5K)
diff --git a/tools/eval/datasets/icdar.py b/tools/eval/datasets/icdar.py
@@ -0,0 +1,53 @@
+import os
+import numpy as np
+import cv2 as cv
+import xml.dom.minidom as minidom
+from tqdm import tqdm
+
+class ICDAR:
+    def __init__(self, root):
+        self.root = root
+        self.acc = -1
+        self.inputSize = [100, 32]
+        self.val_label_file = os.path.join(root, "word.xml")
+        self.val_label = self.load_label(self.val_label_file)
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def load_label(self, label_file):
+        label = list()
+        dom = minidom.getDOMImplementation().createDocument(None, 'Root', None)
+        root = dom.documentElement
+        dom = minidom.parse(self.val_label_file)
+        root = dom.documentElement
+        names = root.getElementsByTagName('image')
+        for name in names:
+            key = os.path.join(self.root, name.getAttribute('file'))
+            value = name.getAttribute('tag').lower()
+            label.append([key, value])
+
+        return label
+
+    def eval(self, model):
+        right_num = 0
+        pbar = tqdm(self.val_label)
+        for fn, label in pbar:
+            pbar.set_description("Evaluating {} with {} val set".format(model.name, self.name))
+
+            img = cv.imread(fn)
+
+            rbbox = np.array([0, img.shape[0], 0, 0, img.shape[1], 0, img.shape[1], img.shape[0]])
+            pred = model.infer(img, rbbox)
+            if label == pred:
+                right_num += 1
+
+        self.acc = right_num/(len(self.val_label) * 1.0)
+
+
+    def get_result(self):
+        return self.acc
+
+    def print_result(self):
+        print("Accuracy: {:.2f}%".format(self.acc*100))
diff --git a/tools/eval/datasets/iiit5k.py b/tools/eval/datasets/iiit5k.py
@@ -0,0 +1,55 @@
+import lmdb
+import os
+import numpy as np
+import cv2 as cv
+from tqdm import tqdm
+
+class IIIT5K:
+    def __init__(self, root):
+        self.root = root
+        self.acc = -1
+        self.inputSize = [100, 32]
+
+        self.val_label = self.load_label(self.root)
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def load_label(self, root):
+        lmdb_file = root
+        lmdb_env = lmdb.open(lmdb_file)
+        lmdb_txn = lmdb_env.begin()
+        lmdb_cursor = lmdb_txn.cursor()
+        label = list()
+        for key, value in lmdb_cursor:
+            image_index = key.decode()
+            if image_index.split('-')[0] == 'image':
+                img = cv.imdecode(np.fromstring(value, np.uint8), 3)
+                label_index = 'label-' + image_index.split('-')[1]
+                value = lmdb_txn.get(label_index.encode()).decode().lower()
+                label.append([img, value])
+            else:
+                break
+        return label
+
+    def eval(self, model):
+        right_num = 0
+        pbar = tqdm(self.val_label)
+        for img, value in pbar:
+            pbar.set_description("Evaluating {} with {} val set".format(model.name, self.name))
+
+
+            rbbox = np.array([0, img.shape[0], 0, 0, img.shape[1], 0, img.shape[1], img.shape[0]])
+            pred = model.infer(img, rbbox).lower()
+            if value == pred:
+                right_num += 1
+
+        self.acc = right_num/(len(self.val_label) * 1.0)
+
+
+    def get_result(self):
+        return self.acc
+
+    def print_result(self):
+        print("Accuracy: {:.2f}%".format(self.acc*100))
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
@@ -73,6 +73,11 @@
             name="SFace",
             topic="face_recognition",
             modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec-act_int8-wt_int8-quantized.onnx")),
+        crnn=dict(
+            name="CRNN",
+            topic="text_recognition",
+            modelPath=os.path.join(root_dir, "models/text_recognition_crnn/text_recognition_CRNN_EN_2021sep.onnx"),
+            charsetPath=os.path.join(root_dir, "models/text_recognition_crnn/charset_36_EN.txt")),
 )
 
 datasets = dict(
@@ -87,6 +92,12 @@
             name="LFW",
             topic="face_recognition",
             target_size=112),
+        icdar=dict(
+            name="ICDAR",
+            topic="text_recognition"),
+        iiit5k=dict(
+            name="IIIT5K",
+            topic="text_recognition"),
 )
 
 def main(args):

-Original file line number
+Diff line change
@@ @@ -0,0 +1,94 @@ @@
 +0
 +1
 +2
 +3
 +4
 +5
 +6
 +7
 +8
 +9
 +a
 +b
 +c
 +d
 +e
 +f
 +g
 +h
 +i
 +j
 +k
 +l
 +m
 +n
 +o
 +p
 +q
 +r
 +s
 +t
 +u
 +v
 +w
 +x
 +y
 +z
 +A
 +B
 +C
 +D
 +E
 +F
 +G
 +H
 +I
 +J
 +K
 +L
 +M
 +N
 +O
 +P
 +Q
 +R
 +S
 +T
 +U
 +V
 +W
 +X
 +Y
 +Z
 +!
 +"
 +#
 +$
 +%
 +&
 +'
 +(
 +)
 +*
 ++
 +,
 +-
 +.
 +/
 +:
 +;
 +<
 +=
 +>
 +?
 +@
 +[
 +\
 +]
 +^
 +_
 +`
 +{
 +|
 +}
 +~