diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py
index 6dcc8678e..21abc56f4 100644
--- a/pythainlp/corpus/__init__.py
+++ b/pythainlp/corpus/__init__.py
@@ -1,29 +1,40 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import,unicode_literals
-from pythainlp.tools import get_path_db,get_path_data
-from tinydb import TinyDB,Query
-from future.moves.urllib.request import urlopen
-from tqdm import tqdm
-import requests
+
+from __future__ import absolute_import, unicode_literals
+
 import os
+
 import requests
-#__all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"]
-path_db_=get_path_db()
+from future.moves.urllib.request import urlopen
+from pythainlp.tools import get_path_data, get_path_db
+from tinydb import Query, TinyDB
+from tqdm import tqdm
+
+CORPUS_DB_URL = (
+    "https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/master/db.json"
+)
+
+# __all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"]
+path_db_ = get_path_db()
+
+
 def get_file(name):
-    db=TinyDB(path_db_)
+    db = TinyDB(path_db_)
     temp = Query()
-    if len(db.search(temp.name==name))>0:
-        path= get_path_data(db.search(temp.name==name)[0]['file'])
+    if len(db.search(temp.name == name)) > 0:
+        path = get_path_data(db.search(temp.name == name)[0]["file"])
         db.close()
         if not os.path.exists(path):
             download(name)
         return path
+
+
 def download_(url, dst):
     """
     @param: url to download file
     @param: dst place to put the file
     """
-    file_size = int(urlopen(url).info().get('Content-Length', -1))
+    file_size = int(urlopen(url).info().get("Content-Length", -1))
     if os.path.exists(dst):
         first_byte = os.path.getsize(dst)
     else:
@@ -32,55 +43,90 @@ def download_(url, dst):
         return file_size
     header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
     pbar = tqdm(
-        total=file_size, initial=first_byte,
-        unit='B', unit_scale=True, desc=url.split('/')[-1])
+        total=file_size,
+        initial=first_byte,
+        unit="B",
+        unit_scale=True,
+        desc=url.split("/")[-1],
+    )
     req = requests.get(url, headers=header, stream=True)
-    with(open(get_path_data(dst), 'wb')) as f:
+    with (open(get_path_data(dst), "wb")) as f:
         for chunk in req.iter_content(chunk_size=1024):
             if chunk:
                 f.write(chunk)
                 pbar.update(1024)
     pbar.close()
-    #return file_size
-def download(name,force=False):
-    db=TinyDB(path_db_)
+    # return file_size
+
+
+def download(name, force=False):
+    db = TinyDB(path_db_)
     temp = Query()
-    data=requests.get("https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/master/db.json")
-    data_json=data.json()
+    data = requests.get(CORPUS_DB_URL)
+    data_json = data.json()
     if name in list(data_json.keys()):
-        temp_name=data_json[name]
-        print("Download : "+name)
-        if len(db.search(temp.name==name))==0:
-            print(name+" "+temp_name['version'])
-            download_(temp_name['download'],temp_name['file_name'])
-            db.insert({'name': name, 'version': temp_name['version'],'file':temp_name['file_name']})
+        temp_name = data_json[name]
+        print("Download : " + name)
+
+        if not db.search(temp.name == name):
+            print(name + " " + temp_name["version"])
+            download_(temp_name["download"], temp_name["file_name"])
+            db.insert(
+                {
+                    "name": name,
+                    "version": temp_name["version"],
+                    "file": temp_name["file_name"],
+                }
+            )
         else:
-            if len(db.search(temp.name==name and temp.version==temp_name['version']))==0:
+            if not db.search(
+                temp.name == name and temp.version == temp_name["version"]
+            ):
                 print("have update")
-                print("from "+name+" "+db.search(temp.name==name)[0]['version']+" update to "+name+" "+temp_name['version'])
-                yes_no="y"
-                if force==False:
-                    yes_no=str(input("y or n : ")).lower()
-                if "y"==yes_no:
-                    download_(temp_name['download'],temp_name['file_name'])
-                    db.update({'version':temp_name['version']},temp.name==name)
+                print(
+                    "from "
+                    + name
+                    + " "
+                    + db.search(temp.name == name)[0]["version"]
+                    + " update to "
+                    + name
+                    + " "
+                    + temp_name["version"]
+                )
+                yes_no = "y"
+                if not force:
+                    yes_no = str(input("y or n : ")).lower()
+                if "y" == yes_no:
+                    download_(temp_name["download"], temp_name["file_name"])
+                    db.update({"version": temp_name["version"]}, temp.name == name)
            else:
                 print("re-download")
-                print("from "+name+" "+db.search(temp.name==name)[0]['version']+" update to "+name+" "+temp_name['version'])
-                yes_no="y"
-                if force==False:
-                    yes_no=str(input("y or n : ")).lower()
-                if "y"==yes_no:
-                    download_(temp_name['download'],temp_name['file_name'])
-                    db.update({'version':temp_name['version']},temp.name==name)
+                print(
+                    "from "
+                    + name
+                    + " "
+                    + db.search(temp.name == name)[0]["version"]
+                    + " update to "
+                    + name
+                    + " "
+                    + temp_name["version"]
+                )
+                yes_no = "y"
+                if not force:
+                    yes_no = str(input("y or n : ")).lower()
+                if "y" == yes_no:
+                    download_(temp_name["download"], temp_name["file_name"])
+                    db.update({"version": temp_name["version"]}, temp.name == name)
     db.close()
+
+
 def remove(name):
-    db=TinyDB(path_db_)
+    db = TinyDB(path_db_)
     temp = Query()
-    data=db.search(temp.name==name)
-    if len(data)>0:
-        path=get_file(name)
+    data = db.search(temp.name == name)
+    if len(data) > 0:
+        path = get_file(name)
         os.remove(path)
-        db.remove(temp.name==name)
+        db.remove(temp.name == name)
         return True
-    return False
\ No newline at end of file
+    return False
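A minimal usage sketch of the corpus manager touched above. It assumes the name passed in (here "thwiki_model2", the model name used by the ULMFit utilities later in this patch) is listed in the remote db.json; any listed corpus name behaves the same way.

    from pythainlp.corpus import download, get_file, remove

    # Fetch (or update) an entry listed in the remote db.json;
    # force=True skips the interactive "y or n" prompt on updates/re-downloads.
    download("thwiki_model2", force=True)

    # Resolve the local path recorded in ~/pythainlp-data/db.json;
    # get_file() triggers a fresh download if the recorded file is missing on disk.
    path = get_file("thwiki_model2")

    # Delete the local file and its TinyDB record.
    remove("thwiki_model2")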
diff --git a/pythainlp/tools/__init__.py b/pythainlp/tools/__init__.py
index e927e0903..398a9b331 100644
--- a/pythainlp/tools/__init__.py
+++ b/pythainlp/tools/__init__.py
@@ -1,25 +1,32 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import,unicode_literals
+
+from __future__ import absolute_import, unicode_literals
+
 import os
-import dill
-from pythainlp.tokenize import tcc
-import marisa_trie
 import subprocess
 import sys
+
 def install_package(package):
     subprocess.call([sys.executable, "-m", "pip", "install", package])
+
+
 def get_path_db():
-    path = os.path.join(get_path_pythainlp_data(), "db.json")
-    if not os.path.exists(path):
-        from tinydb import TinyDB
-        db=TinyDB(path)
-        #db.insert({'name': 'hi', 'version': '0.1','file':''})
-    return path
+    path = os.path.join(get_path_pythainlp_data(), "db.json")
+    if not os.path.exists(path):
+        from tinydb import TinyDB
+
+        db = TinyDB(path)
+        # db.insert({'name': 'hi', 'version': '0.1','file':''})
+    return path
+
+
 def get_path_data(filename):
-    return os.path.join(get_path_pythainlp_data(), filename)
+    return os.path.join(get_path_pythainlp_data(), filename)
+
+
 def get_path_pythainlp_data():
-    path= os.path.join(os.path.expanduser("~"), 'pythainlp-data')
-    if not os.path.exists(path):
-        os.makedirs(path)
-    return path
+    path = os.path.join(os.path.expanduser("~"), "pythainlp-data")
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return path
diff --git a/pythainlp/ulmfit/__init__.py b/pythainlp/ulmfit/__init__.py
index d61c3dfd8..ee14b01ed 100644
--- a/pythainlp/ulmfit/__init__.py
+++ b/pythainlp/ulmfit/__init__.py
@@ -1,2 +1,3 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import,unicode_literals
\ No newline at end of file
+
+from __future__ import absolute_import, unicode_literals
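For orientation, a small sketch of where the path helpers in pythainlp/tools point. The directory name, the db.json file, and the create-on-first-use behaviour come straight from the code above; the corpus filename passed to get_path_data() is only a hypothetical example.

    from pythainlp.tools import get_path_data, get_path_db, get_path_pythainlp_data

    data_dir = get_path_pythainlp_data()  # ~/pythainlp-data, created if it does not exist
    db_path = get_path_db()               # ~/pythainlp-data/db.json, created as an empty TinyDB
    file_path = get_path_data("example_corpus.txt")  # hypothetical filename joined onto data_dir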
diff --git a/pythainlp/ulmfit/utils.py b/pythainlp/ulmfit/utils.py
index dcb19ba4b..dd5adaad4 100644
--- a/pythainlp/ulmfit/utils.py
+++ b/pythainlp/ulmfit/utils.py
@@ -1,22 +1,28 @@
 # -*- coding: utf-8 -*-
-'''
+
+"""
 Code by https://github.com/cstorm125/thai2vec/tree/master/notebook
-'''
-from __future__ import absolute_import,unicode_literals
-import os
-import sys
+"""
+
+from __future__ import absolute_import, unicode_literals
+
 import re
-import torch
+import sys
-#numpy and fastai
+from pythainlp.corpus import download, get_file
+from pythainlp.tokenize import word_tokenize
+
+
+# numpy and fastai
 try:
     import numpy as np
     from fastai.text import *
     import dill as pickle
 except ImportError:
     from pythainlp.tools import install_package
-    install_package('fastai')
-    install_package('numpy')
+
+    install_package("fastai")
+    install_package("numpy")
     try:
         import numpy as np
         from fastai.text import *
@@ -25,53 +31,54 @@
         print("Error installing using 'pip install fastai numpy dill'")
         sys.exit(0)
 
-#import torch
+# import torch
 try:
     import torch
 except ImportError:
-    print('PyTorch required. See https://pytorch.org/.')
+    print("PyTorch required. See https://pytorch.org/.")
 
-from pythainlp.tokenize import word_tokenize
-from pythainlp.corpus import get_file
-from pythainlp.corpus import download
-MODEL_NAME = 'thwiki_model2'
-ITOS_NAME = 'itos'
-
-#paralellized thai tokenizer with some text cleaning
-class ThaiTokenizer():
-    def __init__(self, engine='newmm'):
+
+MODEL_NAME = "thwiki_model2"
+ITOS_NAME = "itos"
+
+
+# parallelized Thai tokenizer with some text cleaning
+class ThaiTokenizer:
+    def __init__(self, engine="newmm"):
         """
         :parameters for tokenization engine:
             * newmm - Maximum Matching algorithm + TCC
-            * icu - IBM ICU
+            * icu - IBM ICU
             * longest-matching - Longest matching
             * mm - Maximum Matching algorithm
            * pylexto - LexTo
            * deepcut - Deep Neural Network
         """
         self.engine = engine
-        self.re_br = re.compile(r'<\s*br\s*/?>', re.IGNORECASE)
-        self.re_rep = re.compile(r'(\S)(\1{3,})')
+        self.__RE_BR = re.compile(r"<\s*br\s*/?>", re.IGNORECASE)
+        self.__RE_REP = re.compile(r"(\S)(\1{3,})")
+        self.__RE_SLASH_HASH = re.compile(r"([/#])")
+        self.__RE_DOUBLE_SPACE = re.compile(" {2,}")
 
-    def sub_br(self,text):
+    def sub_br(self, text):
         """
         :meth:`sub_br` replace `<br>` tags with `\n`
         :param str text: text to process
         :return: processed text
         """
-        return self.re_br.sub("\n", text)
+        return self.__RE_BR.sub("\n", text)
 
-    def tokenize(self,text):
+    def tokenize(self, text):
         """
         :meth: tokenize text with selected engine
         :param str text: text to tokenize
         :return: tokenized text
         """
-        return [t for t in word_tokenize(self.sub_br(text),engine=self.engine)]
-
+        return [t for t in word_tokenize(self.sub_br(text), engine=self.engine)]
+
     @staticmethod
     def replace_rep(text):
-        '''
+        """
         :meth:`replace_rep` replace 3 or above repetitive characters with `tkrep`
         :param str text: text to process
         :return: processed text where repetitions are replaced by `tkrep` followed by number of repetitions
@@ -80,10 +87,10 @@ def replace_rep(text):
         >>> tt = ThaiTokenizer()
         >>> tt.replace_rep('คือดียยยยยย')
         คือดีtkrep6ย
-        '''
-        TK_REP = 'tkrep'
-        c,cc = text.groups()
-        return f'{TK_REP}{len(cc)+1}{c}'
+        """
+        TK_REP = "tkrep"
+        c, cc = text.groups()
+        return f"{TK_REP}{len(cc)+1}{c}"
 
     def proc_text(self, text):
         """
@@ -91,10 +98,9 @@ def proc_text(self, text):
         :param str text: text to process
         :return: processed and tokenized text
         """
-        s = self.re_rep.sub(ThaiTokenizer.replace_rep, text)
-        s = re.sub(r'([/#])', r' \1 ', s)
-        #remvoe double space
-        s = re.sub(' {2,}', ' ', s)
+        s = self.__RE_REP.sub(ThaiTokenizer.replace_rep, text)
+        s = self.__RE_SLASH_HASH.sub(r" \1 ", s)
+        s = self.__RE_DOUBLE_SPACE.sub(" ", s)
         return self.tokenize(s)
 
     @staticmethod
@@ -114,12 +120,15 @@ def proc_all_mp(ss):
         :param str text: text to process
         :return: processed and tokenized text
         """
-        ncpus = num_cpus()//2
+        ncpus = num_cpus() // 2
         with ProcessPoolExecutor(ncpus) as e:
             return sum(e.map(ThaiTokenizer.proc_all, ss), [])
 
-#ulmfit helper functions
-BOS = 'xbos' # beginning-of-sentence tag
+
+# ulmfit helper functions
+BOS = "xbos"  # beginning-of-sentence tag
+
+
 def get_texts(df):
     """
     :meth: `get_texts` get tuple of tokenized texts and labels
     :param pandas.DataFrame df: `pandas.DataFrame` with label as first column and text as second column
     :return:
        * tok - lists of tokenized texts with beginning-of-sentence tag `xbos` as first element of each list
        * labels - list of labels
     """
-    labels = df.iloc[:,0].values.astype(np.int64)
-    texts = BOS+df.iloc[:,1].astype(str).apply(lambda x: x.rstrip())
+    labels = df.iloc[:, 0].values.astype(np.int64)
+    texts = BOS + df.iloc[:, 1].astype(str).apply(lambda x: x.rstrip())
     tok = ThaiTokenizer().proc_all_mp(partition_by_cores(texts))
-    return(tok, list(labels))
+    return (tok, list(labels))
+
 
 def get_all(df):
     """
@@ -142,13 +152,16 @@
        * labels - list of labels
     """
     tok, labels = [], []
-    for i, r in enumerate(df):
+    for _, r in enumerate(df):
         tok_, labels_ = get_texts(r)
-        tok += tok_;
+        tok += tok_
         labels += labels_
-    return(tok, labels)
+    return (tok, labels)
+
 
-def numericalizer(df, itos=None, max_vocab = 60000, min_freq = 2, pad_tok = '_pad_', unk_tok = '_unk_'):
+def numericalizer(
+    df, itos=None, max_vocab=60000, min_freq=2, pad_tok="_pad_", unk_tok="_unk_"
+):
     """
     :meth: `numericalize` numericalize tokenized texts for:
        * tokens with word frequency more than `min_freq`
@@ -172,12 +185,13 @@ def numericalizer(df, itos=None, max_vocab = 60000, min_freq = 2, pad_tok = '_pa
     tok, labels = get_all(df)
     freq = Counter(p for o in tok for p in o)
     if itos is None:
-        itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq]
+        itos = [o for o, c in freq.most_common(max_vocab) if c > min_freq]
     itos.insert(0, pad_tok)
     itos.insert(0, unk_tok)
-    stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})
+    stoi = collections.defaultdict(lambda: 0, {v: k for k, v in enumerate(itos)})
     lm = np.array([[stoi[o] for o in p] for p in tok])
-    return(lm,tok,labels,itos,stoi,freq)
+    return (lm, tok, labels, itos, stoi, freq)
+
 
 def merge_wgts(em_sz, wgts, itos_pre, itos_cls):
     """
@@ -189,23 +203,26 @@
     :return: merged weights of the model for current dataset
     """
     vocab_size = len(itos_cls)
-    enc_wgts = to_np(wgts['0.encoder.weight'])
-    #average weight of encoding
+    enc_wgts = to_np(wgts["0.encoder.weight"])
+    # average weight of encoding
     row_m = enc_wgts.mean(0)
-    stoi_pre = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos_pre)})
-    #new embedding based on classification dataset
+    stoi_pre = collections.defaultdict(
+        lambda: -1, {v: k for k, v in enumerate(itos_pre)}
+    )
+    # new embedding based on classification dataset
     new_w = np.zeros((vocab_size, em_sz), dtype=np.float32)
-    for i,w in enumerate(itos_cls):
+    for i, w in enumerate(itos_cls):
         r = stoi_pre[w]
-        #use pretrianed embedding if present; else use the average
-        new_w[i] = enc_wgts[r] if r>=0 else row_m
-    wgts['0.encoder.weight'] = T(new_w)
-    wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(new_w))
-    wgts['1.decoder.weight'] = T(np.copy(new_w))
-    return(wgts)
-
-#feature extractor
-def document_vector(ss, m, stoi,tok_engine='newmm'):
+        # use pretrained embedding if present; else use the average
+        new_w[i] = enc_wgts[r] if r >= 0 else row_m
+    wgts["0.encoder.weight"] = T(new_w)
+    wgts["0.encoder_with_dropout.embed.weight"] = T(np.copy(new_w))
+    wgts["1.decoder.weight"] = T(np.copy(new_w))
+    return wgts
+
+
+# feature extractor
+def document_vector(ss, m, stoi, tok_engine="newmm"):
     """
     :meth: `document_vector` get document vector using pretrained ULMFit model
     :param str ss: sentence to extract embeddings
@@ -215,44 +232,55 @@
     :return: `numpy.array` of document vector sized 300
     """
     s = word_tokenize(ss)
-    t = LongTensor([stoi[i] for i in s]).view(-1,1).cuda()
-    t = Variable(t,volatile=False)
+    t = LongTensor([stoi[i] for i in s]).view(-1, 1).cuda()
+    t = Variable(t, volatile=False)
     m.reset()
-    pred,*_ = m[0](t)
-    #get average of last lstm layer along bptt
-    res = to_np(torch.mean(pred[-1],0).view(-1))
-    return(res)
-
-class SaveFeatures():
-    features=None
-    def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
-    def hook_fn(self, module, input, output): self.features = output
-    def remove(self): self.hook.remove()
-
-#Download pretrained models
+    pred, *_ = m[0](t)
+    # get average of last lstm layer along bptt
+    res = to_np(torch.mean(pred[-1], 0).view(-1))
+    return res
+
+
+class SaveFeatures:
+    features = None
+
+    def __init__(self, m):
+        self.hook = m.register_forward_hook(self.hook_fn)
+
+    def hook_fn(self, module, input, output):
+        self.features = output
+
+    def remove(self):
+        self.hook.remove()
+
+
+# Download pretrained models
 def get_path(fname):
-    path = get_file(fname)
-    if path==None:
-        download(fname)
-        path = get_file(fname)
-    return(path)
+    path = get_file(fname)
+    if not path:
+        download(fname)
+        path = get_file(fname)
+    return path
+
 
 def load_pretrained_model():
     path = get_path(MODEL_NAME)
     wgts = torch.load(path, map_location=lambda storage, loc: storage)
-    return(wgts)
+    return wgts
+
 
 def load_pretrained_itos():
     path = get_path(ITOS_NAME)
-    itos = pickle.load(open(path,'rb'))
-    return(itos)
+    itos = pickle.load(open(path, "rb"))
+    return itos
+
 
 def about():
-    return '''
-    thai2vec
-    State-of-the-Art Language Modeling, Text Feature Extraction and Text Classification in Thai Language.
-    Created as part of pyThaiNLP with ULMFit implementation from fast.ai
-
-    Development : Charin Polpanumas
-    GitHub : https://github.com/cstorm125/thai2vec
-    '''
+    return """
+    thai2vec
+    State-of-the-Art Language Modeling, Text Feature Extraction and Text Classification in Thai Language.
+    Created as part of PyThaiNLP with ULMFit implementation from fast.ai
+
+    Development : Charin Polpanumas
+    GitHub : https://github.com/cstorm125/thai2vec
+    """
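Taken together, the utilities in pythainlp/ulmfit/utils.py are meant to be driven roughly as below. This is only a sketch: it assumes fastai, torch, and dill are installed (otherwise the module's import-time fallback kicks in) and that the "thwiki_model2" and "itos" entries can be fetched through the corpus manager from the first file in this patch.

    from pythainlp.ulmfit.utils import (
        ThaiTokenizer,
        load_pretrained_itos,
        load_pretrained_model,
    )

    tt = ThaiTokenizer(engine="newmm")
    print(tt.sub_br("สวัสดี<br/>ครับ"))      # <br> tags are replaced with newlines
    print(tt.proc_text("คือดียยยยยย #ชอบ"))  # repeats collapse to "tkrep6ย", "#" is space-padded, then tokenized

    # Pretrained Thai Wikipedia language-model weights and vocab,
    # downloaded to ~/pythainlp-data on first use via get_path()/download().
    wgts = load_pretrained_model()
    itos = load_pretrained_itos()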