Enable Intel XPU #252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 2 commits into main
4 changes: 3 additions & 1 deletion docs/install.md
@@ -14,6 +14,8 @@ The repo is developed and tested on `Ubuntu 20.04` and `Python 3.9`.
 git clone https://github.com/myshell-ai/MeloTTS.git
 cd MeloTTS
 pip install -e .
+# use pip below for Intel XPU
+# pip install -r requirements-intel.txt
 python -m unidic download
 ```
 If you encountered issues in macOS install, try the [Docker Install](#docker-install)
@@ -112,7 +114,7 @@ from melo.api import TTS
 speed = 1.0
 
 # CPU is sufficient for real-time inference.
-# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps'
+# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps' or 'xpu'
 device = 'auto' # Will automatically use GPU if available
 
 # English
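To confirm the XPU-enabled PyTorch build is actually active after following the Intel install path above, a short availability check helps (a minimal sketch; `torch.xpu` only exists in PyTorch builds with XPU support, roughly 2.4 and later):

```python
import torch

# Prints the torch version and whether an Intel GPU is usable.
# On CPU-only machines or non-XPU builds this prints False
# (the hasattr guard covers builds that predate torch.xpu).
print(torch.__version__)
if hasattr(torch, "xpu") and torch.xpu.is_available():
    print("XPU device:", torch.xpu.get_device_name(0))
else:
    print("No XPU device available")
```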
8 changes: 6 additions & 2 deletions melo/api.py
@@ -28,9 +28,12 @@ def __init__(self,
         if device == 'auto':
             device = 'cpu'
             if torch.cuda.is_available(): device = 'cuda'
-            if torch.backends.mps.is_available(): device = 'mps'
+            elif torch.xpu.is_available(): device = 'xpu'
+            elif torch.backends.mps.is_available(): device = 'mps'
         if 'cuda' in device:
             assert torch.cuda.is_available()
+        if 'xpu' in device:
+            assert torch.xpu.is_available()
 
         # config_path =
         hps = load_or_download_config(language, use_hf=use_hf, config_path=config_path)
@@ -123,7 +126,8 @@ def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_s
                 del x_tst, tones, lang_ids, bert, ja_bert, x_tst_lengths, speakers
                 #
             audio_list.append(audio)
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available(): torch.cuda.empty_cache()
+        if torch.xpu.is_available(): torch.xpu.empty_cache()
         audio = self.audio_numpy_concat(audio_list, sr=self.hps.data.sampling_rate, speed=speed)
 
         if output_path is None:
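With this patch, `device='auto'` resolves in a fixed order: CUDA first, then Intel XPU, then Apple MPS, falling back to CPU; the matching guards around `empty_cache()` release cached allocator memory only on a backend that is actually present. A standalone restatement of that order (an illustrative sketch, not code from the patch; the `getattr` guard is extra safety for older PyTorch builds without `torch.xpu`):

```python
import torch

def resolve_device(device: str = "auto") -> str:
    # Mirrors the patched __init__ logic: CUDA > XPU > MPS > CPU.
    if device != "auto":
        return device
    if torch.cuda.is_available():
        return "cuda"
    if getattr(torch, "xpu", None) and torch.xpu.is_available():
        return "xpu"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"
```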
5 changes: 4 additions & 1 deletion melo/preprocess_text.py
@@ -45,14 +45,17 @@ def main(
 
     if cleaned_path is None:
         cleaned_path = metadata + ".cleaned"
+
+    if torch.cuda.is_available(): device = "cuda:0"
+    elif torch.xpu.is_available(): device = "xpu"
 
     if clean:
         out_file = open(cleaned_path, "w", encoding="utf-8")
         new_symbols = []
         for line in tqdm(open(metadata, encoding="utf-8").readlines()):
             try:
                 utt, spk, language, text = line.strip().split("|")
-                norm_text, phones, tones, word2ph, bert = clean_text_bert(text, language, device='cuda:0')
+                norm_text, phones, tones, word2ph, bert = clean_text_bert(text, language, device=device)
                 for ph in phones:
                     if ph not in symbols and ph not in new_symbols:
                         new_symbols.append(ph)
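One caveat in this hunk: `device` is only bound when CUDA or XPU is available, so a CPU-only run would raise a `NameError` at the `clean_text_bert` call (the pre-patch code had the same practical limitation, since it hardcoded `'cuda:0'`). A defensive variant would seed a CPU default first (a sketch, not part of this PR):

```python
import torch

# Same selection order as the patch, plus a CPU fallback so `device`
# is always defined even on machines with no accelerator.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"
elif getattr(torch, "xpu", None) and torch.xpu.is_available():
    device = "xpu"
```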
3 changes: 2 additions & 1 deletion melo/text/chinese_bert.py
@@ -26,7 +26,8 @@ def get_bert_feature(text, word2ph, device=None, model_id='hfl/chinese-roberta-w
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
 
     with torch.no_grad():
         inputs = tokenizer(text, return_tensors="pt")
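This same three-line fallback is repeated below in english_bert.py, french_bert.py, japanese_bert.py, and spanish_bert.py. If the duplication ever becomes a maintenance burden, it could be factored into one shared helper; a possible shape (hypothetical: `pick_bert_device` is not a name in the PR, and the explicit CPU fallback deviates from the patch, which leaves `device` as `None` when no accelerator is found):

```python
import sys
import torch

def pick_bert_device(device=None):
    # Honor an explicit choice; upgrade 'cpu' to 'mps' on Apple Silicon,
    # as the existing per-module code does.
    if sys.platform == "darwin" and torch.backends.mps.is_available() and device == "cpu":
        device = "mps"
    # Otherwise prefer CUDA, then Intel XPU, then plain CPU.
    if not device:
        if torch.cuda.is_available():
            device = "cuda"
        elif getattr(torch, "xpu", None) and torch.xpu.is_available():
            device = "xpu"
        else:
            device = "cpu"
    return device
```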
4 changes: 3 additions & 1 deletion melo/text/chinese_mix.py
@@ -243,7 +243,9 @@ def _g2p_v2(segments):
     text = text_normalize(text)
     print(text)
     phones, tones, word2ph = g2p(text, impl='v2')
-    bert = get_bert_feature(text, word2ph, device='cuda:0')
+    if torch.cuda.is_available(): device = "cuda:0"
+    elif torch.xpu.is_available(): device = "xpu"
+    bert = get_bert_feature(text, word2ph, device=device)
     print(phones)
     import pdb; pdb.set_trace()
 
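The `__main__` test block above shares the unbound-`device` caveat of preprocess_text.py: on a machine with neither CUDA nor XPU, `device` is never assigned. Since this path only runs under interactive debugging (note the `pdb.set_trace()`), it is minor, but the CPU-fallback sketch shown after the preprocess_text.py diff applies here as well.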
3 changes: 2 additions & 1 deletion melo/text/english_bert.py
@@ -15,7 +15,8 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
3 changes: 2 additions & 1 deletion melo/text/french_bert.py
@@ -15,7 +15,8 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
3 changes: 2 additions & 1 deletion melo/text/japanese_bert.py
@@ -16,7 +16,8 @@ def get_bert_feature(text, word2ph, device=None, model_id='tohoku-nlp/bert-base-
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
     if model_id not in models:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
4 changes: 2 additions & 2 deletions melo/text/korean.py
@@ -138,7 +138,7 @@ def g2p(norm_text):
     assert len(word2ph) == len(tokenized) + 2
     return phones, tones, word2ph
 
-def get_bert_feature(text, word2ph, device='cuda'):
+def get_bert_feature(text, word2ph, device=None):
     from . import japanese_bert
     return japanese_bert.get_bert_feature(text, word2ph, device=device, model_id=model_id)
 
@@ -189,4 +189,4 @@ def get_bert_feature(text, word2ph, device='cuda'):
 # conv = kakasi.getConverter()
 # katakana_text = conv.do('ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?') # Replace with your Chinese text
 
-# print(katakana_text) # Output: ニーハオセカイ
\ No newline at end of file
+# print(katakana_text) # Output: ニーハオセカイ
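Switching the default from `device='cuda'` to `device=None` is what lets Korean inherit the new backend selection: the `None` flows into japanese_bert.get_bert_feature (patched above), whose fallback then picks CUDA, XPU, or MPS instead of unconditionally assuming an NVIDIA GPU. The second hunk only rewrites the final commented-out line to fix the missing trailing newline.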
3 changes: 2 additions & 1 deletion melo/text/spanish_bert.py
@@ -15,7 +15,8 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
31 changes: 31 additions & 0 deletions requirements-intel.txt
@@ -0,0 +1,31 @@
+--extra-index-url https://download.pytorch.org/whl/test/xpu
+torch
+torchaudio
+txtsplit
+cached_path
+transformers==4.27.4
+num2words==0.5.12
+unidic_lite==1.0.8
+unidic==1.1.0
+mecab-python3==1.0.9
+pykakasi==2.2.1
+fugashi==1.3.0
+g2p_en==2.1.0
+anyascii==0.3.2
+jamo==0.4.1
+gruut[de,es,fr]==2.4.0
+g2pkk>=0.1.1
+librosa==0.9.1
+pydub==0.25.1
+eng_to_ipa==0.0.2
+inflect==7.0.0
+unidecode==1.3.7
+pypinyin==0.50.0
+cn2an==0.5.22
+jieba==0.42.1
+gradio
+langid==1.1.6
+tqdm
+tensorboard==2.16.2
+loguru==0.7.2
+python-mecab-ko
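The first line is what makes this file Intel-specific: `--extra-index-url` points pip at PyTorch's XPU test wheel index, so the unpinned `torch` and `torchaudio` entries resolve to XPU-enabled builds, while the remaining pins carry over the project's existing dependencies. After installing, a short smoke test can confirm end-to-end inference on the Intel GPU (a sketch; the speaker key and output path follow the README's English example and are illustrative):

```python
from melo.api import TTS

# 'auto' picks XPU on Intel GPUs with this PR applied.
model = TTS(language="EN", device="auto")
speaker_ids = model.hps.data.spk2id
model.tts_to_file("Hello from an Intel GPU.", speaker_ids["EN-US"],
                  "en-us.wav", speed=1.0)
```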