Spaces:
Running
Running
| """ | |
| Script ini dibuat oleh __drat dan BF667 di GitHub. | |
| Petunjuk: | |
| 1. Mengkonversi teks menjadi suara menggunakan Edge TTS dan Retrieval-based Voice Conversion (RVC). | |
| 2. Mendukung model text-to-speech (TTS) untuk bahasa Indonesia, Jawa, dan Sunda. | |
| 3. Antarmuka menggunakan Gradio dengan tema kustom IndonesiaTheme. | |
| Cara Menggunakan: | |
| 1. Pilih model suara dari dropdown. | |
| 2. Atur parameter (kecepatan bicara, pitch, dll.). | |
| 3. Masukkan teks untuk dikonversi. | |
| 4. Klik "Convert" untuk menghasilkan suara. | |
| 5. Dengarkan hasil melalui komponen audio. | |
| """ | |
| import asyncio | |
| import datetime | |
| import logging | |
| import os | |
| import time | |
| import traceback | |
| import warnings | |
| from pathlib import Path | |
| import edge_tts | |
| import gradio as gr | |
| import librosa | |
| import torch | |
| import tqdm | |
| import requests | |
| from config import Config | |
| from lib.infer_pack.models import ( | |
| SynthesizerTrnMs256NSFsid, | |
| SynthesizerTrnMs256NSFsid_nono, | |
| SynthesizerTrnMs768NSFsid, | |
| SynthesizerTrnMs768NSFsid_nono, | |
| ) | |
| from rmvpe import RMVPE | |
| from vc_infer_pipeline import VC | |
# Initial configuration: silence Python warnings and quiet down chatty
# third-party loggers so only this app's INFO messages show up.
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
for logger_name in ["fairseq", "numba", "markdown_it", "urllib3", "matplotlib"]:
    logging.getLogger(logger_name).setLevel(logging.ERROR)

config = Config()
BASE_DIR = Path.cwd()
MODEL_ROOT = BASE_DIR / "weights"
EDGE_OUTPUT_FILENAME = "edge_output.mp3"
# Text/audio length limits are only enforced when SYSTEM == "spaces".
LIMITATION = os.getenv("SYSTEM") == "spaces"

# Load the Edge TTS voice list; entries are exposed as "<ShortName>-<Gender>".
tts_voice_list = asyncio.run(edge_tts.list_voices())
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]

# Discover RVC models: every sub-directory of weights/ is treated as one model.
# Create the directory first so a fresh checkout (nothing downloaded yet) does
# not crash at import time with FileNotFoundError from iterdir().
MODEL_ROOT.mkdir(parents=True, exist_ok=True)
models = sorted(d for d in MODEL_ROOT.iterdir() if d.is_dir())
def model_data(model_name: str):
    """Load an RVC voice model and its helpers from ``MODEL_ROOT / model_name``.

    Args:
        model_name: Name of the model directory below ``MODEL_ROOT``.

    Returns:
        A tuple ``(tgt_sr, net_g, vc, version, index_file, if_f0)``:
        the last entry of the checkpoint config (used as the target sample
        rate), the synthesizer network in eval mode on ``config.device``,
        a ``VC`` pipeline built for that sample rate, the checkpoint version
        string, the path of the first ``*.index`` file as a string (``""``
        when there is none), and the checkpoint's f0 flag.

    Raises:
        FileNotFoundError: If the model directory contains no ``*.pth`` file.
        ValueError: If the (version, f0) combination has no matching class.
        Exception: Any other loading error is logged and re-raised.
    """
    try:
        model_dir = MODEL_ROOT / model_name
        # Bug fix: the original called next() on the Path itself and .glob()
        # on the result, which raises TypeError. Iterate the glob instead.
        pth_path = next(model_dir.glob("*.pth"), None)
        if pth_path is None:
            raise FileNotFoundError(f"Tidak ada file .pth di {model_dir}")
        logging.info(f"Memuat model: {pth_path}")
        # NOTE(security): torch.load unpickles the checkpoint. Checkpoints can
        # come from user-supplied URLs, so only load files from trusted sources.
        cpt = torch.load(pth_path, map_location="cpu")
        tgt_sr = cpt["config"][-1]
        # Overwrite the third-from-last config entry with the row count of the
        # "emb_g.weight" tensor (presumably the speaker count -- confirm).
        cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
        if_f0 = cpt.get("f0", 1)
        version = cpt.get("version", "v1")

        # Pick the synthesizer class from the checkpoint version and f0 flag.
        model_classes = {
            ("v1", 1): SynthesizerTrnMs256NSFsid,
            ("v1", 0): SynthesizerTrnMs256NSFsid_nono,
            ("v2", 1): SynthesizerTrnMs768NSFsid,
            ("v2", 0): SynthesizerTrnMs768NSFsid_nono,
        }
        model_class = model_classes.get((version, if_f0))
        if model_class is None:
            raise ValueError(f"Versi model tidak valid: {version}, f0: {if_f0}")

        net_g = model_class(*cpt["config"], is_half=config.is_half)
        # enc_q is removed before loading weights; strict=False tolerates the
        # resulting key mismatch.
        del net_g.enc_q
        net_g.load_state_dict(cpt["weight"], strict=False)
        net_g.eval().to(config.device)
        net_g = net_g.half() if config.is_half else net_g.float()

        vc = VC(tgt_sr, config)
        index_file = next(model_dir.glob("*.index"), "")
        logging.info(f"File indeks: {index_file or 'Tidak ditemukan'}")
        return tgt_sr, net_g, vc, version, str(index_file), if_f0
    except Exception as e:
        logging.error(f"Error memuat model: {e}")
        raise
def load_hubert():
    """Load ``hubert_base.pt`` from ``BASE_DIR`` and return it in eval mode.

    The model is moved to ``config.device`` and cast to half precision when
    ``config.is_half`` is set, otherwise to float.
    """
    from fairseq import fairseq

    # Kept from the original: the attribute is read but its value is unused.
    forward_dml = fairseq.GradMultiply.forward
    loaded_models, _, _ = fairseq.load_model(f"{BASE_DIR}/hubert_base.pt")
    hubert = loaded_models[0].to(config.device)
    hubert = hubert.half() if config.is_half else hubert.float()
    return hubert.eval()
def download_file(url: str, output_path: str = None):
    """Download ``url`` to disk while showing a tqdm progress bar.

    Args:
        url: Source URL. ``/blob/`` is rewritten to ``/resolve/`` and a
            ``?download=true`` suffix is dropped before the request
            (presumably to turn Hugging Face web links into direct links).
        output_path: Destination file (str or path-like). Defaults to the
            basename of the normalized URL in the current directory.

    Returns:
        The destination path as a string.

    Raises:
        Exception: Any request or file error is logged and re-raised.
    """
    try:
        url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
        output_path = Path(output_path or os.path.basename(url))
        # The context manager closes the streamed connection even when the
        # status check or a write fails part-way through the download.
        with requests.get(url, stream=True, timeout=300) as response:
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            with open(output_path, "wb") as f, tqdm.tqdm(
                desc=output_path.name,
                # An unknown size is passed as None so tqdm shows a plain
                # counter instead of a bar that is "complete" at 0 bytes.
                total=total_size or None,
                unit="B",
                unit_scale=True,
            ) as pbar:
                for chunk in response.iter_content(chunk_size=10 * 1024 * 1024):
                    if not chunk:
                        continue
                    f.write(chunk)
                    pbar.update(len(chunk))
        return str(output_path)
    except Exception as e:
        logging.error(f"Error mengunduh file: {e}")
        raise
def tts(
    model_name: str,
    speed: int,
    tts_text: str,
    tts_voice: str,
    f0_up_key: int,
    index_rate: float,
    protect: float,
    filter_radius: int = 3,
    resample_sr: int = 0,
    rms_mix_rate: float = 0.25,
):
    """Synthesize ``tts_text`` with Edge TTS, then convert it with an RVC model.

    Args:
        model_name: Model directory name handed to ``model_data``.
        speed: Speaking-rate change in percent; sent to Edge TTS as a signed
            percentage string.
        tts_text: Text to speak.
        tts_voice: Voice label in the form ``"<ShortName>-<Gender>"``; the
            part after the last ``-`` is dropped before calling Edge TTS.
        f0_up_key, index_rate, protect, filter_radius, resample_sr,
        rms_mix_rate: Forwarded unchanged to ``vc.pipeline``.

    Returns:
        A tuple ``(info, edge_audio_path, converted)`` where ``converted`` is
        ``(sample_rate, audio)``. On a limit violation or an error the
        message is returned in ``info`` and the missing outputs are ``None``.
    """
    logging.info(f"Memulai TTS: {model_name}, teks: {tts_text[:50]}...")
    try:
        if LIMITATION and len(tts_text) > 500:
            return f"Teks terlalu panjang: {len(tts_text)} karakter (>500).", None, None

        started = time.time()
        if speed >= 0:
            speed_str = f"+{speed}%"
        else:
            speed_str = f"{speed}%"
        # Strip the trailing "-<Gender>" part added for the dropdown label.
        voice_short_name = "-".join(tts_voice.split("-")[:-1])
        communicate = edge_tts.Communicate(tts_text, voice_short_name, rate=speed_str)
        # NOTE: every request writes to the same EDGE_OUTPUT_FILENAME.
        asyncio.run(communicate.save(EDGE_OUTPUT_FILENAME))
        edge_time = time.time() - started

        audio, sr = librosa.load(EDGE_OUTPUT_FILENAME, sr=16000, mono=True)
        duration = len(audio) / sr
        if LIMITATION and duration >= 50:
            return f"Audio terlalu panjang: {duration}s (>50s).", EDGE_OUTPUT_FILENAME, None

        tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
        vc.model_rmvpe = rmvpe_model
        # The pipeline fills this list in place; it is reported below as
        # the npy / f0 / infer timings.
        times = [0, 0, 0]
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            0,
            audio,
            EDGE_OUTPUT_FILENAME,
            times,
            f0_up_key,
            "rmvpe",
            index_file,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            None,
        )
        # A resample rate of at least 16 kHz replaces the model's own rate.
        if resample_sr >= 16000:
            tgt_sr = resample_sr
        info = f"Berhasil. Waktu: edge-tts: {edge_time:.2f}s, npy: {times[0]:.2f}s, f0: {times[1]:.2f}s, infer: {times[2]:.2f}s"
        return info, EDGE_OUTPUT_FILENAME, (tgt_sr, audio_opt)
    except Exception as e:
        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
        logging.error(error_msg)
        return error_msg, None, None
import zipfile  # needed by download_model; ideally lives with the imports at the top of the file

# Load the shared inference models once, at import time.
logging.info("Memuat model Hubert...")
hubert_model = load_hubert()
logging.info("Memuat model RMVPE...")
rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
def download_model(url: str, model_name: str):
    """Download a zipped model and extract it into ``MODEL_ROOT / model_name``.

    Args:
        url: URL of the ZIP archive, passed to ``download_file``.
        model_name: Name of the target directory. Must be a single, non-empty
            path component (no separators, not ``.`` or ``..``).

    Returns:
        A status message naming the model and its extraction directory.

    Raises:
        ValueError: If ``model_name`` is empty or not a plain directory name.
        Exception: Download or extraction errors are logged and re-raised.
    """
    try:
        # model_name is free text from the UI. Reject anything that would
        # resolve outside weights/ or extract straight into weights/ itself.
        if (
            not model_name
            or not model_name.strip()
            or model_name in {".", ".."}
            or Path(model_name).name != model_name
        ):
            raise ValueError(f"Nama model tidak valid: {model_name!r}")

        output_path = MODEL_ROOT / model_name
        # parents=True so this also works before weights/ exists.
        output_path.mkdir(parents=True, exist_ok=True)
        zip_path = output_path / "ekstrak.zip"
        try:
            downloaded_file = download_file(url, zip_path)
            # Extract the archive into weights/<model_name>.
            with zipfile.ZipFile(downloaded_file, "r") as zip_ref:
                zip_ref.extractall(output_path)
            logging.info(f"File ZIP diekstrak ke: {output_path}")
        finally:
            # Always remove the archive, including a partial download or a
            # corrupt ZIP, so failed attempts do not leave junk behind.
            if zip_path.exists():
                zip_path.unlink()
                logging.info(f"File ZIP {zip_path} dihapus setelah ekstraksi")
        return f"Model {model_name} berhasil diunduh dan diekstrak ke {output_path}"
    except Exception as e:
        logging.error(f"Error saat mengunduh atau mengekstrak model: {e}")
        raise
# Gradio interface.
# NOTE(review): the source this was recovered from had no indentation, so the
# container nesting below (Row/Column/Tab) is a reconstruction -- confirm the
# intended layout.
initial_md = """
<h1 align="center"><b>TTS RVC Indonesia 🎵</b></h1>
<p align="center">Konversi teks ke suara menggunakan Edge TTS dan RVC untuk suara artis Indonesia.</p>
<p><b>Perhatian:</b> Jangan menyalahgunakan teknologi ini. <b>Limitasi:</b> Teks maks. 500 karakter, audio maks. 50 detik.</p>
"""

with gr.Blocks(theme="Thatguy099/Sonix", title="TTS-RVC Indonesia") as app:
    gr.Markdown(initial_md)
    with gr.Row():
        # Guard against an empty weights/ directory: models[0] would raise
        # IndexError and prevent the app (and its downloader) from starting.
        model_name = gr.Dropdown(
            label="Model",
            choices=models,
            value=models[0] if models else None,
        )
        f0_key_up = gr.Number(label="Tune (oktaf dari edge-tts)", value=2)
    with gr.Column():
        with gr.Row():
            with gr.Tab("Unduh Model"):
                url = gr.Textbox(label="URL Model")
                model_nae = gr.Textbox(label="Nama Model")
                dlm = gr.Button("Unduh Model")
                # Show download_model's status message instead of discarding it.
                dl_status = gr.Textbox(label="Status Unduhan", interactive=False)
                dlm.click(fn=download_model, inputs=[url, model_nae], outputs=dl_status)
            index_rate = gr.Slider(minimum=0, maximum=1, label="Tingkat Indeks", value=0.5)
            protect0 = gr.Slider(minimum=0, maximum=0.5, label="Perlindungan", value=0.33, step=0.01)
        tts_voice = gr.Dropdown(
            label="Pembicara Edge-TTS (bahasa-Negara-Nama-Jenis Kelamin)",
            choices=tts_voices,
            value="id-ID-ArdiNeural-Male",
        )
        speed = gr.Slider(minimum=-100, maximum=100, label="Kecepatan Bicara (%)", value=0, step=10)
        tts_text = gr.Textbox(label="Teks Input", value="Konversi teks ke suara dalam bahasa Indonesia.")
        but0 = gr.Button("Konversi", variant="primary")
        info_text = gr.Textbox(label="Informasi Output")
    with gr.Row():
        edge_tts_output = gr.Audio(label="Suara Edge", type="filepath")
        tts_output = gr.Audio(label="Hasil")
    # The remaining tts() parameters (filter_radius, resample_sr, rms_mix_rate)
    # keep their defaults; only the seven controls below are wired in.
    but0.click(
        tts,
        [model_name, speed, tts_text, tts_voice, f0_key_up, index_rate, protect0],
        [info_text, edge_tts_output, tts_output],
    )
    gr.Examples(
        examples=[
            ["Ini adalah demo percobaan menggunakan Bahasa Indonesia untuk pria.", "id-ID-ArdiNeural-Male"],
            ["Ini adalah teks percobaan menggunakan Bahasa Indonesia pada wanita.", "id-ID-GadisNeural-Female"],
        ],
        inputs=[tts_text, tts_voice],
    )
    gr.HTML("""
    <footer style="text-align: center; margin-top: 20px; color:silver;">
    Energi Semesta Digital © 2024 __drat. | 🇮🇩 Untuk Indonesia Jaya!
    </footer>
    """)

app.launch()