1 | #!flask/bin/python |
2 | |
3 | # installation on Ubuntu 18: |
4 | # apt install python3-pip |
5 | # pip3 install --upgrade pip |
6 | # pip3 --version |
7 | # pip3 install TTS |
8 | # find /usr -name server.py |
9 | # wget -O /usr/local/lib/python3.6/dist-packages/TTS/server/server.py https://botcompany.de/serve/1032538 |
10 | # tts-server -h |
11 | |
12 | import argparse |
13 | import io |
14 | import json |
15 | import os |
16 | import sys |
17 | from pathlib import Path |
18 | from typing import Union |
19 | |
20 | from flask import Flask, render_template, request, send_file |
21 | |
22 | from TTS.config import load_config |
23 | from TTS.utils.manage import ModelManager |
24 | from TTS.utils.synthesizer import Synthesizer |
25 | |
# Startup banner: printed at import time so the console shows that this patched
# copy of server.py (rather than the packaged one) is the module being run.
print("tts-server patched from https://code.botcompany.de/1032538")
27 | |
def create_argparser():
    """Build the command-line parser for the TTS demo server.

    The parser exposes three groups of options: selection of released models
    (``--list_models``, ``--model_name``, ``--vocoder_name``), paths for custom
    models, and server settings (``--port``, ``--use_cuda``, ``--debug``,
    ``--show_details``).

    Returns:
        argparse.ArgumentParser: the configured parser (arguments not yet parsed).
    """

    def convert_boolean(x):
        # Only these spellings (case-insensitive) are treated as True.
        return x.lower() in ["true", "1", "yes"]

    # Optional string options that all default to None, in help-output order.
    # The first entry selects a released vocoder; the rest are for custom models.
    string_options = (
        ("--vocoder_name", "name of one of the released vocoder models."),
        ("--config_path", "Path to model config file."),
        ("--model_path", "Path to model file."),
        (
            "--vocoder_path",
            "Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
        ),
        ("--vocoder_config_path", "Path to vocoder model config file."),
        ("--speakers_file_path", "JSON file for multi-speaker model."),
    )

    # Boolean switches that require an explicit value and default to False.
    boolean_options = (
        ("--use_cuda", "true to use CUDA."),
        ("--debug", "true to enable Flask debug mode."),
        ("--show_details", "Generate model detail page."),
    )

    parser = argparse.ArgumentParser()

    # A bare ``--list_models`` (no value) means True thanks to nargs="?" + const.
    parser.add_argument(
        "--list_models",
        nargs="?",
        const=True,
        default=False,
        type=convert_boolean,
        help="list available pre-trained tts and vocoder models.",
    )
    parser.add_argument(
        "--model_name",
        default="tts_models/en/ljspeech/tacotron2-DDC",
        type=str,
        help="Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
    )

    for flag, description in string_options:
        parser.add_argument(flag, type=str, default=None, help=description)

    parser.add_argument("--port", type=int, default=5002, help="port to listen on.")

    for flag, description in boolean_options:
        parser.add_argument(flag, type=convert_boolean, default=False, help=description)

    return parser
70 | |
71 | |
# Parse the command-line arguments once at import time; the route handlers
# further down read the resulting namespace.
args = create_argparser().parse_args()

# Model catalogue file, resolved relative to this module's directory.
path = Path(__file__).parent / "../.models.json"
manager = ModelManager(path)

# CASE1: list pre-trained TTS models and exit without starting the server.
if args.list_models:
    manager.list_models()
    sys.exit()

# Paths handed to the Synthesizer; filled in by CASE2 and/or CASE3 below.
model_path = None
config_path = None
speakers_file_path = None
vocoder_path = None
vocoder_config_path = None

# CASE2: load pre-trained model paths (only when no custom path overrides them).
if args.model_name is not None and not args.model_path:
    model_path, config_path, model_item = manager.download_model(args.model_name)
    # Fall back to the vocoder recommended by the model entry when none was requested.
    if args.vocoder_name is None:
        args.vocoder_name = model_item["default_vocoder"]

if args.vocoder_name is not None and not args.vocoder_path:
    vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)

# CASE3: set custom model paths
if args.model_path is not None:
    model_path = args.model_path
    config_path = args.config_path
    speakers_file_path = args.speakers_file_path

if args.vocoder_path is not None:
    vocoder_path = args.vocoder_path
    vocoder_config_path = args.vocoder_config_path

# load models
synthesizer = Synthesizer(
    model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, use_cuda=args.use_cuda
)

# Multi-speaker UI is enabled only when the model has a speaker manager and more than one speaker.
use_multi_speaker = hasattr(synthesizer.tts_model, "speaker_manager") and synthesizer.tts_model.num_speakers > 1
speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None)
# TODO: set this from SpeakerManager
use_gst = synthesizer.tts_config.get("use_gst", False)
app = Flask(__name__)
122 | |
123 | |
def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict, None]:
    """Transform a ``style_wav`` URI into a wav file path or a GST style dict.

    Args:
        style_wav (str): either the path of a ``.wav`` file located on the
            server, or a JSON string such as ``{"token1_id": token1_weight, ...}``.

    Returns:
        Union[str, dict, None]: the unchanged path (str) when it names an
        existing ``.wav`` file; otherwise the decoded JSON value (expected to
        be a GST dict); ``None`` when ``style_wav`` is empty or ``None``.

    Raises:
        json.JSONDecodeError: if ``style_wav`` is non-empty but is neither an
            existing ``.wav`` file nor valid JSON.
    """
    if not style_wav:
        # No style requested.
        return None

    if os.path.isfile(style_wav) and style_wav.endswith(".wav"):
        # style_wav is a .wav file located on the server.
        return style_wav

    # Anything else is treated as a JSON-encoded GST dictionary
    # {token1_id: token1_weight, ...}.
    return json.loads(style_wav)
141 | |
142 | |
@app.route("/")
def index():
    """Render the landing page (``index.html``) of the demo server.

    The template receives the ``--show_details`` flag, the multi-speaker and
    GST switches computed at start-up, and the speaker ids of the loaded model
    (``None`` when the model has no speaker manager).
    """
    if speaker_manager is None:
        known_speakers = None
    else:
        known_speakers = speaker_manager.speaker_ids

    template_context = {
        "show_details": args.show_details,
        "use_multi_speaker": use_multi_speaker,
        "speaker_ids": known_speakers,
        "use_gst": use_gst,
    }
    return render_template("index.html", **template_context)
152 | |
153 | |
@app.route("/details")
def details():
    """Render the model detail page (``details.html``).

    The page shows the TTS model config, the vocoder config (``None`` when no
    vocoder config file is available) and the parsed command-line arguments.

    The configs are loaded from the module-level ``config_path`` and
    ``vocoder_config_path`` resolved at start-up. The argument parser defines
    no ``tts_config`` or ``vocoder_config`` attributes, so reading those from
    ``args`` raised ``AttributeError`` on every request.
    """
    # Same config file that was handed to the Synthesizer at start-up.
    model_config = load_config(config_path)

    # A vocoder is optional; only load its config when a file is really there.
    if vocoder_config_path is not None and os.path.isfile(vocoder_config_path):
        vocoder_config = load_config(vocoder_config_path)
    else:
        vocoder_config = None

    return render_template(
        "details.html",
        show_details=args.show_details,
        model_config=model_config,
        vocoder_config=vocoder_config,
        args=args.__dict__,
    )
169 | |
170 | |
@app.route("/api/tts", methods=["GET", "POST"])
def tts():
    """Synthesize speech for the ``text`` request parameter.

    Request parameters (query string or form data):
        text: the text to synthesize (required).
        speaker_id: optional speaker identifier, defaults to ``""``.
        style_wav: optional style reference, either a server-side ``.wav``
            path or a JSON-encoded GST dict; defaults to ``""``.

    Returns:
        The synthesized audio sent with mimetype ``audio/wav``, or a plain-text
        message with HTTP status 400 when ``text`` is missing or empty.
    """
    text = request.values.get("text")
    if not text:
        # Without text there is nothing to synthesize; report a client error
        # instead of passing None on to the synthesizer.
        return "Missing required parameter: text", 400

    speaker_idx = request.values.get("speaker_id", "")
    style_wav = style_wav_uri_to_dict(request.values.get("style_wav", ""))

    print(" > Model input: {}".format(text))
    wavs = synthesizer.tts(text, speaker_idx=speaker_idx, style_wav=style_wav)

    # Write the audio into an in-memory buffer so no temporary file is needed.
    out = io.BytesIO()
    synthesizer.save_wav(wavs, out)
    return send_file(out, mimetype="audio/wav")
183 | |
184 | |
def main():
    """Start the Flask server using the ``--debug`` and ``--port`` arguments."""
    run_options = {"debug": args.debug, "port": args.port}
    app.run(**run_options)
187 | |
188 | |
# Start the server only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Travelled to 2 computer(s): bhatertpkbcr, mqqgnosmbjvj
No comments. add comment
Snippet ID: | #1032538 |
Snippet name: | coqai tts server.py |
Eternal ID of this version: | #1032538/5 |
Text MD5: | 8dfaf37259242ad1492a5f085f367ed8 |
Author: | stefan |
Category: | javax |
Type: | Document |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-09-21 16:15:18 |
Source code size: | 6410 bytes / 190 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 218 / 118 |
Version history: | 4 change(s) |
Referenced in: | [show references] |