Spaces:
Runtime error
Runtime error
Hendrik Schroeter
committed on
use logger
Browse files
- .flake8 +17 -0
- app.py +11 -15
- pyproject.toml +10 -0
.flake8
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
ignore = E203, E266, E501, W503
|
| 3 |
+
max-line-length = 100
|
| 4 |
+
import-order-style = google
|
| 5 |
+
application-import-names = flake8
|
| 6 |
+
select = B,C,E,F,W,T4,B9
|
| 7 |
+
exclude =
|
| 8 |
+
.tox,
|
| 9 |
+
.git,
|
| 10 |
+
__pycache__,
|
| 11 |
+
docs,
|
| 12 |
+
sbatch,
|
| 13 |
+
.venv,
|
| 14 |
+
*.pyc,
|
| 15 |
+
*.egg-info,
|
| 16 |
+
.cache,
|
| 17 |
+
.eggs
|
app.py
CHANGED
|
@@ -8,6 +8,8 @@ import markdown
|
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
|
|
|
|
|
|
| 11 |
from df import config
|
| 12 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 13 |
from df.utils import resample
|
|
@@ -55,14 +57,13 @@ def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
| 55 |
|
| 56 |
def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
| 57 |
sr = config("sr", 48000, int, section="df")
|
| 58 |
-
|
| 59 |
if noise is None:
|
| 60 |
noise = "samples/dkitchen.wav"
|
| 61 |
sp_kwargs = {}
|
| 62 |
if speech_rec is None or "none" in speech_rec:
|
| 63 |
speech_file = "samples/p232_013_clean.wav"
|
| 64 |
if speech_upl is not None and "none" not in speech_upl:
|
| 65 |
-
print("using speech_upl")
|
| 66 |
speech_file = speech_upl
|
| 67 |
else:
|
| 68 |
speech_file = speech_rec
|
|
@@ -70,7 +71,7 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
|
| 70 |
try:
|
| 71 |
speech, meta = load_audio(speech_file, sr, **sp_kwargs)
|
| 72 |
except RuntimeError as e:
|
| 73 |
-
|
| 74 |
import os
|
| 75 |
|
| 76 |
print(os.path.getsize(speech_file))
|
|
@@ -78,16 +79,16 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
|
| 78 |
print(os.path.getctime(speech_file))
|
| 79 |
raise e
|
| 80 |
|
| 81 |
-
|
| 82 |
noise, _ = load_audio(noise, sr)
|
| 83 |
if meta.sample_rate != sr:
|
| 84 |
# Low pass filter by resampling
|
| 85 |
noise = resample(resample(noise, sr, meta.sample_rate), meta.sample_rate, sr)
|
| 86 |
-
|
| 87 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
| 88 |
-
|
| 89 |
enhanced = enhance(model, df, noisy)
|
| 90 |
-
|
| 91 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 92 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 93 |
enhanced = enhanced * lim
|
|
@@ -99,7 +100,7 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
|
| 99 |
save_audio(noisy_fn, noisy, sr)
|
| 100 |
enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|
| 101 |
save_audio(enhanced_fn, enhanced, sr)
|
| 102 |
-
|
| 103 |
return (
|
| 104 |
noisy_fn,
|
| 105 |
spec_figure(noisy, sr=sr),
|
|
@@ -198,10 +199,7 @@ def spec_figure(
|
|
| 198 |
ckwargs = {}
|
| 199 |
if "ax" in kwargs:
|
| 200 |
if colorbar_format is None:
|
| 201 |
-
if (
|
| 202 |
-
kwargs.get("vmin", None) is not None
|
| 203 |
-
or kwargs.get("vmax", None) is not None
|
| 204 |
-
):
|
| 205 |
colorbar_format = "%+2.0f dB"
|
| 206 |
ckwargs = {"ax": kwargs["ax"]}
|
| 207 |
plt.colorbar(im, format=colorbar_format, **ckwargs)
|
|
@@ -248,9 +246,7 @@ outputs = [
|
|
| 248 |
gradio.outputs.Audio(label="Enhanced"),
|
| 249 |
gradio.outputs.Image(type="plot"),
|
| 250 |
]
|
| 251 |
-
description =
|
| 252 |
-
"This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
|
| 253 |
-
)
|
| 254 |
iface = gradio.Interface(
|
| 255 |
fn=mix_and_denoise,
|
| 256 |
title="DeepFilterNet Demo",
|
|
|
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
from df import config
|
| 14 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 15 |
from df.utils import resample
|
|
|
|
| 57 |
|
| 58 |
def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
| 59 |
sr = config("sr", 48000, int, section="df")
|
| 60 |
+
logger.info(f"Got parameters speech_rec: {speech_rec}, speech_upl: {speech_upl}, noise: {noise}, snr: {snr}")
|
| 61 |
if noise is None:
|
| 62 |
noise = "samples/dkitchen.wav"
|
| 63 |
sp_kwargs = {}
|
| 64 |
if speech_rec is None or "none" in speech_rec:
|
| 65 |
speech_file = "samples/p232_013_clean.wav"
|
| 66 |
if speech_upl is not None and "none" not in speech_upl:
|
|
|
|
| 67 |
speech_file = speech_upl
|
| 68 |
else:
|
| 69 |
speech_file = speech_rec
|
|
|
|
| 71 |
try:
|
| 72 |
speech, meta = load_audio(speech_file, sr, **sp_kwargs)
|
| 73 |
except RuntimeError as e:
|
| 74 |
+
logger.error("Could not load audio: " + str(e))
|
| 75 |
import os
|
| 76 |
|
| 77 |
print(os.path.getsize(speech_file))
|
|
|
|
| 79 |
print(os.path.getctime(speech_file))
|
| 80 |
raise e
|
| 81 |
|
| 82 |
+
logger.info(f"Loaded speech with shape {speech.shape}")
|
| 83 |
noise, _ = load_audio(noise, sr)
|
| 84 |
if meta.sample_rate != sr:
|
| 85 |
# Low pass filter by resampling
|
| 86 |
noise = resample(resample(noise, sr, meta.sample_rate), meta.sample_rate, sr)
|
| 87 |
+
logger.info(f"Loaded noise with shape {noise.shape}")
|
| 88 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
| 89 |
+
logger.info("Start denoising audio")
|
| 90 |
enhanced = enhance(model, df, noisy)
|
| 91 |
+
logger.info("Denoising finished")
|
| 92 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 93 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 94 |
enhanced = enhanced * lim
|
|
|
|
| 100 |
save_audio(noisy_fn, noisy, sr)
|
| 101 |
enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|
| 102 |
save_audio(enhanced_fn, enhanced, sr)
|
| 103 |
+
logger.info(f"saved audios: {noisy_fn}, {enhanced_fn}")
|
| 104 |
return (
|
| 105 |
noisy_fn,
|
| 106 |
spec_figure(noisy, sr=sr),
|
|
|
|
| 199 |
ckwargs = {}
|
| 200 |
if "ax" in kwargs:
|
| 201 |
if colorbar_format is None:
|
| 202 |
+
if kwargs.get("vmin", None) is not None or kwargs.get("vmax", None) is not None:
|
|
|
|
|
|
|
|
|
|
| 203 |
colorbar_format = "%+2.0f dB"
|
| 204 |
ckwargs = {"ax": kwargs["ax"]}
|
| 205 |
plt.colorbar(im, format=colorbar_format, **ckwargs)
|
|
|
|
| 246 |
gradio.outputs.Audio(label="Enhanced"),
|
| 247 |
gradio.outputs.Image(type="plot"),
|
| 248 |
]
|
| 249 |
+
description = "This demo denoises audio files using DeepFilterNet. Try it with your own voice!"
|
|
|
|
|
|
|
| 250 |
iface = gradio.Interface(
|
| 251 |
fn=mix_and_denoise,
|
| 252 |
title="DeepFilterNet Demo",
|
pyproject.toml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.black]
|
| 2 |
+
line-length = 100
|
| 3 |
+
target-version = ["py37", "py38", "py39", "py310"]
|
| 4 |
+
include = '\.pyi?$'
|
| 5 |
+
|
| 6 |
+
[tool.isort]
|
| 7 |
+
profile = "black"
|
| 8 |
+
line_length = 100
|
| 9 |
+
skip_gitignore = true
|
| 10 |
+
known_first_party = ["df", "libdf", "libdfdata"]
|