Commit: tts
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change list.
- app.py +39 -44
- examples/driven_audio/bus_chinese.wav +0 -0
- examples/source_image/full_body_1.png +0 -0
- examples/source_image/full_body_2.png +0 -0
- examples/source_image/happy.png +0 -0
- examples/source_image/happy1.png +0 -0
- examples/source_image/people_0.png +0 -0
- examples/source_image/sad.png +0 -0
- examples/source_image/sad1.png +0 -0
- modules/__pycache__/sadtalker_test.cpython-38.pyc +0 -0
- src/__pycache__/generate_batch.cpython-38.pyc +0 -0
- src/__pycache__/generate_facerender_batch.cpython-38.pyc +0 -0
- src/__pycache__/test_audio2coeff.cpython-38.pyc +0 -0
- src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc +0 -0
- src/audio2exp_models/__pycache__/networks.cpython-38.pyc +0 -0
- src/audio2exp_models/audio2exp.py +2 -1
- src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/cvae.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/networks.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc +0 -0
- src/audio2pose_models/audio2pose.py +5 -5
- src/audio2pose_models/audio_encoder.py +3 -3
- src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc +0 -0
- src/face3d/__pycache__/visualize.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/base_model.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/bfm.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/losses.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/networks.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc +0 -0
- src/face3d/util/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/load_mats.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/preprocess.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/util.cpython-38.pyc +0 -0
- src/facerender/__pycache__/animate.cpython-38.pyc +0 -0
app.py
CHANGED

@@ -1,90 +1,88 @@
 import os, sys
 import tempfile
 import gradio as gr
-from
-from
-
-def get_driven_audio(audio):
-    if os.path.isfile(audio):
-        return audio
-    else:
-        save_path = tempfile.NamedTemporaryFile(
-            delete=False,
-            suffix=("." + "wav"),
-        )
-        gen_audio = text2speech(audio, save_path.name)
-        return gen_audio, gen_audio
+from src.gradio_demo import SadTalker
+from src.utils.text2speech import TTSTalker
 
 def get_source_image(image):
     return image
 
-def sadtalker_demo(result_dir='./tmp/'):
+
+
+def sadtalker_demo():
 
     sad_talker = SadTalker()
+    tts_talker = TTSTalker()
+
     with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
-        gr.Markdown("<div align='center'> <
+        gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
             <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> \
             <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> \
-            <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </
+            <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
 
-        with gr.Row():
+        with gr.Row().style(equal_height=False):
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_source_image"):
                     with gr.TabItem('Upload image'):
                         with gr.Row():
-                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256)
+                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
 
                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
-                    with gr.TabItem('Upload
+                    with gr.TabItem('Upload OR TTS'):
                         with gr.Column(variant='panel'):
                             driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+
+                        with gr.Column(variant='panel'):
+                            input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
+                            tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+                            tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
+
 
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):
                     with gr.TabItem('Settings'):
                         with gr.Column(variant='panel'):
-                            is_still_mode = gr.Checkbox(label="Still Mode (fewer
-
-                            is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True)
+                            is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works on full body)")
+                            enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
                             submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
 
                 with gr.Tabs(elem_id="sadtalker_genearted"):
                     gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
-
-
+
         with gr.Row():
             examples = [
                 [
-                    'examples/source_image/
-                    'examples/driven_audio/
+                    'examples/source_image/full_body_1.png',
+                    'examples/driven_audio/bus_chinese.wav',
                     True,
-                    False,
                     False
                 ],
                 [
-                    'examples/source_image/
-                    'examples/driven_audio/
+                    'examples/source_image/full_body_2.png',
+                    'examples/driven_audio/itosinger1.wav',
                     True,
-                    True,
-                    False
-                ],
-                [
-                    'examples/source_image/
-                    'examples/driven_audio/
+                    False
+                ],
+                [
+                    'examples/source_image/art_13.png',
+                    'examples/driven_audio/fayu.wav',
                     True,
-                    False
-
-                ]
+                    False
+                ],
+                [
+                    'examples/source_image/art_5.png',
+                    'examples/driven_audio/chinese_news.wav',
+                    True,
+                    False
+                ],
             ]
             gr.Examples(examples=examples,
                         inputs=[
                             source_image,
                             driven_audio,
                             is_still_mode,
-
-
-                            gr.Textbox(value=result_dir, visible=False)],
-                        outputs=[gen_video, gen_text],
+                            enhancer],
+                        outputs=[gen_video],
                         fn=sad_talker.test,
                         cache_examples=os.getenv('SYSTEM') == 'spaces')
 
@@ -93,10 +91,8 @@ def sadtalker_demo(result_dir='./tmp/'):
                      inputs=[source_image,
                              driven_audio,
                              is_still_mode,
-
-
-                             gr.Textbox(value=result_dir, visible=False)],
-                     outputs=[gen_video, gen_text]
+                             enhancer],
+                     outputs=[gen_video]
                      )
 
     return sadtalker_interface
@@ -104,8 +100,7 @@ def sadtalker_demo(result_dir='./tmp/'):
 
 if __name__ == "__main__":
 
-
-    demo = sadtalker_demo(sadtalker_result_dir)
+    demo = sadtalker_demo()
     demo.launch()
 
 
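Note: the functional change in app.py above is the new text-to-speech path: a Textbox plus a 'Generate audio' button call tts_talker.test and write the synthesized file into the same Audio component that drives SadTalker. Below is a minimal, self-contained sketch of that wiring; fake_tts is a hypothetical stand-in for TTSTalker.test and is assumed to return the path of a generated .wav file.

import gradio as gr

def fake_tts(text):
    # Hypothetical stand-in for TTSTalker.test(): synthesize `text` and return a .wav path.
    # Here it simply returns an existing example file so the sketch stays runnable.
    return "examples/driven_audio/bus_chinese.wav"

with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Generating audio from text", lines=5)
    driven_audio = gr.Audio(label="Input audio", type="filepath")
    tts = gr.Button("Generate audio")
    # The click output targets the Audio component, so the returned file
    # becomes the driving audio for the main Generate button.
    tts.click(fn=fake_tts, inputs=[input_text], outputs=[driven_audio])

if __name__ == "__main__":
    demo.launch()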
examples/driven_audio/bus_chinese.wav
ADDED
Binary file (652 kB)

examples/source_image/full_body_1.png
ADDED

examples/source_image/full_body_2.png
ADDED

examples/source_image/happy.png
ADDED

examples/source_image/happy1.png
ADDED

examples/source_image/people_0.png
ADDED

examples/source_image/sad.png
ADDED

examples/source_image/sad1.png
ADDED

modules/__pycache__/sadtalker_test.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/sadtalker_test.cpython-38.pyc and b/modules/__pycache__/sadtalker_test.cpython-38.pyc differ

src/__pycache__/generate_batch.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/generate_batch.cpython-38.pyc and b/src/__pycache__/generate_batch.cpython-38.pyc differ

src/__pycache__/generate_facerender_batch.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/generate_facerender_batch.cpython-38.pyc and b/src/__pycache__/generate_facerender_batch.cpython-38.pyc differ

src/__pycache__/test_audio2coeff.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/test_audio2coeff.cpython-38.pyc and b/src/__pycache__/test_audio2coeff.cpython-38.pyc differ

src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc
CHANGED
Binary files a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc and b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc differ

src/audio2exp_models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc and b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc differ

src/audio2exp_models/audio2exp.py
CHANGED

@@ -22,7 +22,8 @@ class Audio2Exp(nn.Module):
 
             current_mel_input = mel_input[:,i:i+10]
 
-            ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64
+            #ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64
+            ref = batch['ref'][:, :, :64][:, i:i+10]
             ratio = batch['ratio_gt'][:, i:i+10] #bs T
 
             audiox = current_mel_input.view(-1, 1, 80, 16) # bs*T 1 80 16
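Note: the change above stops tiling a single reference frame across the 10-frame mel window and instead takes the window-aligned slice of the per-frame reference coefficients. A standalone sketch with illustrative shapes (not taken from the repository):

import torch

bs, T, dim = 2, 30, 70            # illustrative batch size, frame count, coefficient dim
ref = torch.randn(bs, T, dim)     # stands in for batch['ref']
i = 10                            # start of the current 10-frame window

old_ref = ref[:, :1, :64].repeat(1, 10, 1)   # roughly the old behaviour: one frame tiled 10x
new_ref = ref[:, :, :64][:, i:i+10]          # new behaviour: the slice aligned with mel_input[:, i:i+10]

print(old_ref.shape, new_ref.shape)          # both torch.Size([2, 10, 64])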
src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc differ

src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc differ

src/audio2pose_models/__pycache__/cvae.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc and b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc differ

src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc and b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc differ

src/audio2pose_models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc and b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc differ

src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc and b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc differ

src/audio2pose_models/audio2pose.py
CHANGED

@@ -12,7 +12,7 @@ class Audio2Pose(nn.Module):
         self.latent_dim = cfg.MODEL.CVAE.LATENT_SIZE
         self.device = device
 
-        self.audio_encoder = AudioEncoder(wav2lip_checkpoint)
+        self.audio_encoder = AudioEncoder(wav2lip_checkpoint, device)
         self.audio_encoder.eval()
         for param in self.audio_encoder.parameters():
             param.requires_grad = False
@@ -20,10 +20,6 @@ class Audio2Pose(nn.Module):
         self.netG = CVAE(cfg)
         self.netD_motion = PoseSequenceDiscriminator(cfg)
 
-        self.gan_criterion = nn.MSELoss()
-        self.reg_criterion = nn.L1Loss(reduction='none')
-        self.pair_criterion = nn.PairwiseDistance()
-        self.cosine_loss = nn.CosineSimilarity(dim=1)
 
     def forward(self, x):
 
@@ -81,6 +77,10 @@ class Audio2Pose(nn.Module):
             z = torch.randn(bs, self.latent_dim).to(ref.device)
             batch['z'] = z
             audio_emb = self.audio_encoder(indiv_mels_use[:, -1*self.seq_len:,:,:,:]) #bs seq_len 512
+            if audio_emb.shape[1] != self.seq_len:
+                pad_dim = self.seq_len-audio_emb.shape[1]
+                pad_audio_emb = audio_emb[:, :1].repeat(1, pad_dim, 1)
+                audio_emb = torch.cat([pad_audio_emb, audio_emb], 1)
             batch['audio_emb'] = audio_emb
             batch = self.netG.test(batch)
             pose_motion_pred_list.append(batch['pose_motion_pred'][:,-1*re:,:])
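Note: the padding added above guards against clips whose last window yields fewer than seq_len audio-embedding frames; the first frame is repeated at the front so the CVAE always sees a full-length sequence. A standalone sketch with made-up shapes:

import torch

seq_len = 32
audio_emb = torch.randn(1, 27, 512)   # e.g. a short final window with only 27 frames

if audio_emb.shape[1] != seq_len:
    pad_dim = seq_len - audio_emb.shape[1]
    pad_audio_emb = audio_emb[:, :1].repeat(1, pad_dim, 1)   # repeat the first frame
    audio_emb = torch.cat([pad_audio_emb, audio_emb], 1)     # pad at the front

print(audio_emb.shape)                # torch.Size([1, 32, 512])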
src/audio2pose_models/audio_encoder.py
CHANGED

@@ -19,7 +19,7 @@ class Conv2d(nn.Module):
         return self.act(out)
 
 class AudioEncoder(nn.Module):
-    def __init__(self, wav2lip_checkpoint):
+    def __init__(self, wav2lip_checkpoint, device):
         super(AudioEncoder, self).__init__()
 
         self.audio_encoder = nn.Sequential(
@@ -41,8 +41,8 @@ class AudioEncoder(nn.Module):
             Conv2d(256, 512, kernel_size=3, stride=1, padding=0),
             Conv2d(512, 512, kernel_size=1, stride=1, padding=0),)
 
-        #### load the pre-trained audio_encoder
-        wav2lip_state_dict = torch.load(wav2lip_checkpoint)['state_dict']
+        #### load the pre-trained audio_encoder
+        wav2lip_state_dict = torch.load(wav2lip_checkpoint, map_location=torch.device(device))['state_dict']
         state_dict = self.audio_encoder.state_dict()
 
         for k,v in wav2lip_state_dict.items():
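Note: the change above threads a device argument through AudioEncoder so the Wav2Lip checkpoint can be loaded on CPU-only hardware such as this Space. A minimal sketch of the idea; load_wav2lip_state_dict is an illustrative helper, not a function from the repository:

import torch

def load_wav2lip_state_dict(wav2lip_checkpoint: str, device: str = "cpu"):
    # Without map_location, a checkpoint saved on a CUDA device fails to load
    # with torch.load on a machine that has no GPU.
    checkpoint = torch.load(wav2lip_checkpoint, map_location=torch.device(device))
    return checkpoint["state_dict"]

# Example (path is illustrative):
# state_dict = load_wav2lip_state_dict("checkpoints/wav2lip.pth", device="cpu")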
src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc
CHANGED
Binary files a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc and b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc differ

src/face3d/__pycache__/visualize.cpython-38.pyc
DELETED
Binary file (1.7 kB)

src/face3d/models/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/__pycache__/__init__.cpython-38.pyc differ

src/face3d/models/__pycache__/base_model.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/base_model.cpython-38.pyc and b/src/face3d/models/__pycache__/base_model.cpython-38.pyc differ

src/face3d/models/__pycache__/bfm.cpython-38.pyc
DELETED
Binary file (10.2 kB)

src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc
DELETED
Binary file (8.45 kB)

src/face3d/models/__pycache__/losses.cpython-38.pyc
DELETED
Binary file (4.23 kB)

src/face3d/models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/networks.cpython-38.pyc and b/src/face3d/models/__pycache__/networks.cpython-38.pyc differ

src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc
DELETED
Binary file (857 Bytes)

src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc
DELETED
Binary file (842 Bytes)

src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc differ

src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc
DELETED
Binary file (908 Bytes)

src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc
DELETED
Binary file (5.41 kB)

src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc
DELETED
Binary file (5.39 kB)

src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc differ

src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc
DELETED
Binary file (5.53 kB)

src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc
DELETED
Binary file (6.11 kB)

src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc
DELETED
Binary file (5.67 kB)

src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc differ

src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc
DELETED
Binary file (5.54 kB)

src/face3d/util/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/__init__.cpython-38.pyc and b/src/face3d/util/__pycache__/__init__.cpython-38.pyc differ

src/face3d/util/__pycache__/load_mats.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc and b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc differ

src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc
DELETED
Binary file (2.99 kB)

src/face3d/util/__pycache__/preprocess.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc and b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc differ

src/face3d/util/__pycache__/util.cpython-38.pyc
DELETED
Binary file (6.49 kB)

src/facerender/__pycache__/animate.cpython-38.pyc
CHANGED
Binary files a/src/facerender/__pycache__/animate.cpython-38.pyc and b/src/facerender/__pycache__/animate.cpython-38.pyc differ