Update function.py
function.py  CHANGED  (+1 -44)
@@ -1,15 +1,10 @@
 from transformers import pipeline
 import torch
 from datasets import load_dataset
-import soundfile as sf
+import soundfile as sf
 from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel

-#tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
-#model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

-
-
-# function part
 # img2text
 def img2text(url):
     image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
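Note: the hunk above is cut off inside img2text, right after the captioning pipeline is constructed. A minimal sketch of how the rest of the function typically reads, assuming the caption is pulled from the pipeline's list-of-dicts output (this is not the file's actual continuation, which the diff does not show):

from transformers import pipeline

def img2text(url):
    # BLIP captioning model, as in the diff
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    # "url" may be a local file path, an http(s) URL, or a PIL image
    caption = image_to_text_model(url)[0]["generated_text"]
    return caption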
@@ -60,44 +55,6 @@ def text2story(text):
     return story_text


-
-
-# Define the text-to-story function using T5-small
-#def text2story(text):
-#    # Load T5-small model for text-to-text generation
-#    generator = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
-#    story_text = generator(text)[0]['generated_text']
-#    return story_text
-
-
-# text2story
-#def text2story(text):
-#    story_text = "abcde"  # to be completed
-#    return story_text
-
-# text2audio
-#def text2audio(story_text):
-#    # Load the TTS model
-#    synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
-
-#    # Load the speaker embeddings dataset
-#    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-
-#    # Pick one speaker's xvector as the embedding (change the index to choose another speaker)
-#    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-
-#    # Generate speech from the story text and the speaker_embedding
-#    speech = synthesiser(story_text, forward_params={"speaker_embeddings": speaker_embedding})
-
-#    # Save the audio as a wav file
-#    sf.write("story_audio.wav", speech["audio"], samplerate=speech["sampling_rate"])
-
-#    # Return the audio file path (the audio data could be returned instead if needed)
-#    return "story_audio.wav"
-
-
-
-
 def text2audio(story_text):
     from transformers import VitsModel, AutoTokenizer
     import torch
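The commit also deletes a commented-out text2story draft built on the pranavpsv/genre-story-generator-v2 checkpoint. For reference, that draft assembles into a runnable sketch roughly as follows; the max_length value is an assumption (the removed comments set no generation arguments), and the text2story implementation the file actually keeps (its lines 16-59) is not shown in this diff:

from transformers import pipeline

def text2story(text):
    # GPT-2 based genre story generator named in the removed comments
    generator = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
    # max_length is an assumed cap on the generated story length
    story_text = generator(text, max_length=100)[0]["generated_text"]
    return story_text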
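The deleted SpeechT5 draft of text2audio, gathered here into a runnable sketch for reference. The model name, the xvectors dataset, the speaker index 7306, and the story_audio.wav filename all come from the removed comments; only the _speecht5 suffix on the function name is added here, to keep it distinct from the live text2audio:

import torch
import soundfile as sf
from datasets import load_dataset
from transformers import pipeline

def text2audio_speecht5(story_text):
    # Load the TTS pipeline
    synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
    # Load the speaker-embedding dataset and pick one speaker's xvector
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    # Synthesize speech conditioned on the chosen speaker embedding
    speech = synthesiser(story_text, forward_params={"speaker_embeddings": speaker_embedding})
    # Write the waveform to disk and return the file path
    sf.write("story_audio.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "story_audio.wav"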
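The new VITS-based text2audio is truncated in the hunk above, right after its local imports. A common VitsModel pattern looks like the sketch below; the facebook/mms-tts-eng checkpoint and the output filename are assumptions, since the diff does not show which weights the Space actually loads:

import soundfile as sf

def text2audio(story_text):
    from transformers import VitsModel, AutoTokenizer
    import torch

    # Assumed checkpoint: the MMS English TTS model (not confirmed by the diff)
    model = VitsModel.from_pretrained("facebook/mms-tts-eng")
    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")

    inputs = tokenizer(story_text, return_tensors="pt")
    with torch.no_grad():
        waveform = model(**inputs).waveform  # shape: (batch, num_samples)

    # Write the mono waveform at the model's sampling rate and return the path
    sf.write("story_audio.wav", waveform.squeeze().numpy(), samplerate=model.config.sampling_rate)
    return "story_audio.wav"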