leoxia711 committed (verified)
Commit a5b5a10 · 1 Parent(s): a3070ae

Update function.py

Files changed (1):
  1. function.py +1 -44
function.py CHANGED
@@ -1,15 +1,10 @@
  from transformers import pipeline
  import torch
  from datasets import load_dataset
- import soundfile as sf # used to save the audio file
+ import soundfile as sf
  from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel

- #tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
- #model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

-
-
- # function part
  # img2text
  def img2text(url):
      image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
@@ -60,44 +55,6 @@ def text2story(text):
      return story_text


-
-
- # Define the text-to-story function using T5-small
- #def text2story(text):
- # # Load T5-small model for text-to-text generation
- # generator = pipeline ("text-generation", model="pranavpsv/genre-story-generator-v2")
- # story_text = generator (text) [0] ['generated_text']
- # return story_text
-
-
- # text2story
- #def text2story(text):
- # story_text = "abcde" # to be completed
- # return story_text
-
- # text2audio
- #def text2audio(story_text):
- # load the TTS model
- # synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
-
- # load the speaker embeddings dataset
- # embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-
- # pick one speaker's xvector as the embedding (change the index to select another speaker)
- # speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-
- # generate speech from the generated text and the speaker_embedding
- # speech = synthesiser(story_text, forward_params={"speaker_embeddings": speaker_embedding})
-
- # save the audio as a wav file
- # sf.write("story_audio.wav", speech["audio"], samplerate=speech["sampling_rate"])
-
- # return the audio file path (the audio data could be returned instead if needed)
- # return "story_audio.wav"
-
-
-
-
  def text2audio(story_text):
      from transformers import VitsModel, AutoTokenizer
      import torch
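The hunk above keeps only the first lines of the surviving VitsModel-based text2audio. A minimal sketch of how such a VITS text-to-speech call is usually wired up with transformers is shown below; the facebook/mms-tts-eng checkpoint, the output filename, and the return value are assumptions for illustration, not taken from this commit.

# Sketch only: checkpoint id, output path, and return value are assumed.
import soundfile as sf
import torch
from transformers import VitsModel, AutoTokenizer

def text2audio_sketch(story_text):
    # load a VITS text-to-speech model and its tokenizer (assumed checkpoint)
    model = VitsModel.from_pretrained("facebook/mms-tts-eng")
    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")

    # tokenize the story text and synthesize a waveform
    inputs = tokenizer(story_text, return_tensors="pt")
    with torch.no_grad():
        waveform = model(**inputs).waveform  # shape: (batch, num_samples)

    # save the audio with soundfile at the model's sampling rate
    sf.write("story_audio.wav", waveform.squeeze().numpy(), model.config.sampling_rate)
    return "story_audio.wav"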
 
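Similarly, the diff shows only the first line of img2text. A sketch of how the BLIP image-to-text pipeline is typically used follows; everything after the pipeline call is an assumption about how the truncated function might continue.

# Sketch only: the body after the pipeline call is assumed, not from the commit.
from transformers import pipeline

def img2text_sketch(url):
    # BLIP captioning pipeline, same checkpoint as in the diff above
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    # the pipeline accepts a local path or URL and returns a list of
    # dicts, each with a "generated_text" caption
    caption = image_to_text_model(url)[0]["generated_text"]
    return caption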