Spaces:

AhmedAlmaghz
/

TranslateMarkdown

Runtime error

App Files Files Community

Hisab Cloud committed on Jun 11, 2024

Commit

08b7f89

verified ·

1 Parent(s): b35b011

Upload 5 files

Browse files

Files changed (5) hide show

app.py +44 -0
github_utils.py +47 -0
markdown_utils.py +49 -0
requirements.txt +6 -0
translation_utils.py +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import gradio as gr
+from github_utils import clone_repo, push_translated_files, clean_local_repo
+from markdown_utils import parse_markdown_files, extract_translatable_text, save_translated_files
+from translation_utils import translate_content
def translate(repo_url, target_language):
    """
    Runs the full pipeline: clone, translate every markdown file, push, clean up.
    :param repo_url: URL of the GitHub repository to translate
    :param target_language: Target language passed on to the translation step
    :return: A status message on success, or the error text if any step failed
    """
    import os  # local import: this module's import block does not provide os

    # Default clone directory used by clone_repo / push_translated_files /
    # clean_local_repo when no explicit directory is given.
    clone_dir = 'cloned_repo'
    try:
        # 1. Fetch the markdown files from GitHub
        files = clone_repo(repo_url)
        # 2. Read each file into a {'filename', 'content'} record
        parsed_files = parse_markdown_files(files)
        # 3. Translate with the language model. translate_content expects the
        #    raw markdown and extracts the translatable segments itself, so the
        #    file content is passed, not a pre-extracted list.
        translated_files = [
            {
                'filename': file['filename'],
                'content': translate_content(file['content'], target_language),
            }
            for file in parsed_files
        ]
        # 4. Write the translated text back into the local clone
        save_translated_files(translated_files)
        # 5. Push the translated files to GitHub. Filenames produced by
        #    clone_repo already start with the clone directory, while
        #    push_translated_files joins clone_dir onto each filename, so they
        #    are made repository-relative first.
        repo_relative_files = [
            {
                'filename': os.path.relpath(file['filename'], clone_dir),
                'content': file['content'],
            }
            for file in translated_files
        ]
        push_translated_files(repo_url, repo_relative_files)
        return 'Translation completed and files pushed to GitHub'
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of crashing.
        return str(e)
    finally:
        # 6. Remove the local clone whether or not the run succeeded
        clean_local_repo()
# Build the Gradio interface: two text inputs feed translate(), whose returned
# status message is shown as plain text.
iface = gr.Interface(
    title="Markdown Translator",
    description="Translate Markdown files from a GitHub repository to a target language.",
    fn=translate,
    inputs=[
        gr.Textbox(label="GitHub Repo URL"),
        gr.Textbox(label="Target Language"),
    ],
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()

github_utils.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+import shutil
+from git import Repo
def clone_repo(repo_url, clone_dir='cloned_repo'):
    """
    Clones a repository into a fresh local directory and lists its markdown files.
    :param repo_url: URL of the GitHub repository
    :param clone_dir: Directory where the repository will be cloned; any
        existing directory at this path is deleted first
    :return: List of paths (each starting with clone_dir) of files ending in '.md'
    """
    # A leftover directory from a previous run is removed before cloning.
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)
    Repo.clone_from(repo_url, clone_dir)
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(clone_dir)
        for name in names
        if name.endswith('.md')
    ]
def _repo_relative_path(path, clone_dir):
    """Return path relative to clone_dir when it lies inside it, else unchanged."""
    root = os.path.abspath(clone_dir)
    candidate = os.path.abspath(path)
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath rejects paths it cannot compare (e.g. different drives).
        inside = False
    return os.path.relpath(candidate, root) if inside else path


def push_translated_files(repo_url, translated_files, clone_dir='cloned_repo'):
    """
    Writes translated files into the local clone, commits them and pushes.
    :param repo_url: URL of the GitHub repository (not used here; the push
        goes to the clone's 'origin' remote)
    :param translated_files: List of dicts with 'filename' and 'content' keys.
        A filename may be relative to the repository root or may already
        include the clone_dir prefix
    :param clone_dir: Directory where the repository is cloned
    """
    # Nothing to write: avoid opening the repo and creating an empty commit.
    if not translated_files:
        return
    repo = Repo(clone_dir)
    origin = repo.remote(name='origin')
    relative_paths = []
    for file in translated_files:
        # Paths that already point inside the clone are made repo-relative so
        # clone_dir is not joined onto them a second time.
        relative_path = _repo_relative_path(file['filename'], clone_dir)
        target = os.path.join(clone_dir, relative_path)
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            f.write(file['content'])
        relative_paths.append(relative_path)
    repo.index.add(relative_paths)
    repo.index.commit('Add translated files')
    origin.push()
def clean_local_repo(clone_dir='cloned_repo'):
    """
    Deletes the local clone directory and everything in it.
    :param clone_dir: Directory where the repository is cloned; nothing
        happens when it does not exist
    """
    if not os.path.exists(clone_dir):
        return
    shutil.rmtree(clone_dir)

markdown_utils.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import os
+import re
def parse_markdown_files(file_paths):
    """
    Reads each markdown file as UTF-8 text.
    :param file_paths: List of markdown file paths
    :return: List of dictionaries, one per path in the same order, with the
        path under 'filename' and the file text under 'content'
    """
    def _load(md_path):
        with open(md_path, 'r', encoding='utf-8') as handle:
            return {'filename': md_path, 'content': handle.read()}

    return [_load(md_path) for md_path in file_paths]
def extract_translatable_text(content):
    """
    Splits markdown into the text segments that should be translated.
    :param content: Markdown content
    :return: List of non-empty, stripped segments separated by blank lines
    """
    # Removed in this order: fenced code blocks, anything between '<' and '>',
    # and inline links. A link is dropped entirely, label included.
    strip_patterns = (
        re.compile(r'```.*?```', re.DOTALL),
        re.compile(r'<.*?>', re.DOTALL),
        re.compile(r'\[.*?\]\(.*?\)'),
    )
    for pattern in strip_patterns:
        content = pattern.sub('', content)
    # Blank lines (optionally containing whitespace) separate segments.
    segments = []
    for chunk in re.split(r'\n\s*\n', content):
        chunk = chunk.strip()
        if chunk:
            segments.append(chunk)
    return segments
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.
    :param translated_files: List of dicts with 'filename' (destination path)
        and 'content' (text to write, encoded as UTF-8)
    """
    for file in translated_files:
        directory = os.path.dirname(file['filename'])
        # A bare filename has an empty dirname; os.makedirs('') would raise,
        # so directories are only created when there is one to create.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(file['filename'], 'w', encoding='utf-8') as f:
            f.write(file['content'])

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+Flask==3.0.3
+gitpython==3.1.36
+requests==2.31.0
+flask-cors==4.0.0
+gradio==3.11.0
+groq==0.9.0

translation_utils.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import requests
+# from flask_cors import CORS
+from groq import Groq
+# CORS()
import os

# SECURITY: the API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable instead (on Hugging Face Spaces,
# configure it as a Space secret). Any key that was previously hard-coded here
# has been published and should be revoked and replaced.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=GROQ_API_KEY)
def translate_texts_groq(texts, target_language="ar"):
    """
    Translates a list of texts to the target language using Groq.
    :param texts: List of texts to translate (a single string is treated as
        a one-element list)
    :param target_language: Target language code or name, inserted into the prompt
    :return: List of translated texts, one per input text and in the same order
    """
    if isinstance(texts, str):
        texts = [texts]
    translated_texts = []
    # One request per segment keeps the result aligned 1:1 with the input,
    # which callers rely on when pairing originals with translations.
    for text in texts:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": (
                        f"Translate the following text into {target_language}. "
                        "Return only the translation and keep any Markdown "
                        f"formatting unchanged:\n\n{text}"
                    ),
                }
            ],
            model="llama3-8b-8192",
        )
        translated = chat_completion.choices[0].message.content
        # Keep the original segment if the model returned nothing.
        translated_texts.append(translated if translated else text)
    return translated_texts
def translate_content(content, target_language):
    """
    Translates the content of a markdown file, preserving non-translatable parts.
    :param content: Markdown content
    :param target_language: Target language code
    :return: Translated content
    """
    # Local import: this module's import block does not bring the helper in.
    from markdown_utils import extract_translatable_text

    translatable_texts = extract_translatable_text(content)
    if not translatable_texts:
        return content
    translated_texts = translate_texts_groq(translatable_texts, target_language)
    # Rebuild the document left to right. Segments are searched for in order
    # starting after the previous match, so each one is replaced exactly once
    # and already-translated text is never rewritten.
    pieces = []
    cursor = 0
    for original, translated in zip(translatable_texts, translated_texts):
        position = content.find(original, cursor)
        if position == -1:
            # The segment does not appear verbatim (extraction removed markup
            # from inside it); leave that part of the document untouched.
            continue
        pieces.append(content[cursor:position])
        pieces.append(translated)
        cursor = position + len(original)
    pieces.append(content[cursor:])
    return "".join(pieces)
+# from transformers import MarianMTModel, MarianTokenizer
+# # Initialize the model and tokenizer
+# model_name = 'Helsinki-NLP/opus-mt-en-<target_language>'  # Replace <target_language> with the target language code
+# model = MarianMTModel.from_pretrained(model_name)
+# tokenizer = MarianTokenizer.from_pretrained(model_name)
+# def translate_texts(texts, target_language):
+#     """
+#     Translates a list of texts to the target language.
+#     :param texts: List of texts to translate
+#     :param target_language: Target language code
+#     :return: List of translated texts
+#     """
+#     translated_texts = []
+#     for text in texts:
+#         inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+#         translated = model.generate(**inputs)
+#         translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
+#         translated_texts.append(translated_text)
+#     return translated_texts
+# def translate_content(content, target_language):
+#     """
+#     Translates the content of a markdown file, preserving non-translatable parts.
+#     :param content: Markdown content
+#     :param target_language: Target language code
+#     :return: Translated content
+#     """
+#     translatable_texts = extract_translatable_text(content)
+#     translated_texts = translate_texts(translatable_texts, target_language)
+#     # Reconstruct the content with translated texts
+#     for original, translated in zip(translatable_texts, translated_texts):
+#         content = content.replace(original, translated)
+#     return content