Spaces:

Agents-X
/

data-view

Running

App Files Community

stzhao committed on Jul 9

Commit

4dcfc4d

verified ·

1 Parent(s): ce2ea77

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -72

app.py CHANGED Viewed

@@ -13,36 +13,36 @@ import re
 def export_to_zip(images, conversations, format_type="original"):
     """
-    将图像和对话数据导出为ZIP文件
     Args:
-        images: 提取的图像列表
-        conversations: 对话JSON数据
-        format_type: 格式类型，"original"或"sharegpt"
     Returns:
-        生成的ZIP文件路径
     """
-    # 创建临时目录
     temp_dir = tempfile.mkdtemp()
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")
-    # 创建ZIP文件
     with zipfile.ZipFile(zip_filename, 'w') as zipf:
-        # 保存图像
         for i, img in enumerate(images):
             img_path = os.path.join(temp_dir, f"image_{i}.png")
             img.save(img_path)
             zipf.write(img_path, f"images/image_{i}.png")
-            os.remove(img_path)  # 删除临时图像文件
-        # 保存对话数据
         json_path = os.path.join(temp_dir, "conversations.json")
         with open(json_path, 'w', encoding='utf-8') as f:
             json.dump(conversations, f, ensure_ascii=False, indent=4)
         zipf.write(json_path, "conversations.json")
-        os.remove(json_path)  # 删除临时JSON文件
     return zip_filename
@@ -52,46 +52,46 @@ def base64_to_image(
     convert_mode: Optional[str] = "RGB"
 ) -> Union[Image.Image, None]:
     """
-    将Base64编码的图片字符串转换为PIL Image对象
     Args:
-        base64_str: Base64编码的图片字符串（可带data:前缀）
-        remove_prefix: 是否自动去除"data:image/..."前缀（默认True）
-        convert_mode: 转换为指定模式（如"RGB"/"RGBA"，None表示不转换）
     Returns:
-        PIL.Image.Image 对象，解码失败时返回None
     """
     try:
-        # 1. 处理Base64前缀
         if remove_prefix and "," in base64_str:
             base64_str = base64_str.split(",")[1]
-        # 2. 解码Base64
         image_data = base64.b64decode(base64_str)
-        # 3. 转换为PIL Image
         image = Image.open(BytesIO(image_data))
-        # 4. 可选模式转换
         if convert_mode:
             image = image.convert(convert_mode)
         return image
     except (base64.binascii.Error, OSError, Exception) as e:
-        print(f"Base64解码失败: {str(e)}")
         return None
 def process_message_to_sharegpt_format(message):
     """
-    将消息转换为ShareGPT格式
     Args:
-        message: 原始消息数据
     Returns:
-        ShareGPT格式的数据
     """
     sharegpt_images = []
     sharegpt_conversation = []
@@ -135,13 +135,13 @@ def process_message_to_sharegpt_format(message):
 def extract_images_from_messages(messages):
     """
-    从消息中提取所有图像
     Args:
-        messages: 消息JSON数据
     Returns:
-        提取的图像列表和更新后的消息
     """
     images = []
@@ -151,7 +151,7 @@ def extract_images_from_messages(messages):
                 if content_item.get('type') == 'image_url':
                     image_url = content_item.get('image_url', {}).get('url', '')
                     if image_url.startswith('data:'):
-                        # 提取Base64图像
                         image = base64_to_image(image_url)
                         if image:
                             images.append(image)
@@ -160,24 +160,24 @@ def extract_images_from_messages(messages):
 def process_message(file_path):
     try:
-        # 读取JSON文件
         with open(file_path, "r", encoding="utf-8") as f:
             messages = json.load(f)
-        # 提取图像
         images, messages = extract_images_from_messages(messages)
-        # 转换为ShareGPT格式
         sharegpt_data = process_message_to_sharegpt_format(messages)
-        # 创建HTML输出
-        html_output = '<div style="color: black;">'  # 添加一个包裹所有内容的div，设置文本颜色为黑色
         for message_item in messages:
             role = message_item['role']
             content = message_item['content']
-            # 根据角色设置样式
             if role == "user" or role == "human":
                 html_output += f'<div style="background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>User:</strong><br>'
             elif role == "assistant":
@@ -185,19 +185,19 @@ def process_message(file_path):
             else:
                 html_output += f'<div style="background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>{role.capitalize()}:</strong><br>'
-            # 处理内容
             for content_item in content:
                 content_type = content_item['type']
                 if content_type == "text":
-                    # 将Markdown文本转换为HTML
                     md_text = content_item['text']
                     html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
                     html_output += f'<div style="color: black;">{html_text}</div>'
                 elif content_type == "image_url":
                     content_value = content_item['image_url']['url']
-                    # 如果是base64图片
                     if content_value.startswith("data:"):
                         html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
                     else:
@@ -205,7 +205,7 @@ def process_message(file_path):
             html_output += '</div>'
-        html_output += '</div>'  # 关闭最外层div
         return html_output, images, messages, sharegpt_data
     except Exception as e:
@@ -213,18 +213,18 @@ def process_message(file_path):
 def upload_and_process(file):
     if file is None:
-        return "请上传一个JSON文件", [], None, None
     html_output, images, messages, sharegpt_data = process_message(file.name)
     return html_output, images, messages, sharegpt_data
 def use_example():
-    # 使用示例文件
     example_path = "test_message_gpt.json"
     return process_message(example_path)
 def handle_export_original(images, conversations):
-    """处理原始格式导出请求"""
     if not images or conversations is None:
         return None
@@ -232,7 +232,7 @@ def handle_export_original(images, conversations):
     return zip_path
 def handle_export_sharegpt(sharegpt_data):
-    """处理ShareGPT格式导出请求"""
     if sharegpt_data is None:
         return None
@@ -245,13 +245,13 @@ def handle_export_sharegpt(sharegpt_data):
     zip_path = export_to_zip(images, conversations, "sharegpt")
     return zip_path
-# 确保示例文件存在
 def setup_example_file():
-    # 这里我们需要创建示例文件，因为我们没有实际的内容
-    # 在实际应用中，你应该将原始的test_message_gpt.json文件放在Space的根目录下
     example_path = "test_message_gpt.json"
-    # 如果文件不存在，创建一个简单的示例
     if not os.path.exists(example_path):
         example_messages = [
             {
@@ -259,7 +259,7 @@ def setup_example_file():
                 "content": [
                     {
                         "type": "text",
-                        "text": "你好，请介绍一下自己"
                     }
                 ]
             },
@@ -268,7 +268,7 @@ def setup_example_file():
                 "content": [
                     {
                         "type": "text",
-                        "text": "你好！我是一个AI助手。我可以帮助回答问题、提供信息、进行对话等。我被设计用来协助用户完成各种任务，从简单的问答到更复杂的讨论。\n\n我可以处理文本信息，也能理解和描述图像内容。虽然我有一些限制，但我会尽力提供有用、准确和有帮助的回应。\n\n有什么我可以帮助你的吗？"
                     }
                 ]
             }
@@ -277,38 +277,38 @@ def setup_example_file():
         with open(example_path, "w", encoding="utf-8") as f:
             json.dump(example_messages, f, ensure_ascii=False, indent=2)
-# 设置示例文件
 setup_example_file()
-# 创建Gradio界面
 with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color: black !important;}") as demo:
-    gr.Markdown("# ChatGPT 对话可视化工具")
-    gr.Markdown("上传一个包含ChatGPT对话记录的JSON文件，或使用示例文件查看可视化结果")
     with gr.Row():
-        file_input = gr.File(label="上传JSON文件", file_types=[".json"])
     with gr.Row():
         col1, col2 = gr.Column(), gr.Column()
         with col1:
-            visualize_button = gr.Button("可视化上传的对话")
         with col2:
-            example_button = gr.Button("使用示例文件")
     with gr.Row():
-        output = gr.HTML(label="对话内容")
-    # 添加导出按钮
     with gr.Row():
         with gr.Column():
-            export_original_btn = gr.Button("导出原始格式")
-            download_original_file = gr.File(label="下载原始格式ZIP")
         with gr.Column():
-            export_sharegpt_btn = gr.Button("导出ShareGPT格式")
-            download_sharegpt_file = gr.File(label="下载ShareGPT格式ZIP")
-    # 存储当前结果的状态变量
     current_images = gr.State([])
     current_json = gr.State(None)
     current_sharegpt = gr.State(None)
@@ -337,5 +337,5 @@ with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color:
         outputs=[download_sharegpt_file]
     )
-# 启动Gradio应用
-demo.launch()

 def export_to_zip(images, conversations, format_type="original"):
     """
+    Export images and conversation data to a ZIP file
     Args:
+        images: List of extracted images
+        conversations: Conversation JSON data
+        format_type: Format type, "original" or "sharegpt"
     Returns:
+        Path to the generated ZIP file
     """
+    # Create a temporary directory
     temp_dir = tempfile.mkdtemp()
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")
+    # Create a ZIP file
     with zipfile.ZipFile(zip_filename, 'w') as zipf:
+        # Save images
         for i, img in enumerate(images):
             img_path = os.path.join(temp_dir, f"image_{i}.png")
             img.save(img_path)
             zipf.write(img_path, f"images/image_{i}.png")
+            os.remove(img_path)  # Delete temporary image file
+        # Save conversation data
         json_path = os.path.join(temp_dir, "conversations.json")
         with open(json_path, 'w', encoding='utf-8') as f:
             json.dump(conversations, f, ensure_ascii=False, indent=4)
         zipf.write(json_path, "conversations.json")
+        os.remove(json_path)  # Delete temporary JSON file
     return zip_filename
     convert_mode: Optional[str] = "RGB"
 ) -> Union[Image.Image, None]:
     """
+    Convert a base64 encoded image string to a PIL Image object
     Args:
+        base64_str: Base64 encoded image string (with or without data: prefix)
+        remove_prefix: Whether to automatically remove the "data:image/..." prefix (default True)
+        convert_mode: Convert to the specified mode (e.g., "RGB"/"RGBA", None means no conversion)
     Returns:
+        PIL.Image.Image object, returns None if decoding fails
     """
     try:
+        # 1. Handle Base64 prefix
         if remove_prefix and "," in base64_str:
             base64_str = base64_str.split(",")[1]
+        # 2. Decode Base64
         image_data = base64.b64decode(base64_str)
+        # 3. Convert to PIL Image
         image = Image.open(BytesIO(image_data))
+        # 4. Optional mode conversion
         if convert_mode:
             image = image.convert(convert_mode)
         return image
     except (base64.binascii.Error, OSError, Exception) as e:
+        print(f"Base64 decoding failed: {str(e)}")
         return None
 def process_message_to_sharegpt_format(message):
     """
+    Convert messages to ShareGPT format
     Args:
+        message: Original message data
     Returns:
+        Data in ShareGPT format
     """
     sharegpt_images = []
     sharegpt_conversation = []
 def extract_images_from_messages(messages):
     """
+    Extract all images from messages
     Args:
+        messages: Message JSON data
     Returns:
+        Extracted image list and updated messages
     """
     images = []
                 if content_item.get('type') == 'image_url':
                     image_url = content_item.get('image_url', {}).get('url', '')
                     if image_url.startswith('data:'):
+                        # Extract base64 image
                         image = base64_to_image(image_url)
                         if image:
                             images.append(image)
 def process_message(file_path):
     try:
+        # Read JSON file
         with open(file_path, "r", encoding="utf-8") as f:
             messages = json.load(f)
+        # Extract images
         images, messages = extract_images_from_messages(messages)
+        # Convert to ShareGPT format
         sharegpt_data = process_message_to_sharegpt_format(messages)
+        # Create HTML output
+        html_output = '<div style="color: black;">'  # Add a wrapper div for all content, set text color black
         for message_item in messages:
             role = message_item['role']
             content = message_item['content']
+            # Style based on role
             if role == "user" or role == "human":
                 html_output += f'<div style="background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>User:</strong><br>'
             elif role == "assistant":
             else:
                 html_output += f'<div style="background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>{role.capitalize()}:</strong><br>'
+            # Handle content
             for content_item in content:
                 content_type = content_item['type']
                 if content_type == "text":
+                    # Convert Markdown text to HTML
                     md_text = content_item['text']
                     html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
                     html_output += f'<div style="color: black;">{html_text}</div>'
                 elif content_type == "image_url":
                     content_value = content_item['image_url']['url']
+                    # If base64 image
                     if content_value.startswith("data:"):
                         html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
                     else:
             html_output += '</div>'
+        html_output += '</div>'  # Close outermost div
         return html_output, images, messages, sharegpt_data
     except Exception as e:
 def upload_and_process(file):
     if file is None:
+        return "Please upload a JSON file", [], None, None
     html_output, images, messages, sharegpt_data = process_message(file.name)
     return html_output, images, messages, sharegpt_data
 def use_example():
+    # Use example file
     example_path = "test_message_gpt.json"
     return process_message(example_path)
 def handle_export_original(images, conversations):
+    """Handle export request for original format"""
     if not images or conversations is None:
         return None
     return zip_path
 def handle_export_sharegpt(sharegpt_data):
+    """Handle export request for ShareGPT format"""
     if sharegpt_data is None:
         return None
     zip_path = export_to_zip(images, conversations, "sharegpt")
     return zip_path
+# Ensure example file exists
 def setup_example_file():
+    # Here we need to create the example file because we don't have actual content
+    # In a real application, you should place the original test_message_gpt.json file in the root directory
     example_path = "test_message_gpt.json"
+    # Create a simple example if the file does not exist
     if not os.path.exists(example_path):
         example_messages = [
             {
                 "content": [
                     {
                         "type": "text",
+                        "text": "Hello, please introduce yourself."
                     }
                 ]
             },
                 "content": [
                     {
                         "type": "text",
+                        "text": "Hello! I am an AI assistant. I can help answer questions, provide information, and have conversations. I am designed to assist users with a variety of tasks, from simple Q&A to more complex discussions.\n\nI can handle text information and also understand and describe images. Although I have some limitations, I will do my best to provide useful, accurate, and helpful responses.\n\nHow can I help you today?"
                     }
                 ]
             }
         with open(example_path, "w", encoding="utf-8") as f:
             json.dump(example_messages, f, ensure_ascii=False, indent=2)
+# Set up the example file
 setup_example_file()
+# Create Gradio interface
 with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color: black !important;}") as demo:
+    gr.Markdown("# ChatGPT Conversation Visualization Tool")
+    gr.Markdown("Upload a JSON file containing ChatGPT conversation records or use the example file to view visualization results.")
     with gr.Row():
+        file_input = gr.File(label="Upload JSON File", file_types=[".json"])
     with gr.Row():
         col1, col2 = gr.Column(), gr.Column()
         with col1:
+            visualize_button = gr.Button("Visualize Uploaded Conversation")
         with col2:
+            example_button = gr.Button("Use Example File")
     with gr.Row():
+        output = gr.HTML(label="Conversation Content")
+    # Add export buttons
     with gr.Row():
         with gr.Column():
+            export_original_btn = gr.Button("Export Original Format")
+            download_original_file = gr.File(label="Download Original Format ZIP")
         with gr.Column():
+            export_sharegpt_btn = gr.Button("Export ShareGPT Format")
+            download_sharegpt_file = gr.File(label="Download ShareGPT Format ZIP")
+    # State variables to store current results
     current_images = gr.State([])
     current_json = gr.State(None)
     current_sharegpt = gr.State(None)
         outputs=[download_sharegpt_file]
     )
+# Launch Gradio app
+demo.launch()