stzhao committed on
Commit
4dcfc4d
·
verified ·
1 Parent(s): ce2ea77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -72
app.py CHANGED
@@ -13,36 +13,36 @@ import re
13
 
14
  def export_to_zip(images, conversations, format_type="original"):
15
  """
16
- 将图像和对话数据导出为ZIP文件
17
-
18
  Args:
19
- images: 提取的图像列表
20
- conversations: 对话JSON数据
21
- format_type: 格式类型,"original"或"sharegpt"
22
-
23
  Returns:
24
- 生成的ZIP文件路径
25
  """
26
- # 创建临时目录
27
  temp_dir = tempfile.mkdtemp()
28
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
29
  zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")
30
 
31
- # 创建ZIP文件
32
  with zipfile.ZipFile(zip_filename, 'w') as zipf:
33
- # 保存图像
34
  for i, img in enumerate(images):
35
  img_path = os.path.join(temp_dir, f"image_{i}.png")
36
  img.save(img_path)
37
  zipf.write(img_path, f"images/image_{i}.png")
38
- os.remove(img_path) # 删除临时图像文件
39
 
40
- # 保存对话数据
41
  json_path = os.path.join(temp_dir, "conversations.json")
42
  with open(json_path, 'w', encoding='utf-8') as f:
43
  json.dump(conversations, f, ensure_ascii=False, indent=4)
44
  zipf.write(json_path, "conversations.json")
45
- os.remove(json_path) # 删除临时JSON文件
46
 
47
  return zip_filename
48
 
@@ -52,46 +52,46 @@ def base64_to_image(
52
  convert_mode: Optional[str] = "RGB"
53
  ) -> Union[Image.Image, None]:
54
  """
55
- 将Base64编码的图片字符串转换为PIL Image对象
56
-
57
  Args:
58
- base64_str: Base64编码的图片字符串(可带data:前缀)
59
- remove_prefix: 是否自动去除"data:image/..."前缀(默认True)
60
- convert_mode: 转换为指定模式(如"RGB"/"RGBA",None表示不转换)
61
-
62
  Returns:
63
- PIL.Image.Image 对象,解码失败时返回None
64
  """
65
  try:
66
- # 1. 处理Base64前缀
67
  if remove_prefix and "," in base64_str:
68
  base64_str = base64_str.split(",")[1]
69
 
70
- # 2. 解码Base64
71
  image_data = base64.b64decode(base64_str)
72
 
73
- # 3. 转换为PIL Image
74
  image = Image.open(BytesIO(image_data))
75
 
76
- # 4. 可选模式转换
77
  if convert_mode:
78
  image = image.convert(convert_mode)
79
 
80
  return image
81
 
82
  except (base64.binascii.Error, OSError, Exception) as e:
83
- print(f"Base64解码失败: {str(e)}")
84
  return None
85
 
86
  def process_message_to_sharegpt_format(message):
87
  """
88
- 将消息转换为ShareGPT格式
89
-
90
  Args:
91
- message: 原始消息数据
92
-
93
  Returns:
94
- ShareGPT格式的数据
95
  """
96
  sharegpt_images = []
97
  sharegpt_conversation = []
@@ -135,13 +135,13 @@ def process_message_to_sharegpt_format(message):
135
 
136
  def extract_images_from_messages(messages):
137
  """
138
- 从消息中提取所有图像
139
-
140
  Args:
141
- messages: 消息JSON数据
142
-
143
  Returns:
144
- 提取的图像列表和更新后的消息
145
  """
146
  images = []
147
 
@@ -151,7 +151,7 @@ def extract_images_from_messages(messages):
151
  if content_item.get('type') == 'image_url':
152
  image_url = content_item.get('image_url', {}).get('url', '')
153
  if image_url.startswith('data:'):
154
- # 提取Base64图像
155
  image = base64_to_image(image_url)
156
  if image:
157
  images.append(image)
@@ -160,24 +160,24 @@ def extract_images_from_messages(messages):
160
 
161
  def process_message(file_path):
162
  try:
163
- # 读取JSON文件
164
  with open(file_path, "r", encoding="utf-8") as f:
165
  messages = json.load(f)
166
 
167
- # 提取图像
168
  images, messages = extract_images_from_messages(messages)
169
 
170
- # 转换为ShareGPT格式
171
  sharegpt_data = process_message_to_sharegpt_format(messages)
172
 
173
- # 创建HTML输出
174
- html_output = '<div style="color: black;">' # 添加一个包裹所有内容的div,设置文本颜色为黑色
175
 
176
  for message_item in messages:
177
  role = message_item['role']
178
  content = message_item['content']
179
 
180
- # 根据角色设置样式
181
  if role == "user" or role == "human":
182
  html_output += f'<div style="background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>User:</strong><br>'
183
  elif role == "assistant":
@@ -185,19 +185,19 @@ def process_message(file_path):
185
  else:
186
  html_output += f'<div style="background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>{role.capitalize()}:</strong><br>'
187
 
188
- # 处理内容
189
  for content_item in content:
190
  content_type = content_item['type']
191
 
192
  if content_type == "text":
193
- # Markdown文本转换为HTML
194
  md_text = content_item['text']
195
  html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
196
  html_output += f'<div style="color: black;">{html_text}</div>'
197
 
198
  elif content_type == "image_url":
199
  content_value = content_item['image_url']['url']
200
- # 如果是base64图片
201
  if content_value.startswith("data:"):
202
  html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
203
  else:
@@ -205,7 +205,7 @@ def process_message(file_path):
205
 
206
  html_output += '</div>'
207
 
208
- html_output += '</div>' # 关闭最外层div
209
  return html_output, images, messages, sharegpt_data
210
 
211
  except Exception as e:
@@ -213,18 +213,18 @@ def process_message(file_path):
213
 
214
  def upload_and_process(file):
215
  if file is None:
216
- return "请上传一个JSON文件", [], None, None
217
 
218
  html_output, images, messages, sharegpt_data = process_message(file.name)
219
  return html_output, images, messages, sharegpt_data
220
 
221
  def use_example():
222
- # 使用示例文件
223
  example_path = "test_message_gpt.json"
224
  return process_message(example_path)
225
 
226
  def handle_export_original(images, conversations):
227
- """处理原始格式导出请求"""
228
  if not images or conversations is None:
229
  return None
230
 
@@ -232,7 +232,7 @@ def handle_export_original(images, conversations):
232
  return zip_path
233
 
234
  def handle_export_sharegpt(sharegpt_data):
235
- """处理ShareGPT格式导出请求"""
236
  if sharegpt_data is None:
237
  return None
238
 
@@ -245,13 +245,13 @@ def handle_export_sharegpt(sharegpt_data):
245
  zip_path = export_to_zip(images, conversations, "sharegpt")
246
  return zip_path
247
 
248
- # 确保示例文件存在
249
  def setup_example_file():
250
- # 这里我们需要创建示例文件,因为我们没有实际的内容
251
- # 在实际应用中,你应该将原始的test_message_gpt.json文件放在Space的根目录下
252
  example_path = "test_message_gpt.json"
253
 
254
- # 如果文件不存在,创建一个简单的示例
255
  if not os.path.exists(example_path):
256
  example_messages = [
257
  {
@@ -259,7 +259,7 @@ def setup_example_file():
259
  "content": [
260
  {
261
  "type": "text",
262
- "text": "你好,请介绍一下自己"
263
  }
264
  ]
265
  },
@@ -268,7 +268,7 @@ def setup_example_file():
268
  "content": [
269
  {
270
  "type": "text",
271
- "text": "你好!我是一个AI助手。我可以帮助回答问题、提供信息、进行对话等。我被设计用来协助用户完成各种任务,从简单的问答到更复杂的讨论。\n\n我可以处理文本信息,也能理解和描述图像内容。虽然我有一些限制,但我会尽力提供有用、准确和有帮助的回应。\n\n有什么我可以帮助你的吗?"
272
  }
273
  ]
274
  }
@@ -277,38 +277,38 @@ def setup_example_file():
277
  with open(example_path, "w", encoding="utf-8") as f:
278
  json.dump(example_messages, f, ensure_ascii=False, indent=2)
279
 
280
- # 设置示例文件
281
  setup_example_file()
282
 
283
- # 创建Gradio界面
284
  with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color: black !important;}") as demo:
285
- gr.Markdown("# ChatGPT 对话可视化工具")
286
- gr.Markdown("上传一个包含ChatGPT对话记录的JSON文件,或使用示例文件查看可视化结果")
287
 
288
  with gr.Row():
289
- file_input = gr.File(label="上传JSON文件", file_types=[".json"])
290
 
291
  with gr.Row():
292
  col1, col2 = gr.Column(), gr.Column()
293
  with col1:
294
- visualize_button = gr.Button("可视化上传的对话")
295
  with col2:
296
- example_button = gr.Button("使用示例文件")
297
 
298
  with gr.Row():
299
- output = gr.HTML(label="对话内容")
300
 
301
- # 添加导出按钮
302
  with gr.Row():
303
  with gr.Column():
304
- export_original_btn = gr.Button("导出原始格式")
305
- download_original_file = gr.File(label="下载原始格式ZIP")
306
 
307
  with gr.Column():
308
- export_sharegpt_btn = gr.Button("导出ShareGPT格式")
309
- download_sharegpt_file = gr.File(label="下载ShareGPT格式ZIP")
310
 
311
- # 存储当前结果的状态变量
312
  current_images = gr.State([])
313
  current_json = gr.State(None)
314
  current_sharegpt = gr.State(None)
@@ -337,5 +337,5 @@ with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color:
337
  outputs=[download_sharegpt_file]
338
  )
339
 
340
- # 启动Gradio应用
341
- demo.launch()
 
13
 
14
  def export_to_zip(images, conversations, format_type="original"):
15
  """
16
+ Export images and conversation data to a ZIP file
17
+
18
  Args:
19
+ images: List of extracted images
20
+ conversations: Conversation JSON data
21
+ format_type: Format type, "original" or "sharegpt"
22
+
23
  Returns:
24
+ Path to the generated ZIP file
25
  """
26
+ # Create a temporary directory
27
  temp_dir = tempfile.mkdtemp()
28
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
29
  zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")
30
 
31
+ # Create a ZIP file
32
  with zipfile.ZipFile(zip_filename, 'w') as zipf:
33
+ # Save images
34
  for i, img in enumerate(images):
35
  img_path = os.path.join(temp_dir, f"image_{i}.png")
36
  img.save(img_path)
37
  zipf.write(img_path, f"images/image_{i}.png")
38
+ os.remove(img_path) # Delete temporary image file
39
 
40
+ # Save conversation data
41
  json_path = os.path.join(temp_dir, "conversations.json")
42
  with open(json_path, 'w', encoding='utf-8') as f:
43
  json.dump(conversations, f, ensure_ascii=False, indent=4)
44
  zipf.write(json_path, "conversations.json")
45
+ os.remove(json_path) # Delete temporary JSON file
46
 
47
  return zip_filename
48
 
 
52
  convert_mode: Optional[str] = "RGB"
53
  ) -> Union[Image.Image, None]:
54
  """
55
+ Convert a base64 encoded image string to a PIL Image object
56
+
57
  Args:
58
+ base64_str: Base64 encoded image string (with or without data: prefix)
59
+ remove_prefix: Whether to automatically remove the "data:image/..." prefix (default True)
60
+ convert_mode: Convert to the specified mode (e.g., "RGB"/"RGBA", None means no conversion)
61
+
62
  Returns:
63
+ PIL.Image.Image object, returns None if decoding fails
64
  """
65
  try:
66
+ # 1. Handle Base64 prefix
67
  if remove_prefix and "," in base64_str:
68
  base64_str = base64_str.split(",")[1]
69
 
70
+ # 2. Decode Base64
71
  image_data = base64.b64decode(base64_str)
72
 
73
+ # 3. Convert to PIL Image
74
  image = Image.open(BytesIO(image_data))
75
 
76
+ # 4. Optional mode conversion
77
  if convert_mode:
78
  image = image.convert(convert_mode)
79
 
80
  return image
81
 
82
  except (base64.binascii.Error, OSError, Exception) as e:
83
+ print(f"Base64 decoding failed: {str(e)}")
84
  return None
85
 
86
  def process_message_to_sharegpt_format(message):
87
  """
88
+ Convert messages to ShareGPT format
89
+
90
  Args:
91
+ message: Original message data
92
+
93
  Returns:
94
+ Data in ShareGPT format
95
  """
96
  sharegpt_images = []
97
  sharegpt_conversation = []
 
135
 
136
  def extract_images_from_messages(messages):
137
  """
138
+ Extract all images from messages
139
+
140
  Args:
141
+ messages: Message JSON data
142
+
143
  Returns:
144
+ Extracted image list and updated messages
145
  """
146
  images = []
147
 
 
151
  if content_item.get('type') == 'image_url':
152
  image_url = content_item.get('image_url', {}).get('url', '')
153
  if image_url.startswith('data:'):
154
+ # Extract base64 image
155
  image = base64_to_image(image_url)
156
  if image:
157
  images.append(image)
 
160
 
161
  def process_message(file_path):
162
  try:
163
+ # Read JSON file
164
  with open(file_path, "r", encoding="utf-8") as f:
165
  messages = json.load(f)
166
 
167
+ # Extract images
168
  images, messages = extract_images_from_messages(messages)
169
 
170
+ # Convert to ShareGPT format
171
  sharegpt_data = process_message_to_sharegpt_format(messages)
172
 
173
+ # Create HTML output
174
+ html_output = '<div style="color: black;">' # Add a wrapper div for all content, set text color black
175
 
176
  for message_item in messages:
177
  role = message_item['role']
178
  content = message_item['content']
179
 
180
+ # Style based on role
181
  if role == "user" or role == "human":
182
  html_output += f'<div style="background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>User:</strong><br>'
183
  elif role == "assistant":
 
185
  else:
186
  html_output += f'<div style="background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>{role.capitalize()}:</strong><br>'
187
 
188
+ # Handle content
189
  for content_item in content:
190
  content_type = content_item['type']
191
 
192
  if content_type == "text":
193
+ # Convert Markdown text to HTML
194
  md_text = content_item['text']
195
  html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
196
  html_output += f'<div style="color: black;">{html_text}</div>'
197
 
198
  elif content_type == "image_url":
199
  content_value = content_item['image_url']['url']
200
+ # If base64 image
201
  if content_value.startswith("data:"):
202
  html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
203
  else:
 
205
 
206
  html_output += '</div>'
207
 
208
+ html_output += '</div>' # Close outermost div
209
  return html_output, images, messages, sharegpt_data
210
 
211
  except Exception as e:
 
213
 
214
  def upload_and_process(file):
215
  if file is None:
216
+ return "Please upload a JSON file", [], None, None
217
 
218
  html_output, images, messages, sharegpt_data = process_message(file.name)
219
  return html_output, images, messages, sharegpt_data
220
 
221
  def use_example():
222
+ # Use example file
223
  example_path = "test_message_gpt.json"
224
  return process_message(example_path)
225
 
226
  def handle_export_original(images, conversations):
227
+ """Handle export request for original format"""
228
  if not images or conversations is None:
229
  return None
230
 
 
232
  return zip_path
233
 
234
  def handle_export_sharegpt(sharegpt_data):
235
+ """Handle export request for ShareGPT format"""
236
  if sharegpt_data is None:
237
  return None
238
 
 
245
  zip_path = export_to_zip(images, conversations, "sharegpt")
246
  return zip_path
247
 
248
+ # Ensure example file exists
249
  def setup_example_file():
250
+ # Here we need to create the example file because we don't have actual content
251
+ # In a real application, you should place the original test_message_gpt.json file in the root directory
252
  example_path = "test_message_gpt.json"
253
 
254
+ # Create a simple example if the file does not exist
255
  if not os.path.exists(example_path):
256
  example_messages = [
257
  {
 
259
  "content": [
260
  {
261
  "type": "text",
262
+ "text": "Hello, please introduce yourself."
263
  }
264
  ]
265
  },
 
268
  "content": [
269
  {
270
  "type": "text",
271
+ "text": "Hello! I am an AI assistant. I can help answer questions, provide information, and have conversations. I am designed to assist users with a variety of tasks, from simple Q&A to more complex discussions.\n\nI can handle text information and also understand and describe images. Although I have some limitations, I will do my best to provide useful, accurate, and helpful responses.\n\nHow can I help you today?"
272
  }
273
  ]
274
  }
 
277
  with open(example_path, "w", encoding="utf-8") as f:
278
  json.dump(example_messages, f, ensure_ascii=False, indent=2)
279
 
280
+ # Set up the example file
281
  setup_example_file()
282
 
283
+ # Create Gradio interface
284
  with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color: black !important;}") as demo:
285
+ gr.Markdown("# ChatGPT Conversation Visualization Tool")
286
+ gr.Markdown("Upload a JSON file containing ChatGPT conversation records or use the example file to view visualization results.")
287
 
288
  with gr.Row():
289
+ file_input = gr.File(label="Upload JSON File", file_types=[".json"])
290
 
291
  with gr.Row():
292
  col1, col2 = gr.Column(), gr.Column()
293
  with col1:
294
+ visualize_button = gr.Button("Visualize Uploaded Conversation")
295
  with col2:
296
+ example_button = gr.Button("Use Example File")
297
 
298
  with gr.Row():
299
+ output = gr.HTML(label="Conversation Content")
300
 
301
+ # Add export buttons
302
  with gr.Row():
303
  with gr.Column():
304
+ export_original_btn = gr.Button("Export Original Format")
305
+ download_original_file = gr.File(label="Download Original Format ZIP")
306
 
307
  with gr.Column():
308
+ export_sharegpt_btn = gr.Button("Export ShareGPT Format")
309
+ download_sharegpt_file = gr.File(label="Download ShareGPT Format ZIP")
310
 
311
+ # State variables to store current results
312
  current_images = gr.State([])
313
  current_json = gr.State(None)
314
  current_sharegpt = gr.State(None)
 
337
  outputs=[download_sharegpt_file]
338
  )
339
 
340
+ # Launch Gradio app
341
+ demo.launch()