Update app.py
app.py CHANGED

@@ -13,36 +13,36 @@ import re

def export_to_zip(images, conversations, format_type="original"):
    """
+    Export images and conversation data to a ZIP file
+
    Args:
+        images: List of extracted images
+        conversations: Conversation JSON data
+        format_type: Format type, "original" or "sharegpt"
+
    Returns:
+        Path to the generated ZIP file
    """
+    # Create a temporary directory
    temp_dir = tempfile.mkdtemp()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")

+    # Create a ZIP file
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
+        # Save images
        for i, img in enumerate(images):
            img_path = os.path.join(temp_dir, f"image_{i}.png")
            img.save(img_path)
            zipf.write(img_path, f"images/image_{i}.png")
+            os.remove(img_path) # Delete temporary image file

+        # Save conversation data
        json_path = os.path.join(temp_dir, "conversations.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(conversations, f, ensure_ascii=False, indent=4)
        zipf.write(json_path, "conversations.json")
+        os.remove(json_path) # Delete temporary JSON file

    return zip_filename

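Not part of the commit, but as a quick reference for the hunk above: a minimal usage sketch for export_to_zip, assuming app.py is importable as app and using an in-memory Pillow image plus a one-turn conversation list.

    # Usage sketch (assumption: app.py is importable as `app`)
    from PIL import Image
    from app import export_to_zip

    images = [Image.new("RGB", (64, 64), "white")]  # placeholder image
    conversations = [{"role": "user", "content": [{"type": "text", "text": "hi"}]}]

    zip_path = export_to_zip(images, conversations, format_type="original")
    print(zip_path)  # .../export_<timestamp>.zip containing images/ and conversations.json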
@@ -52,46 +52,46 @@ def base64_to_image(
    convert_mode: Optional[str] = "RGB"
) -> Union[Image.Image, None]:
    """
+    Convert a base64 encoded image string to a PIL Image object
+
    Args:
+        base64_str: Base64 encoded image string (with or without data: prefix)
+        remove_prefix: Whether to automatically remove the "data:image/..." prefix (default True)
+        convert_mode: Convert to the specified mode (e.g., "RGB"/"RGBA", None means no conversion)
+
    Returns:
+        PIL.Image.Image object, returns None if decoding fails
    """
    try:
+        # 1. Handle Base64 prefix
        if remove_prefix and "," in base64_str:
            base64_str = base64_str.split(",")[1]

+        # 2. Decode Base64
        image_data = base64.b64decode(base64_str)

+        # 3. Convert to PIL Image
        image = Image.open(BytesIO(image_data))

+        # 4. Optional mode conversion
        if convert_mode:
            image = image.convert(convert_mode)

        return image

    except (base64.binascii.Error, OSError, Exception) as e:
+        print(f"Base64 decoding failed: {str(e)}")
        return None

def process_message_to_sharegpt_format(message):
    """
+    Convert messages to ShareGPT format
+
    Args:
+        message: Original message data
+
    Returns:
+        Data in ShareGPT format
    """
    sharegpt_images = []
    sharegpt_conversation = []
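Again outside the commit itself, a small round-trip sketch for base64_to_image: it builds a data-URL string from an in-memory PNG and decodes it back, relying on the default remove_prefix=True. The from app import line is an assumption about how the module is loaded.

    # Round-trip sketch (assumption: app.py is importable as `app`)
    import base64
    from io import BytesIO
    from PIL import Image
    from app import base64_to_image

    buf = BytesIO()
    Image.new("RGB", (32, 32), "red").save(buf, format="PNG")
    data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

    img = base64_to_image(data_url)  # the "data:image/png;base64," prefix is stripped automatically
    print(img.size if img else "decode failed")  # (32, 32)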
@@ -135,13 +135,13 @@ def process_message_to_sharegpt_format(message):

def extract_images_from_messages(messages):
    """
+    Extract all images from messages
+
    Args:
+        messages: Message JSON data
+
    Returns:
+        Extracted image list and updated messages
    """
    images = []

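The return shape documented above (image list plus updated messages) matches how process_message consumes this function later in the diff; a short sketch, assuming test_message_gpt.json exists in the working directory and app.py is importable.

    # Sketch: pull embedded images out of a saved message file
    import json
    from app import extract_images_from_messages

    with open("test_message_gpt.json", encoding="utf-8") as f:
        messages = json.load(f)

    images, messages = extract_images_from_messages(messages)
    print(f"extracted {len(images)} image(s)")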
@@ -151,7 +151,7 @@ def extract_images_from_messages(messages):
            if content_item.get('type') == 'image_url':
                image_url = content_item.get('image_url', {}).get('url', '')
                if image_url.startswith('data:'):
+                    # Extract base64 image
                    image = base64_to_image(image_url)
                    if image:
                        images.append(image)
@@ -160,24 +160,24 @@ def process_message(file_path):

def process_message(file_path):
    try:
+        # Read JSON file
        with open(file_path, "r", encoding="utf-8") as f:
            messages = json.load(f)

+        # Extract images
        images, messages = extract_images_from_messages(messages)

+        # Convert to ShareGPT format
        sharegpt_data = process_message_to_sharegpt_format(messages)

+        # Create HTML output
+        html_output = '<div style="color: black;">' # Add a wrapper div for all content, set text color black

        for message_item in messages:
            role = message_item['role']
            content = message_item['content']

+            # Style based on role
            if role == "user" or role == "human":
                html_output += f'<div style="background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>User:</strong><br>'
            elif role == "assistant":
@@ -185,19 +185,19 @@ def process_message(file_path):
            else:
                html_output += f'<div style="background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-radius: 10px; color: black;"><strong>{role.capitalize()}:</strong><br>'

+            # Handle content
            for content_item in content:
                content_type = content_item['type']

                if content_type == "text":
+                    # Convert Markdown text to HTML
                    md_text = content_item['text']
                    html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
                    html_output += f'<div style="color: black;">{html_text}</div>'

                elif content_type == "image_url":
                    content_value = content_item['image_url']['url']
+                    # If base64 image
                    if content_value.startswith("data:"):
                        html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
                    else:
@@ -205,7 +205,7 @@ def process_message(file_path):

            html_output += '</div>'

+        html_output += '</div>' # Close outermost div
        return html_output, images, messages, sharegpt_data

    except Exception as e:
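process_message ties the pieces together and returns the rendered HTML along with the extracted data; a minimal sketch of calling it directly, outside Gradio, assuming the example file created by setup_example_file further down. The output file name is illustrative.

    # Sketch: render a conversation file to a standalone HTML page
    from app import process_message

    html_output, images, messages, sharegpt_data = process_message("test_message_gpt.json")
    with open("conversation_preview.html", "w", encoding="utf-8") as f:
        f.write(html_output)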
@@ -213,18 +213,18 @@ def process_message(file_path):

def upload_and_process(file):
    if file is None:
+        return "Please upload a JSON file", [], None, None

    html_output, images, messages, sharegpt_data = process_message(file.name)
    return html_output, images, messages, sharegpt_data

def use_example():
+    # Use example file
    example_path = "test_message_gpt.json"
    return process_message(example_path)

def handle_export_original(images, conversations):
+    """Handle export request for original format"""
    if not images or conversations is None:
        return None

@@ -232,7 +232,7 @@ def handle_export_original(images, conversations):
    return zip_path

def handle_export_sharegpt(sharegpt_data):
+    """Handle export request for ShareGPT format"""
    if sharegpt_data is None:
        return None

@@ -245,13 +245,13 @@ def handle_export_sharegpt(sharegpt_data):
    zip_path = export_to_zip(images, conversations, "sharegpt")
    return zip_path

+# Ensure example file exists
def setup_example_file():
+    # Here we need to create the example file because we don't have actual content
+    # In a real application, you should place the original test_message_gpt.json file in the root directory
    example_path = "test_message_gpt.json"

+    # Create a simple example if the file does not exist
    if not os.path.exists(example_path):
        example_messages = [
            {
@@ -259,7 +259,7 @@ def setup_example_file():
                "content": [
                    {
                        "type": "text",
+                        "text": "Hello, please introduce yourself."
                    }
                ]
            },
@@ -268,7 +268,7 @@ def setup_example_file():
                "content": [
                    {
                        "type": "text",
+                        "text": "Hello! I am an AI assistant. I can help answer questions, provide information, and have conversations. I am designed to assist users with a variety of tasks, from simple Q&A to more complex discussions.\n\nI can handle text information and also understand and describe images. Although I have some limitations, I will do my best to provide useful, accurate, and helpful responses.\n\nHow can I help you today?"
                    }
                ]
            }
@@ -277,38 +277,38 @@ def setup_example_file():
        with open(example_path, "w", encoding="utf-8") as f:
            json.dump(example_messages, f, ensure_ascii=False, indent=2)

+# Set up the example file
setup_example_file()

+# Create Gradio interface
with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color: black !important;}") as demo:
+    gr.Markdown("# ChatGPT Conversation Visualization Tool")
+    gr.Markdown("Upload a JSON file containing ChatGPT conversation records or use the example file to view visualization results.")

    with gr.Row():
+        file_input = gr.File(label="Upload JSON File", file_types=[".json"])

    with gr.Row():
        col1, col2 = gr.Column(), gr.Column()
        with col1:
+            visualize_button = gr.Button("Visualize Uploaded Conversation")
        with col2:
+            example_button = gr.Button("Use Example File")

    with gr.Row():
+        output = gr.HTML(label="Conversation Content")

+    # Add export buttons
    with gr.Row():
        with gr.Column():
+            export_original_btn = gr.Button("Export Original Format")
+            download_original_file = gr.File(label="Download Original Format ZIP")

        with gr.Column():
+            export_sharegpt_btn = gr.Button("Export ShareGPT Format")
+            download_sharegpt_file = gr.File(label="Download ShareGPT Format ZIP")

+    # State variables to store current results
    current_images = gr.State([])
    current_json = gr.State(None)
    current_sharegpt = gr.State(None)
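The event wiring for these widgets (file lines 315-336) is not included in the hunks shown, so the sketch below is only one plausible way the buttons could be connected inside the with gr.Blocks(...) block. It is consistent with the handler signatures above and with the outputs=[download_sharegpt_file] fragment in the closing hunk below, but the exact inputs and outputs lists are assumptions.

    # Plausible wiring sketch; the real lines 315-336 are not shown in this diff
    visualize_button.click(
        fn=upload_and_process,
        inputs=[file_input],
        outputs=[output, current_images, current_json, current_sharegpt]
    )
    example_button.click(
        fn=use_example,
        outputs=[output, current_images, current_json, current_sharegpt]
    )
    export_original_btn.click(
        fn=handle_export_original,
        inputs=[current_images, current_json],
        outputs=[download_original_file]
    )
    export_sharegpt_btn.click(
        fn=handle_export_sharegpt,
        inputs=[current_sharegpt],
        outputs=[download_sharegpt_file]
    )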
@@ -337,5 +337,5 @@ with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color:
        outputs=[download_sharegpt_file]
    )

+# Launch Gradio app
+demo.launch()