update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
from rcsbsearchapi import AttributeQuery
|
| 3 |
-
from rcsbsearchapi.search import SequenceQuery
|
| 4 |
import os
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from shiny import App, render, ui, reactive
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import warnings
|
| 9 |
import re
|
|
@@ -22,7 +23,7 @@ warnings.filterwarnings('ignore')
|
|
| 22 |
load_dotenv()
|
| 23 |
|
| 24 |
class PDBSearchAssistant:
|
| 25 |
-
def __init__(self, model_name="google/flan-t5-large"):
|
| 26 |
# Set up HuggingFace pipeline with better model
|
| 27 |
self.pipe = pipeline(
|
| 28 |
"text2text-generation",
|
|
@@ -30,7 +31,7 @@ class PDBSearchAssistant:
|
|
| 30 |
max_new_tokens=1024,
|
| 31 |
temperature=0.1,
|
| 32 |
torch_dtype="auto",
|
| 33 |
-
device="cuda
|
| 34 |
)
|
| 35 |
|
| 36 |
self.prompt_template = """
|
|
@@ -159,18 +160,26 @@ class PDBSearchAssistant:
|
|
| 159 |
# Clean and normalize remaining response
|
| 160 |
# Remove all resolution entries to avoid confusion
|
| 161 |
cleaned_response = re.sub(r'[Rr]esolution:\s*\d+(?:\.\d+)?(?:\s*Å?)?\s*', '', response)
|
| 162 |
-
|
|
|
|
| 163 |
# Split remaining response into clean key-value pairs
|
| 164 |
response_pairs = {}
|
| 165 |
for pair in re.finditer(r'(\w+):\s*([^:]+?)(?=\s+\w+:|$)', cleaned_response):
|
| 166 |
key, value = pair.groups()
|
|
|
|
| 167 |
key = key.lower()
|
| 168 |
value = value.strip()
|
| 169 |
if value.lower() not in ['none', 'n/a']:
|
| 170 |
response_pairs[key] = value
|
| 171 |
|
| 172 |
print("Parsed response pairs:", response_pairs) # Debug print
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
# Extract sequence and similarity from cleaned pairs
|
| 175 |
if 'sequence' in response_pairs:
|
| 176 |
sequence = response_pairs['sequence']
|
|
@@ -241,7 +250,7 @@ class PDBSearchAssistant:
|
|
| 241 |
protein_entity_query = AttributeQuery(
|
| 242 |
attribute="rcsb_entity_container_identifiers.entity_names.value",
|
| 243 |
operator="contains_words",
|
| 244 |
-
value=protein_name
|
| 245 |
)
|
| 246 |
queries.append(protein_entity_query)
|
| 247 |
|
|
@@ -342,18 +351,20 @@ class PDBSearchAssistant:
|
|
| 342 |
print("Final query:", final_query)
|
| 343 |
|
| 344 |
# Execute search
|
| 345 |
-
session = final_query.exec()
|
| 346 |
results = []
|
| 347 |
|
| 348 |
# Process results with additional information
|
| 349 |
# search_engine = ProteinSearchEngine()
|
| 350 |
|
|
|
|
| 351 |
try:
|
| 352 |
for entry in session:
|
| 353 |
try:
|
| 354 |
# PDB ID 추출 방식 개선
|
| 355 |
if isinstance(entry, dict):
|
| 356 |
-
|
|
|
|
| 357 |
elif hasattr(entry, 'identifier'):
|
| 358 |
pdb_id = entry.identifier
|
| 359 |
else:
|
|
@@ -363,7 +374,11 @@ class PDBSearchAssistant:
|
|
| 363 |
|
| 364 |
if not pdb_id or len(pdb_id) != 4: # PDB ID는 항상 4자리
|
| 365 |
continue
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
# RCSB PDB REST API를 직접 사용하여 구조 정보 가져오기
|
| 368 |
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
| 369 |
response = requests.get(structure_url)
|
|
@@ -381,20 +396,18 @@ class PDBSearchAssistant:
|
|
| 381 |
'Resolution': f"{structure_data.get('rcsb_entry_info', {}).get('resolution_combined', [0.0])[0]:.2f}Å",
|
| 382 |
'Method': structure_data.get('exptl', [{}])[0].get('method', 'Unknown'),
|
| 383 |
'Release Date': structure_data.get('rcsb_accession_info', {}).get('initial_release_date', 'N/A')
|
| 384 |
-
|
| 385 |
-
|
| 386 |
}
|
| 387 |
|
| 388 |
results.append(result)
|
| 389 |
-
|
| 390 |
-
# Limit to
|
| 391 |
-
if len(results) >=
|
| 392 |
break
|
| 393 |
|
| 394 |
except Exception as e:
|
| 395 |
print(f"Error processing entry: {str(e)}")
|
| 396 |
continue
|
| 397 |
-
|
| 398 |
except Exception as e:
|
| 399 |
print(f"Error processing results: {str(e)}")
|
| 400 |
print(f"Error type: {type(e)}")
|
|
@@ -421,46 +434,45 @@ class PDBSearchAssistant:
|
|
| 421 |
pdb_path = self.pdbl.retrieve_pdb_file(
|
| 422 |
pdb_id,
|
| 423 |
pdir=self.pdb_dir,
|
| 424 |
-
file_format="pdb"
|
| 425 |
)
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
if not pdb_path or not os.path.exists(pdb_path):
|
| 428 |
print(f"Failed to download PDB file for {pdb_id}")
|
| 429 |
|
| 430 |
-
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
| 431 |
-
response = requests.get(structure_url)
|
| 432 |
-
structure_data = response.json() if response.status_code == 200 else {}
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
sequence_url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/1"
|
| 436 |
-
seq_response = requests.get(sequence_url)
|
| 437 |
-
seq_data = seq_response.json() if response.status_code == 200 else {}
|
| 438 |
-
sequence = seq_data.get('entity_poly', {}).get('pdbx_seq_one_letter_code', 'N/A')
|
| 439 |
-
|
| 440 |
sequences = []
|
| 441 |
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
return sequences
|
| 454 |
|
| 455 |
# Parse structure
|
| 456 |
parser = PDB.PDBParser(QUIET=True)
|
| 457 |
structure = parser.get_structure(pdb_id, pdb_path)
|
| 458 |
|
| 459 |
-
# Get structure info from RCSB API for additional details
|
| 460 |
-
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
| 461 |
-
response = requests.get(structure_url)
|
| 462 |
-
structure_data = response.json() if response.status_code == 200 else {}
|
| 463 |
-
|
| 464 |
sequences = []
|
| 465 |
# Extract sequences from each chain
|
| 466 |
for model in structure:
|
|
@@ -543,9 +555,29 @@ class PDBSearchAssistant:
|
|
| 543 |
print(f"Error processing query: {str(e)}")
|
| 544 |
return {"type": "structure", "results": []}
|
| 545 |
|
| 546 |
-
def render_html(pdb_id):
|
| 547 |
-
if pdb_id is None:
|
| 548 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
html_content = f"""
|
| 550 |
<!DOCTYPE html>
|
| 551 |
<html>
|
|
@@ -561,12 +593,16 @@ def render_html(pdb_id):
|
|
| 561 |
</style>
|
| 562 |
</head>
|
| 563 |
<body>
|
|
|
|
|
|
|
|
|
|
| 564 |
<div class="viewer_3Dmoljs"
|
| 565 |
data-pdb="{pdb_id}"
|
| 566 |
data-backgroundcolor="0xffffff"
|
| 567 |
data-style="cartoon:color=spectrum"
|
| 568 |
data-spin="axis:y;speed:0.2">
|
| 569 |
</div>
|
|
|
|
| 570 |
</body>
|
| 571 |
</html>
|
| 572 |
"""
|
|
@@ -582,49 +618,14 @@ def render_html(pdb_id):
|
|
| 582 |
return f'<iframe style="width: 100%; height: 480px; border: none;" srcdoc=\'{escaped_content}\'></iframe>'
|
| 583 |
|
| 584 |
def create_interactive_table(df):
|
| 585 |
-
if df.empty:
|
| 586 |
-
return go.Figure()
|
| 587 |
-
|
| 588 |
# Reorder columns - Add '# of atoms of protein' to the column order
|
| 589 |
column_order = ['PDB ID', 'Resolution', 'Title','# of total residues', '# of atoms of protein', 'Method','Release Date']
|
| 590 |
df = df[column_order]
|
| 591 |
|
| 592 |
# Release Date 형식 변경 (YYYY-MM-DD)
|
| 593 |
df['Release Date'] = pd.to_datetime(df['Release Date']).dt.strftime('%Y-%m-%d')
|
|
|
|
| 594 |
|
| 595 |
-
# Create interactive table
|
| 596 |
-
table = go.Figure(data=[go.Table(
|
| 597 |
-
header=dict(
|
| 598 |
-
values=list(df.columns),
|
| 599 |
-
fill_color='paleturquoise',
|
| 600 |
-
align='center',
|
| 601 |
-
font=dict(size=16),
|
| 602 |
-
),
|
| 603 |
-
cells=dict(
|
| 604 |
-
values=[
|
| 605 |
-
[f'<a href="https://www.rcsb.org/structure/{cell}">{cell}</a>'
|
| 606 |
-
if i == 0 else cell
|
| 607 |
-
for cell in df[col]]
|
| 608 |
-
for i, col in enumerate(df.columns)
|
| 609 |
-
],
|
| 610 |
-
align='center',
|
| 611 |
-
font=dict(size=15),
|
| 612 |
-
height=35
|
| 613 |
-
),
|
| 614 |
-
columnwidth=[80, 80, 400, 100, 100, 100, 100], # Updated columnwidth to include new column
|
| 615 |
-
customdata=[['html'] * len(df) if i == 0 else [''] * len(df)
|
| 616 |
-
for i in range(len(df.columns))],
|
| 617 |
-
hoverlabel=dict(bgcolor='white')
|
| 618 |
-
)])
|
| 619 |
-
|
| 620 |
-
# Update table layout
|
| 621 |
-
table.update_layout(
|
| 622 |
-
margin=dict(l=20, r=20, t=20, b=20),
|
| 623 |
-
height=450,
|
| 624 |
-
autosize=True
|
| 625 |
-
)
|
| 626 |
-
|
| 627 |
-
return table
|
| 628 |
|
| 629 |
# Simplified Shiny app UI definition
|
| 630 |
app_ui = ui.page_fluid(
|
|
@@ -643,6 +644,15 @@ app_ui = ui.page_fluid(
|
|
| 643 |
color: #0a58ca;
|
| 644 |
text-decoration: underline;
|
| 645 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
.shiny-input-container {
|
| 647 |
max-width: 100%;
|
| 648 |
margin: 0 auto;
|
|
@@ -970,10 +980,13 @@ app_ui = ui.page_fluid(
|
|
| 970 |
align-items: ;
|
| 971 |
justify-content: flex-start;
|
| 972 |
gap: 5px;
|
|
|
|
| 973 |
margin-bottom: 20px;
|
| 974 |
margin-left: 20px;
|
| 975 |
}
|
| 976 |
-
|
|
|
|
|
|
|
| 977 |
.pdb-select-label {
|
| 978 |
font-weight: bold;
|
| 979 |
margin: 0;
|
|
@@ -1046,6 +1059,7 @@ app_ui = ui.page_fluid(
|
|
| 1046 |
ui.tags.ul(
|
| 1047 |
ui.tags.li("Sequence of PDB ID 8ET6"),
|
| 1048 |
ui.tags.li("Spike protein"),
|
|
|
|
| 1049 |
ui.tags.li("Human insulin"),
|
| 1050 |
ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
|
| 1051 |
ui.tags.li("Find structures containing sequence with similarity 90% FVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKR"),
|
|
@@ -1060,8 +1074,11 @@ app_ui = ui.page_fluid(
|
|
| 1060 |
ui.column(12,
|
| 1061 |
ui.div(
|
| 1062 |
{"class": "results-section"},
|
| 1063 |
-
ui.h4("
|
| 1064 |
-
|
|
|
|
|
|
|
|
|
|
| 1065 |
ui.download_button("download", "Download Results",
|
| 1066 |
class_="btn btn-info")
|
| 1067 |
)
|
|
@@ -1075,7 +1092,7 @@ app_ui = ui.page_fluid(
|
|
| 1075 |
"Select PDB ID",
|
| 1076 |
{"class": "pdb-select-label"}
|
| 1077 |
),
|
| 1078 |
-
ui.
|
| 1079 |
"selected_pdb",
|
| 1080 |
"", # Label is empty as we're using a separate label
|
| 1081 |
choices=[],
|
|
@@ -1144,21 +1161,21 @@ def server(input, output, session):
|
|
| 1144 |
status_store.set("Ready")
|
| 1145 |
pdb_ids = df['PDB ID'].tolist()
|
| 1146 |
@output
|
| 1147 |
-
@render_widget
|
| 1148 |
def results_table():
|
| 1149 |
-
return create_interactive_table(df)
|
| 1150 |
|
| 1151 |
if pdb_ids:
|
| 1152 |
pdb_ids_store.set(pdb_ids)
|
| 1153 |
# Update only one dropdown
|
| 1154 |
-
ui.
|
| 1155 |
"selected_pdb",
|
| 1156 |
choices=pdb_ids,
|
| 1157 |
-
selected=pdb_ids[0]
|
| 1158 |
)
|
| 1159 |
else:
|
| 1160 |
pdb_ids_store.set([])
|
| 1161 |
-
ui.
|
| 1162 |
"selected_pdb",
|
| 1163 |
choices=[],
|
| 1164 |
selected=None
|
|
@@ -1202,8 +1219,11 @@ def server(input, output, session):
|
|
| 1202 |
@render.ui
|
| 1203 |
def output_iframe():
|
| 1204 |
selected_pdb = input.selected_pdb()
|
|
|
|
|
|
|
|
|
|
| 1205 |
if selected_pdb:
|
| 1206 |
-
return ui.HTML(render_html(selected_pdb))
|
| 1207 |
return ui.HTML("")
|
| 1208 |
|
| 1209 |
@output
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
from rcsbsearchapi import AttributeQuery
|
| 3 |
+
from rcsbsearchapi.search import SequenceQuery, SeqMotifQuery
|
| 4 |
import os
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from shiny import App, render, ui, reactive
|
| 7 |
+
from itables.shiny import DT
|
| 8 |
import pandas as pd
|
| 9 |
import warnings
|
| 10 |
import re
|
|
|
|
| 23 |
load_dotenv()
|
| 24 |
|
| 25 |
class PDBSearchAssistant:
|
| 26 |
+
def __init__(self, model_name="google/flan-t5-large"): # google/flan-t5-large or Rostlab/prot_t5_xl_uniref50 11GB
|
| 27 |
# Set up HuggingFace pipeline with better model
|
| 28 |
self.pipe = pipeline(
|
| 29 |
"text2text-generation",
|
|
|
|
| 31 |
max_new_tokens=1024,
|
| 32 |
temperature=0.1,
|
| 33 |
torch_dtype="auto",
|
| 34 |
+
device="cpu" # cuda or cpu
|
| 35 |
)
|
| 36 |
|
| 37 |
self.prompt_template = """
|
|
|
|
| 160 |
# Clean and normalize remaining response
|
| 161 |
# Remove all resolution entries to avoid confusion
|
| 162 |
cleaned_response = re.sub(r'[Rr]esolution:\s*\d+(?:\.\d+)?(?:\s*Å?)?\s*', '', response)
|
| 163 |
+
print("cleaned_responese :", cleaned_response)
|
| 164 |
+
|
| 165 |
# Split remaining response into clean key-value pairs
|
| 166 |
response_pairs = {}
|
| 167 |
for pair in re.finditer(r'(\w+):\s*([^:]+?)(?=\s+\w+:|$)', cleaned_response):
|
| 168 |
key, value = pair.groups()
|
| 169 |
+
print(key, value)
|
| 170 |
key = key.lower()
|
| 171 |
value = value.strip()
|
| 172 |
if value.lower() not in ['none', 'n/a']:
|
| 173 |
response_pairs[key] = value
|
| 174 |
|
| 175 |
print("Parsed response pairs:", response_pairs) # Debug print
|
| 176 |
+
|
| 177 |
+
# Fallback: if no key-value pairs were parsed but the raw LLM response contains 'protein', use the whole response as the protein value
|
| 178 |
+
if not response_pairs:
|
| 179 |
+
if 'protein' in response:
|
| 180 |
+
response_pairs['protein'] = response
|
| 181 |
+
print("Replaced response pairs:", response_pairs) # Debug print
|
| 182 |
+
|
| 183 |
# Extract sequence and similarity from cleaned pairs
|
| 184 |
if 'sequence' in response_pairs:
|
| 185 |
sequence = response_pairs['sequence']
|
|
|
|
| 250 |
protein_entity_query = AttributeQuery(
|
| 251 |
attribute="rcsb_entity_container_identifiers.entity_names.value",
|
| 252 |
operator="contains_words",
|
| 253 |
+
value=protein_name
|
| 254 |
)
|
| 255 |
queries.append(protein_entity_query)
|
| 256 |
|
|
|
|
| 351 |
print("Final query:", final_query)
|
| 352 |
|
| 353 |
# Execute search
|
| 354 |
+
session = final_query.exec(results_verbosity="minimal") # query return identifier, score
|
| 355 |
results = []
|
| 356 |
|
| 357 |
# Process results with additional information
|
| 358 |
# search_engine = ProteinSearchEngine()
|
| 359 |
|
| 360 |
+
|
| 361 |
try:
|
| 362 |
for entry in session:
|
| 363 |
try:
|
| 364 |
# Improved PDB ID extraction
|
| 365 |
if isinstance(entry, dict):
|
| 366 |
+
if entry.get('score') > 0.75:
|
| 367 |
+
pdb_id = entry.get('identifier')
|
| 368 |
elif hasattr(entry, 'identifier'):
|
| 369 |
pdb_id = entry.identifier
|
| 370 |
else:
|
|
|
|
| 374 |
|
| 375 |
if not pdb_id or len(pdb_id) != 4: # PDB ID는 항상 4자리
|
| 376 |
continue
|
| 377 |
+
|
| 378 |
+
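# Stop iterating when the current PDB ID equals the most recently collected result (only checked once more than one result has been collected)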
|
| 379 |
+
if len(results) > 1 and results[-1]["PDB ID"] == pdb_id:
|
| 380 |
+
break
|
| 381 |
+
|
| 382 |
# Fetch structure information directly from the RCSB PDB REST API
|
| 383 |
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
| 384 |
response = requests.get(structure_url)
|
|
|
|
| 396 |
'Resolution': f"{structure_data.get('rcsb_entry_info', {}).get('resolution_combined', [0.0])[0]:.2f}Å",
|
| 397 |
'Method': structure_data.get('exptl', [{}])[0].get('method', 'Unknown'),
|
| 398 |
'Release Date': structure_data.get('rcsb_accession_info', {}).get('initial_release_date', 'N/A')
|
|
|
|
|
|
|
| 399 |
}
|
| 400 |
|
| 401 |
results.append(result)
|
| 402 |
+
|
| 403 |
+
# Limit to max 500
|
| 404 |
+
if len(results) >= 500:
|
| 405 |
break
|
| 406 |
|
| 407 |
except Exception as e:
|
| 408 |
print(f"Error processing entry: {str(e)}")
|
| 409 |
continue
|
| 410 |
+
|
| 411 |
except Exception as e:
|
| 412 |
print(f"Error processing results: {str(e)}")
|
| 413 |
print(f"Error type: {type(e)}")
|
|
|
|
| 434 |
pdb_path = self.pdbl.retrieve_pdb_file(
|
| 435 |
pdb_id,
|
| 436 |
pdir=self.pdb_dir,
|
| 437 |
+
file_format="pdb"
|
| 438 |
)
|
| 439 |
|
| 440 |
+
# Get structure info from RCSB API for additional details
|
| 441 |
+
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
| 442 |
+
response = requests.get(structure_url)
|
| 443 |
+
structure_data = response.json() if response.status_code == 200 else {}
|
| 444 |
+
|
| 445 |
if not pdb_path or not os.path.exists(pdb_path):
|
| 446 |
print(f"Failed to download PDB file for {pdb_id}")
|
| 447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
sequences = []
|
| 449 |
|
| 450 |
+
entity_ids = structure_data.get('rcsb_entry_container_identifiers', {}).get('polymer_entity_ids', {})
|
| 451 |
+
for i in entity_ids:
|
| 452 |
+
sequence_url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{i}"
|
| 453 |
+
seq_response = requests.get(sequence_url)
|
| 454 |
+
seq_data = seq_response.json() if response.status_code == 200 else {}
|
| 455 |
+
sequence = seq_data.get('entity_poly', {}).get('pdbx_seq_one_letter_code_can', 'N/A') # pdbx_seq_one_letter_code
|
| 456 |
+
|
| 457 |
+
chain_info = {
|
| 458 |
+
'chain_id': seq_data.get('entity_poly', {}).get('pdbx_strand_id', 'N/A'), # chain.id
|
| 459 |
+
'entity_id': i, # Default entity ID
|
| 460 |
+
'description': structure_data.get('struct', {}).get('title', 'N/A'),
|
| 461 |
+
'sequence': sequence,
|
| 462 |
+
'length': len(sequence),
|
| 463 |
+
'resolution': structure_data.get('rcsb_entry_info', {}).get('resolution_combined', [0.0])[0],
|
| 464 |
+
'method': structure_data.get('exptl', [{}])[0].get('method', 'Unknown'),
|
| 465 |
+
'release_date': structure_data.get('rcsb_accession_info', {}).get('initial_release_date', 'N/A')
|
| 466 |
+
}
|
| 467 |
+
sequences.append(chain_info)
|
| 468 |
+
print("not Bio pdb list")
|
| 469 |
+
|
| 470 |
return sequences
|
| 471 |
|
| 472 |
# Parse structure
|
| 473 |
parser = PDB.PDBParser(QUIET=True)
|
| 474 |
structure = parser.get_structure(pdb_id, pdb_path)
|
| 475 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
sequences = []
|
| 477 |
# Extract sequences from each chain
|
| 478 |
for model in structure:
|
|
|
|
| 555 |
print(f"Error processing query: {str(e)}")
|
| 556 |
return {"type": "structure", "results": []}
|
| 557 |
|
| 558 |
+
def render_html(pdb_id, chain_count):
|
| 559 |
+
if pdb_id is None or chain_count <= 0:
|
| 560 |
return ""
|
| 561 |
+
|
| 562 |
+
chains = [chr(65 + i) for i in range(chain_count)]
|
| 563 |
+
|
| 564 |
+
# chain block
|
| 565 |
+
chain_html_blocks = "".join([
|
| 566 |
+
f"""
|
| 567 |
+
<div>
|
| 568 |
+
{pdb_id} {chain}
|
| 569 |
+
</div>
|
| 570 |
+
<div class="viewer_3Dmoljs"
|
| 571 |
+
data-pdb="{pdb_id}"
|
| 572 |
+
data-select="chain:{chain}"
|
| 573 |
+
data-backgroundcolor="0xffffff"
|
| 574 |
+
data-style="cartoon:color=spectrum"
|
| 575 |
+
data-spin="axis:y;speed:0.2">
|
| 576 |
+
</div>
|
| 577 |
+
"""
|
| 578 |
+
for chain in chains
|
| 579 |
+
])
|
| 580 |
+
|
| 581 |
html_content = f"""
|
| 582 |
<!DOCTYPE html>
|
| 583 |
<html>
|
|
|
|
| 593 |
</style>
|
| 594 |
</head>
|
| 595 |
<body>
|
| 596 |
+
<div>
|
| 597 |
+
{pdb_id}
|
| 598 |
+
</div>
|
| 599 |
<div class="viewer_3Dmoljs"
|
| 600 |
data-pdb="{pdb_id}"
|
| 601 |
data-backgroundcolor="0xffffff"
|
| 602 |
data-style="cartoon:color=spectrum"
|
| 603 |
data-spin="axis:y;speed:0.2">
|
| 604 |
</div>
|
| 605 |
+
{chain_html_blocks}
|
| 606 |
</body>
|
| 607 |
</html>
|
| 608 |
"""
|
|
|
|
| 618 |
return f'<iframe style="width: 100%; height: 480px; border: none;" srcdoc=\'{escaped_content}\'></iframe>'
|
| 619 |
|
| 620 |
def create_interactive_table(df):
    """Prepare the search-result DataFrame for display in the results table.

    Selects the display columns in a fixed order and rewrites the
    'Release Date' column as ``YYYY-MM-DD`` text.

    Args:
        df: pandas DataFrame of search results. When non-empty it must
            contain every column listed in ``column_order`` below.

    Returns:
        A new DataFrame holding only the display columns, in display order.
        The caller's DataFrame is not modified. An empty input yields an
        empty DataFrame that still carries the display columns.

    Raises:
        KeyError: if a non-empty ``df`` lacks one of the display columns.
    """
    # Display order, including '# of atoms of protein'
    column_order = ['PDB ID', 'Resolution', 'Title', '# of total residues', '# of atoms of protein', 'Method', 'Release Date']

    # An empty result set may have no columns at all; selecting by name
    # would raise KeyError, so hand back an empty table with the headers.
    if df.empty:
        return pd.DataFrame(columns=column_order)

    # Work on an explicit copy so the column assignment below never
    # triggers pandas' chained-assignment warning.
    table_df = df[column_order].copy()

    # Reformat Release Date as YYYY-MM-DD. Unparseable values (such as an
    # 'N/A' placeholder) become NaT instead of raising, and are shown as 'N/A'.
    release_dates = pd.to_datetime(table_df['Release Date'], errors='coerce')
    table_df['Release Date'] = release_dates.dt.strftime('%Y-%m-%d').fillna('N/A')
    return table_df
|
| 628 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 629 |
|
| 630 |
# Simplified Shiny app UI definition
|
| 631 |
app_ui = ui.page_fluid(
|
|
|
|
| 644 |
color: #0a58ca;
|
| 645 |
text-decoration: underline;
|
| 646 |
}
|
| 647 |
+
.dt-layout-cell {
|
| 648 |
+
overflow-x: auto;
|
| 649 |
+
max-width :100%;
|
| 650 |
+
max-height: 600px;
|
| 651 |
+
}
|
| 652 |
+
table colgroup col[data-dt-column="2"] {
|
| 653 |
+
width: 450px !important;
|
| 654 |
+
min-width: 450px !important;
|
| 655 |
+
}
|
| 656 |
.shiny-input-container {
|
| 657 |
max-width: 100%;
|
| 658 |
margin: 0 auto;
|
|
|
|
| 980 |
align-items: ;
|
| 981 |
justify-content: flex-start;
|
| 982 |
gap: 5px;
|
| 983 |
+
margin-top: 20px;
|
| 984 |
margin-bottom: 20px;
|
| 985 |
margin-left: 20px;
|
| 986 |
}
|
| 987 |
+
.pdb-selector .form-group.shiny-input-container{
|
| 988 |
+
margin-left: 250px;
|
| 989 |
+
}
|
| 990 |
.pdb-select-label {
|
| 991 |
font-weight: bold;
|
| 992 |
margin: 0;
|
|
|
|
| 1059 |
ui.tags.ul(
|
| 1060 |
ui.tags.li("Sequence of PDB ID 8ET6"),
|
| 1061 |
ui.tags.li("Spike protein"),
|
| 1062 |
+
ui.tags.li("Membrane protein"),
|
| 1063 |
ui.tags.li("Human insulin"),
|
| 1064 |
ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
|
| 1065 |
ui.tags.li("Find structures containing sequence with similarity 90% FVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKR"),
|
|
|
|
| 1074 |
ui.column(12,
|
| 1075 |
ui.div(
|
| 1076 |
{"class": "results-section"},
|
| 1077 |
+
ui.h4("PDB Search Results"),
|
| 1078 |
+
ui.output_ui(
|
| 1079 |
+
"results_table",
|
| 1080 |
+
# {"class": "resres"}
|
| 1081 |
+
), #output_widget("results_table"),
|
| 1082 |
ui.download_button("download", "Download Results",
|
| 1083 |
class_="btn btn-info")
|
| 1084 |
)
|
|
|
|
| 1092 |
"Select PDB ID",
|
| 1093 |
{"class": "pdb-select-label"}
|
| 1094 |
),
|
| 1095 |
+
ui.input_selectize(
|
| 1096 |
"selected_pdb",
|
| 1097 |
"", # Label is empty as we're using a separate label
|
| 1098 |
choices=[],
|
|
|
|
| 1161 |
status_store.set("Ready")
|
| 1162 |
pdb_ids = df['PDB ID'].tolist()
|
| 1163 |
@output
|
| 1164 |
+
@render.ui #render_widget
|
| 1165 |
def results_table():
    """Render the search results as an itables ``DT`` table wrapped in raw HTML."""
    # Order and format the columns first, then let DT produce the markup.
    display_df = create_interactive_table(df)
    table_markup = DT(display_df)
    return ui.HTML(table_markup)
|
| 1167 |
|
| 1168 |
if pdb_ids:
|
| 1169 |
pdb_ids_store.set(pdb_ids)
|
| 1170 |
# Update only one dropdown
|
| 1171 |
+
ui.update_selectize(
|
| 1172 |
"selected_pdb",
|
| 1173 |
choices=pdb_ids,
|
| 1174 |
+
selected=pdb_ids[0] # matching entity 1
|
| 1175 |
)
|
| 1176 |
else:
|
| 1177 |
pdb_ids_store.set([])
|
| 1178 |
+
ui.update_selectize(
|
| 1179 |
"selected_pdb",
|
| 1180 |
choices=[],
|
| 1181 |
selected=None
|
|
|
|
| 1219 |
@render.ui
|
| 1220 |
def output_iframe():
    """Render the 3D viewer iframe for the currently selected PDB ID.

    Returns:
        ``ui.HTML`` wrapping the markup from ``render_html`` for the selected
        PDB ID, or an empty ``ui.HTML("")`` when nothing is selected.
    """
    selected_pdb = input.selected_pdb()

    # Check the selection first so the sequence lookup is never issued
    # for an empty or missing PDB ID.
    if not selected_pdb:
        return ui.HTML("")

    # The number of viewer blocks equals the number of sequence entries
    # returned for this PDB ID; a missing/empty result counts as zero,
    # for which render_html returns an empty string.
    sequences = assistant.get_sequences_by_pdb_id(selected_pdb)
    chain_cnt = len(sequences) if sequences else 0

    return ui.HTML(render_html(selected_pdb, chain_cnt))
|
| 1228 |
|
| 1229 |
@output
|