Commit
·
dd1a4ad
1
Parent(s):
df845d1
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,14 +28,14 @@ device = (
|
|
| 28 |
)
|
| 29 |
|
| 30 |
# 1. Production environment
|
| 31 |
-
|
| 32 |
-
|
| 33 |
SCIBERT_MODEL_PATH = "allenai/scibert_scivocab_uncased"
|
| 34 |
|
| 35 |
|
| 36 |
# 2. Development environment
|
| 37 |
-
INDEX_PATH = Path(__file__).parent.joinpath("data/index_test.bin")
|
| 38 |
-
CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters_test.json")
|
| 39 |
# SCIBERT_MODEL_PATH = Path(__file__).parent.joinpath("data/scibert_scivocab_uncased") # Download locally
|
| 40 |
|
| 41 |
|
|
@@ -377,66 +377,75 @@ if __name__ == "__main__":
|
|
| 377 |
)
|
| 378 |
|
| 379 |
display_columns = st.session_state.display_columns
|
| 380 |
-
code_sim_tab, doc_sim_tab, readme_sim_tab, requirement_sim_tab, repo_sim_tab, same_cluster_tab,
|
| 381 |
["Code_sim", "Docstring_sim", "Readme_sim", "Requirement_sim",
|
| 382 |
-
"Repository_sim", "
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
)
|
| 29 |
|
| 30 |
# 1. Production environment
|
| 31 |
+
INDEX_PATH = Path(__file__).parent.joinpath("data/index.bin")
|
| 32 |
+
CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters.json")
|
| 33 |
SCIBERT_MODEL_PATH = "allenai/scibert_scivocab_uncased"
|
| 34 |
|
| 35 |
|
| 36 |
# 2. Development environment
|
| 37 |
+
# INDEX_PATH = Path(__file__).parent.joinpath("data/index_test.bin")
|
| 38 |
+
# CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters_test.json")
|
| 39 |
# SCIBERT_MODEL_PATH = Path(__file__).parent.joinpath("data/scibert_scivocab_uncased") # Download locally
|
| 40 |
|
| 41 |
|
|
|
|
| 377 |
)
|
| 378 |
|
| 379 |
display_columns = st.session_state.display_columns
|
| 380 |
+
code_sim_tab, doc_sim_tab, readme_sim_tab, requirement_sim_tab, repo_sim_tab, cluster_tab, same_cluster_tab, = st.tabs(
|
| 381 |
["Code_sim", "Docstring_sim", "Readme_sim", "Requirement_sim",
|
| 382 |
+
"Repository_sim", "Cluster_sim", "Same_cluster_sim"])
|
| 383 |
+
|
| 384 |
+
with code_sim_tab:
|
| 385 |
+
if query_doc.code_embedding is not None:
|
| 386 |
+
code_sim_res = run_index_search(index, query_doc, "code_embedding", limit)
|
| 387 |
+
cluster_numbers = run_cluster_search(repo_clusters, code_sim_res["name"])
|
| 388 |
+
code_sim_res["cluster number"] = cluster_numbers
|
| 389 |
+
st.dataframe(code_sim_res[display_columns])
|
| 390 |
+
else:
|
| 391 |
+
st.error("No function code was extracted for this repository!")
|
| 392 |
+
|
| 393 |
+
with doc_sim_tab:
|
| 394 |
+
if query_doc.doc_embedding is not None:
|
| 395 |
+
doc_sim_res = run_index_search(index, query_doc, "doc_embedding", limit)
|
| 396 |
+
cluster_numbers = run_cluster_search(repo_clusters, doc_sim_res["name"])
|
| 397 |
+
doc_sim_res["cluster number"] = cluster_numbers
|
| 398 |
+
st.dataframe(doc_sim_res[display_columns])
|
| 399 |
+
else:
|
| 400 |
+
st.error("No function docstring was extracted for this repository!")
|
| 401 |
+
|
| 402 |
+
with readme_sim_tab:
|
| 403 |
+
if query_doc.readme_embedding is not None:
|
| 404 |
+
readme_sim_res = run_index_search(index, query_doc, "readme_embedding", limit)
|
| 405 |
+
cluster_numbers = run_cluster_search(repo_clusters, readme_sim_res["name"])
|
| 406 |
+
readme_sim_res["cluster number"] = cluster_numbers
|
| 407 |
+
st.dataframe(readme_sim_res[display_columns])
|
| 408 |
+
else:
|
| 409 |
+
st.error("No readme file was extracted for this repository!")
|
| 410 |
+
|
| 411 |
+
with requirement_sim_tab:
|
| 412 |
+
if query_doc.requirement_embedding is not None:
|
| 413 |
+
requirement_sim_res = run_index_search(index, query_doc, "requirement_embedding", limit)
|
| 414 |
+
cluster_numbers = run_cluster_search(repo_clusters, requirement_sim_res["name"])
|
| 415 |
+
requirement_sim_res["cluster number"] = cluster_numbers
|
| 416 |
+
st.dataframe(requirement_sim_res[display_columns])
|
| 417 |
+
else:
|
| 418 |
+
st.error("No requirement file was extracted for this repository!")
|
| 419 |
+
|
| 420 |
+
with repo_sim_tab:
|
| 421 |
+
if query_doc.repository_embedding is not None:
|
| 422 |
+
# Repo Sim tab
|
| 423 |
+
repo_sim_res = run_index_search(index, query_doc, "repository_embedding", limit)
|
| 424 |
+
cluster_numbers = run_cluster_search(repo_clusters, repo_sim_res["name"])
|
| 425 |
+
repo_sim_res["cluster number"] = cluster_numbers
|
| 426 |
+
st.dataframe(repo_sim_res[display_columns])
|
| 427 |
+
else:
|
| 428 |
+
st.error("No such useful information was extracted for this repository!")
|
| 429 |
+
|
| 430 |
+
with cluster_tab:
|
| 431 |
+
if query_doc.repository_embedding is not None:
|
| 432 |
+
cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
|
| 433 |
+
query_doc, cluster_number, limit,
|
| 434 |
+
same_cluster=False)
|
| 435 |
+
cluster_numbers = run_cluster_search(repo_clusters, cluster_df["name"])
|
| 436 |
+
cluster_df["cluster number"] = cluster_numbers
|
| 437 |
+
st.dataframe(cluster_df[display_columns])
|
| 438 |
+
else:
|
| 439 |
+
st.error("No such useful information was extracted for this repository!")
|
| 440 |
+
|
| 441 |
+
with same_cluster_tab:
|
| 442 |
+
if query_doc.repository_embedding is not None:
|
| 443 |
+
# Same-cluster tab
|
| 444 |
+
same_cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
|
| 445 |
+
query_doc, cluster_number, limit,
|
| 446 |
+
same_cluster=True)
|
| 447 |
+
same_cluster_numbers = run_cluster_search(repo_clusters, same_cluster_df["name"])
|
| 448 |
+
same_cluster_df["cluster number"] = same_cluster_numbers
|
| 449 |
+
same_cluster_tab.dataframe(same_cluster_df[display_columns])
|
| 450 |
+
else:
|
| 451 |
+
same_cluster_tab.error("No such useful information was extracted for this repository!")
|