Spaces:
Running
on
Zero
Running
on
Zero
Clean up
Browse files- pyproject.toml +0 -1
- requirements.txt +0 -2
- table.py +0 -64
- uv.lock +0 -24
pyproject.toml
CHANGED
|
@@ -9,7 +9,6 @@ dependencies = [
|
|
| 9 |
"faiss-cpu>=1.12.0",
|
| 10 |
"gradio[mcp]>=5.49.1",
|
| 11 |
"gradio-modal>=0.0.4",
|
| 12 |
-
"loguru>=0.7.3",
|
| 13 |
"polars>=1.34.0",
|
| 14 |
"sentence-transformers>=5.1.1",
|
| 15 |
"spaces>=0.42.1",
|
|
|
|
| 9 |
"faiss-cpu>=1.12.0",
|
| 10 |
"gradio[mcp]>=5.49.1",
|
| 11 |
"gradio-modal>=0.0.4",
|
|
|
|
| 12 |
"polars>=1.34.0",
|
| 13 |
"sentence-transformers>=5.1.1",
|
| 14 |
"spaces>=0.42.1",
|
requirements.txt
CHANGED
|
@@ -120,8 +120,6 @@ jsonschema==4.25.1
|
|
| 120 |
# via mcp
|
| 121 |
jsonschema-specifications==2025.9.1
|
| 122 |
# via jsonschema
|
| 123 |
-
loguru==0.7.3
|
| 124 |
-
# via iccv2025 (pyproject.toml)
|
| 125 |
markdown-it-py==4.0.0
|
| 126 |
# via rich
|
| 127 |
markupsafe==3.0.3
|
|
|
|
| 120 |
# via mcp
|
| 121 |
jsonschema-specifications==2025.9.1
|
| 122 |
# via jsonschema
|
|
|
|
|
|
|
| 123 |
markdown-it-py==4.0.0
|
| 124 |
# via rich
|
| 125 |
markupsafe==3.0.3
|
table.py
CHANGED
|
@@ -1,52 +1,10 @@
|
|
| 1 |
import datasets
|
| 2 |
import polars as pl
|
| 3 |
-
from loguru import logger
|
| 4 |
-
from polars import datatypes as pdt
|
| 5 |
|
| 6 |
BASE_REPO_ID = "ai-conferences/ICCV2025"
|
| 7 |
-
PATCH_REPO_ID = "ai-conferences/ICCV2025-patches"
|
| 8 |
PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
|
| 9 |
|
| 10 |
|
| 11 |
-
def get_patch_latest_values(
|
| 12 |
-
df: pl.DataFrame, all_columns: list[str], id_col: str, timestamp_col: str = "timestamp", delimiter: str = ","
|
| 13 |
-
) -> pl.DataFrame:
|
| 14 |
-
df = df.sort(timestamp_col)
|
| 15 |
-
|
| 16 |
-
list_cols = [
|
| 17 |
-
col for col, dtype in df.schema.items() if col not in (id_col, timestamp_col) and dtype.base_type() is pdt.List
|
| 18 |
-
]
|
| 19 |
-
df = df.with_columns(
|
| 20 |
-
[
|
| 21 |
-
pl.when(pl.col(c).is_not_null()).then(pl.col(c).list.join(delimiter)).otherwise(None).alias(c)
|
| 22 |
-
for c in list_cols
|
| 23 |
-
]
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
update_columns = [col for col in df.columns if col not in (id_col, timestamp_col)]
|
| 27 |
-
melted = df.unpivot(on=update_columns, index=[timestamp_col, id_col]).drop_nulls()
|
| 28 |
-
|
| 29 |
-
latest_rows = (
|
| 30 |
-
melted.sort(timestamp_col)
|
| 31 |
-
.group_by([id_col, "variable"])
|
| 32 |
-
.agg(pl.col("value").last())
|
| 33 |
-
.pivot("variable", index=id_col, values="value")
|
| 34 |
-
)
|
| 35 |
-
|
| 36 |
-
latest_rows = latest_rows.with_columns(
|
| 37 |
-
[
|
| 38 |
-
pl.when(pl.col(c).is_not_null()).then(pl.col(c).str.split(delimiter)).otherwise(None).alias(c)
|
| 39 |
-
for c in list_cols
|
| 40 |
-
]
|
| 41 |
-
)
|
| 42 |
-
|
| 43 |
-
missing_cols = [c for c in all_columns if c not in latest_rows.columns and c != id_col]
|
| 44 |
-
if missing_cols:
|
| 45 |
-
latest_rows = latest_rows.with_columns([pl.lit(None).alias(c) for c in missing_cols])
|
| 46 |
-
|
| 47 |
-
return latest_rows.select([id_col] + [col for col in all_columns if col != id_col])
|
| 48 |
-
|
| 49 |
-
|
| 50 |
def format_author_claim_ratio(row: dict) -> str:
|
| 51 |
n_linked_authors = row["n_linked_authors"]
|
| 52 |
n_authors = row["n_authors"]
|
|
@@ -77,28 +35,6 @@ df_orig = (
|
|
| 77 |
.drop(["github_2"])
|
| 78 |
)
|
| 79 |
|
| 80 |
-
try:
|
| 81 |
-
df_patches = (
|
| 82 |
-
datasets.load_dataset(PATCH_REPO_ID, split="train")
|
| 83 |
-
.to_polars()
|
| 84 |
-
.drop("diff")
|
| 85 |
-
.with_columns(pl.col("timestamp").str.strptime(pl.Datetime, "%+"))
|
| 86 |
-
)
|
| 87 |
-
df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
|
| 88 |
-
df_orig = (
|
| 89 |
-
df_orig.join(df_patches, on="paper_id", how="left")
|
| 90 |
-
.with_columns(
|
| 91 |
-
[
|
| 92 |
-
pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col)
|
| 93 |
-
for col in df_orig.columns
|
| 94 |
-
if col != "paper_id"
|
| 95 |
-
]
|
| 96 |
-
)
|
| 97 |
-
.select(df_orig.columns)
|
| 98 |
-
)
|
| 99 |
-
except Exception as e: # noqa: BLE001
|
| 100 |
-
logger.warning(e)
|
| 101 |
-
|
| 102 |
# format authors
|
| 103 |
df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
|
| 104 |
# format links
|
|
|
|
| 1 |
import datasets
|
| 2 |
import polars as pl
|
|
|
|
|
|
|
| 3 |
|
| 4 |
BASE_REPO_ID = "ai-conferences/ICCV2025"
|
|
|
|
| 5 |
PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
|
| 6 |
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def format_author_claim_ratio(row: dict) -> str:
|
| 9 |
n_linked_authors = row["n_linked_authors"]
|
| 10 |
n_authors = row["n_authors"]
|
|
|
|
| 35 |
.drop(["github_2"])
|
| 36 |
)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# format authors
|
| 39 |
df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
|
| 40 |
# format links
|
uv.lock
CHANGED
|
@@ -894,7 +894,6 @@ dependencies = [
|
|
| 894 |
{ name = "faiss-cpu" },
|
| 895 |
{ name = "gradio", extra = ["mcp"] },
|
| 896 |
{ name = "gradio-modal" },
|
| 897 |
-
{ name = "loguru" },
|
| 898 |
{ name = "polars" },
|
| 899 |
{ name = "sentence-transformers" },
|
| 900 |
{ name = "spaces" },
|
|
@@ -907,7 +906,6 @@ requires-dist = [
|
|
| 907 |
{ name = "faiss-cpu", specifier = ">=1.12.0" },
|
| 908 |
{ name = "gradio", extras = ["mcp"], specifier = ">=5.49.1" },
|
| 909 |
{ name = "gradio-modal", specifier = ">=0.0.4" },
|
| 910 |
-
{ name = "loguru", specifier = ">=0.7.3" },
|
| 911 |
{ name = "polars", specifier = ">=1.34.0" },
|
| 912 |
{ name = "sentence-transformers", specifier = ">=5.1.1" },
|
| 913 |
{ name = "spaces", specifier = ">=0.42.1" },
|
|
@@ -971,19 +969,6 @@ wheels = [
|
|
| 971 |
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
|
| 972 |
]
|
| 973 |
|
| 974 |
-
[[package]]
|
| 975 |
-
name = "loguru"
|
| 976 |
-
version = "0.7.3"
|
| 977 |
-
source = { registry = "https://pypi.org/simple" }
|
| 978 |
-
dependencies = [
|
| 979 |
-
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
| 980 |
-
{ name = "win32-setctime", marker = "sys_platform == 'win32'" },
|
| 981 |
-
]
|
| 982 |
-
sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" }
|
| 983 |
-
wheels = [
|
| 984 |
-
{ url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
|
| 985 |
-
]
|
| 986 |
-
|
| 987 |
[[package]]
|
| 988 |
name = "markdown-it-py"
|
| 989 |
version = "4.0.0"
|
|
@@ -3179,15 +3164,6 @@ wheels = [
|
|
| 3179 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
|
| 3180 |
]
|
| 3181 |
|
| 3182 |
-
[[package]]
|
| 3183 |
-
name = "win32-setctime"
|
| 3184 |
-
version = "1.2.0"
|
| 3185 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3186 |
-
sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" }
|
| 3187 |
-
wheels = [
|
| 3188 |
-
{ url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" },
|
| 3189 |
-
]
|
| 3190 |
-
|
| 3191 |
[[package]]
|
| 3192 |
name = "xxhash"
|
| 3193 |
version = "3.6.0"
|
|
|
|
| 894 |
{ name = "faiss-cpu" },
|
| 895 |
{ name = "gradio", extra = ["mcp"] },
|
| 896 |
{ name = "gradio-modal" },
|
|
|
|
| 897 |
{ name = "polars" },
|
| 898 |
{ name = "sentence-transformers" },
|
| 899 |
{ name = "spaces" },
|
|
|
|
| 906 |
{ name = "faiss-cpu", specifier = ">=1.12.0" },
|
| 907 |
{ name = "gradio", extras = ["mcp"], specifier = ">=5.49.1" },
|
| 908 |
{ name = "gradio-modal", specifier = ">=0.0.4" },
|
|
|
|
| 909 |
{ name = "polars", specifier = ">=1.34.0" },
|
| 910 |
{ name = "sentence-transformers", specifier = ">=5.1.1" },
|
| 911 |
{ name = "spaces", specifier = ">=0.42.1" },
|
|
|
|
| 969 |
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
|
| 970 |
]
|
| 971 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 972 |
[[package]]
|
| 973 |
name = "markdown-it-py"
|
| 974 |
version = "4.0.0"
|
|
|
|
| 3164 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
|
| 3165 |
]
|
| 3166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3167 |
[[package]]
|
| 3168 |
name = "xxhash"
|
| 3169 |
version = "3.6.0"
|