hysts HF Staff committed on
Commit
0e315f0
·
1 Parent(s): 32fe149
Files changed (4) hide show
  1. pyproject.toml +0 -1
  2. requirements.txt +0 -2
  3. table.py +0 -64
  4. uv.lock +0 -24
pyproject.toml CHANGED
@@ -9,7 +9,6 @@ dependencies = [
9
  "faiss-cpu>=1.12.0",
10
  "gradio[mcp]>=5.49.1",
11
  "gradio-modal>=0.0.4",
12
- "loguru>=0.7.3",
13
  "polars>=1.34.0",
14
  "sentence-transformers>=5.1.1",
15
  "spaces>=0.42.1",
 
9
  "faiss-cpu>=1.12.0",
10
  "gradio[mcp]>=5.49.1",
11
  "gradio-modal>=0.0.4",
 
12
  "polars>=1.34.0",
13
  "sentence-transformers>=5.1.1",
14
  "spaces>=0.42.1",
requirements.txt CHANGED
@@ -120,8 +120,6 @@ jsonschema==4.25.1
120
  # via mcp
121
  jsonschema-specifications==2025.9.1
122
  # via jsonschema
123
- loguru==0.7.3
124
- # via iccv2025 (pyproject.toml)
125
  markdown-it-py==4.0.0
126
  # via rich
127
  markupsafe==3.0.3
 
120
  # via mcp
121
  jsonschema-specifications==2025.9.1
122
  # via jsonschema
 
 
123
  markdown-it-py==4.0.0
124
  # via rich
125
  markupsafe==3.0.3
table.py CHANGED
@@ -1,52 +1,10 @@
1
  import datasets
2
  import polars as pl
3
- from loguru import logger
4
- from polars import datatypes as pdt
5
 
6
  BASE_REPO_ID = "ai-conferences/ICCV2025"
7
- PATCH_REPO_ID = "ai-conferences/ICCV2025-patches"
8
  PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
9
 
10
 
11
- def get_patch_latest_values(
12
- df: pl.DataFrame, all_columns: list[str], id_col: str, timestamp_col: str = "timestamp", delimiter: str = ","
13
- ) -> pl.DataFrame:
14
- df = df.sort(timestamp_col)
15
-
16
- list_cols = [
17
- col for col, dtype in df.schema.items() if col not in (id_col, timestamp_col) and dtype.base_type() is pdt.List
18
- ]
19
- df = df.with_columns(
20
- [
21
- pl.when(pl.col(c).is_not_null()).then(pl.col(c).list.join(delimiter)).otherwise(None).alias(c)
22
- for c in list_cols
23
- ]
24
- )
25
-
26
- update_columns = [col for col in df.columns if col not in (id_col, timestamp_col)]
27
- melted = df.unpivot(on=update_columns, index=[timestamp_col, id_col]).drop_nulls()
28
-
29
- latest_rows = (
30
- melted.sort(timestamp_col)
31
- .group_by([id_col, "variable"])
32
- .agg(pl.col("value").last())
33
- .pivot("variable", index=id_col, values="value")
34
- )
35
-
36
- latest_rows = latest_rows.with_columns(
37
- [
38
- pl.when(pl.col(c).is_not_null()).then(pl.col(c).str.split(delimiter)).otherwise(None).alias(c)
39
- for c in list_cols
40
- ]
41
- )
42
-
43
- missing_cols = [c for c in all_columns if c not in latest_rows.columns and c != id_col]
44
- if missing_cols:
45
- latest_rows = latest_rows.with_columns([pl.lit(None).alias(c) for c in missing_cols])
46
-
47
- return latest_rows.select([id_col] + [col for col in all_columns if col != id_col])
48
-
49
-
50
  def format_author_claim_ratio(row: dict) -> str:
51
  n_linked_authors = row["n_linked_authors"]
52
  n_authors = row["n_authors"]
@@ -77,28 +35,6 @@ df_orig = (
77
  .drop(["github_2"])
78
  )
79
 
80
- try:
81
- df_patches = (
82
- datasets.load_dataset(PATCH_REPO_ID, split="train")
83
- .to_polars()
84
- .drop("diff")
85
- .with_columns(pl.col("timestamp").str.strptime(pl.Datetime, "%+"))
86
- )
87
- df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
88
- df_orig = (
89
- df_orig.join(df_patches, on="paper_id", how="left")
90
- .with_columns(
91
- [
92
- pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col)
93
- for col in df_orig.columns
94
- if col != "paper_id"
95
- ]
96
- )
97
- .select(df_orig.columns)
98
- )
99
- except Exception as e: # noqa: BLE001
100
- logger.warning(e)
101
-
102
  # format authors
103
  df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
104
  # format links
 
1
  import datasets
2
  import polars as pl
 
 
3
 
4
  BASE_REPO_ID = "ai-conferences/ICCV2025"
 
5
  PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def format_author_claim_ratio(row: dict) -> str:
9
  n_linked_authors = row["n_linked_authors"]
10
  n_authors = row["n_authors"]
 
35
  .drop(["github_2"])
36
  )
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # format authors
39
  df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
40
  # format links
uv.lock CHANGED
@@ -894,7 +894,6 @@ dependencies = [
894
  { name = "faiss-cpu" },
895
  { name = "gradio", extra = ["mcp"] },
896
  { name = "gradio-modal" },
897
- { name = "loguru" },
898
  { name = "polars" },
899
  { name = "sentence-transformers" },
900
  { name = "spaces" },
@@ -907,7 +906,6 @@ requires-dist = [
907
  { name = "faiss-cpu", specifier = ">=1.12.0" },
908
  { name = "gradio", extras = ["mcp"], specifier = ">=5.49.1" },
909
  { name = "gradio-modal", specifier = ">=0.0.4" },
910
- { name = "loguru", specifier = ">=0.7.3" },
911
  { name = "polars", specifier = ">=1.34.0" },
912
  { name = "sentence-transformers", specifier = ">=5.1.1" },
913
  { name = "spaces", specifier = ">=0.42.1" },
@@ -971,19 +969,6 @@ wheels = [
971
  { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
972
  ]
973
 
974
- [[package]]
975
- name = "loguru"
976
- version = "0.7.3"
977
- source = { registry = "https://pypi.org/simple" }
978
- dependencies = [
979
- { name = "colorama", marker = "sys_platform == 'win32'" },
980
- { name = "win32-setctime", marker = "sys_platform == 'win32'" },
981
- ]
982
- sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" }
983
- wheels = [
984
- { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
985
- ]
986
-
987
  [[package]]
988
  name = "markdown-it-py"
989
  version = "4.0.0"
@@ -3179,15 +3164,6 @@ wheels = [
3179
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
3180
  ]
3181
 
3182
- [[package]]
3183
- name = "win32-setctime"
3184
- version = "1.2.0"
3185
- source = { registry = "https://pypi.org/simple" }
3186
- sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" }
3187
- wheels = [
3188
- { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" },
3189
- ]
3190
-
3191
  [[package]]
3192
  name = "xxhash"
3193
  version = "3.6.0"
 
894
  { name = "faiss-cpu" },
895
  { name = "gradio", extra = ["mcp"] },
896
  { name = "gradio-modal" },
 
897
  { name = "polars" },
898
  { name = "sentence-transformers" },
899
  { name = "spaces" },
 
906
  { name = "faiss-cpu", specifier = ">=1.12.0" },
907
  { name = "gradio", extras = ["mcp"], specifier = ">=5.49.1" },
908
  { name = "gradio-modal", specifier = ">=0.0.4" },
 
909
  { name = "polars", specifier = ">=1.34.0" },
910
  { name = "sentence-transformers", specifier = ">=5.1.1" },
911
  { name = "spaces", specifier = ">=0.42.1" },
 
969
  { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
970
  ]
971
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972
  [[package]]
973
  name = "markdown-it-py"
974
  version = "4.0.0"
 
3164
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
3165
  ]
3166
 
 
 
 
 
 
 
 
 
 
3167
  [[package]]
3168
  name = "xxhash"
3169
  version = "3.6.0"