Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -267,22 +267,36 @@ with tab_download:
|
|
| 267 |
example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls) # 📚 Examples loaded into session!
|
| 268 |
|
| 269 |
url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200) # 📝 Text area: Paste your PDF URLs here—no commas needed!
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
for idx, url in enumerate(urls):
|
| 273 |
if url:
|
| 274 |
-
output_path = pdf_url_to_filename(url)
|
|
|
|
| 275 |
if output_path not in existing_pdfs:
|
| 276 |
if download_pdf(url, output_path):
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
st.session_state['
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode") # 🎛️ Selectbox: Choose your snapshot resolution!
|
| 288 |
if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!
|
|
|
|
| 267 |
# 📚 Examples loaded into session!
# Sample arXiv PDF links; they are joined one-per-line and stored under the
# 'pdf_urls' session key, which the URL text area reads as its initial value.
example_urls = [
    "https://arxiv.org/pdf/2308.03892",
    "https://arxiv.org/pdf/1912.01703",
    "https://arxiv.org/pdf/2408.11039",
    "https://arxiv.org/pdf/2109.10282",
    "https://arxiv.org/pdf/2112.10752",
    "https://arxiv.org/pdf/2308.11236",
    "https://arxiv.org/pdf/1706.03762",
    "https://arxiv.org/pdf/2006.11239",
    "https://arxiv.org/pdf/2305.11207",
    "https://arxiv.org/pdf/2106.09685",
    "https://arxiv.org/pdf/2005.11401",
    "https://arxiv.org/pdf/2106.10504",
]
st.session_state['pdf_urls'] = "\n".join(example_urls)
|
| 268 |
|
| 269 |
# 📝 Text area: Paste your PDF URLs here—no commas needed!
# Pre-fill with whatever is stored under 'pdf_urls' in session state (empty if unset).
saved_pdf_urls = st.session_state.get('pdf_urls', "")
url_input = st.text_area(
    "Enter PDF URLs (one per line)",
    value=saved_pdf_urls,
    height=200,
)
|
| 270 |
+
|
| 271 |
+
# --- Download PDFs Tab (modified section) ---
|
| 272 |
+
if st.button("Robo-Download 🤖"):
    # Batch-download every URL listed in the text area, reporting progress
    # through a progress bar and a single status line.
    #
    # splitlines() copes with "\r\n" pastes; each line is stripped, blank or
    # whitespace-only lines are dropped, and repeated URLs are collapsed
    # (first occurrence wins, order kept). This keeps the "i/total" counter
    # honest and stops the same URL from being fetched twice in one run.
    urls = list(dict.fromkeys(
        line.strip() for line in url_input.splitlines() if line.strip()
    ))
    progress_bar = st.progress(0)
    status_text = st.empty()
    total_urls = len(urls)
    # Read once before the loop. Presumably the PDF paths already saved
    # locally — verify against get_pdf_files().
    existing_pdfs = get_pdf_files()

    for position, url in enumerate(urls, start=1):
        output_path = pdf_url_to_filename(url)
        status_text.text(f"Fetching {position}/{total_urls}: {os.path.basename(output_path)}...")

        if output_path in existing_pdfs:
            # Already present: skip the fetch but still map the URL to its file.
            st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
            st.session_state['downloaded_pdfs'][url] = output_path
        elif download_pdf(url, output_path):
            st.session_state['downloaded_pdfs'][url] = output_path
            logger.info(f"Downloaded PDF from {url} to {output_path}")
            # The history entry deliberately omits the file size; the
            # membership check keeps the same entry from being recorded twice.
            entry = f"Downloaded PDF: {output_path}"
            if entry not in st.session_state['history']:
                st.session_state['history'].append(entry)
            # NOTE(review): assumed to apply to every successful download, not
            # only when a new history entry was added — confirm against the
            # original indentation.
            st.session_state['asset_checkboxes'][output_path] = True
        else:
            st.error(f"Failed to nab {url} 😿")

        # Advance the bar after every URL, whether it succeeded, failed or was skipped.
        progress_bar.progress(position / total_urls)

    status_text.text("Robo-Download complete! 🚀")
    update_gallery()
|
| 299 |
+
|
| 300 |
|
| 301 |
# 🎛️ Selectbox: Choose your snapshot resolution!
# The widget is registered under the "download_mode" key; the chosen label is kept in `mode`.
snapshot_mode_options = [
    "Single Page (High-Res)",
    "Two Pages (High-Res)",
    "All Pages (High-Res)",
]
mode = st.selectbox("Snapshot Mode", snapshot_mode_options, key="download_mode")
|
| 302 |
if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!
|