Spaces:
Running
Running
Bạn có thể trích xuất dc nội dung và hình ảnh bài viết từ url không
Browse files- README.md +8 -5
- about.html +109 -0
- components/footer.js +51 -0
- components/navbar.js +73 -0
- index.html +135 -19
- script.js +128 -0
- style.css +71 -18
README.md
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title: Web Scraper Pro
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Web Scraper Pro 🕷️
|
| 3 |
+
colorFrom: green
|
| 4 |
+
colorTo: yellow
|
| 5 |
+
emoji: 🐳
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- deepsite-v3
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Welcome to your new DeepSite project!
|
| 13 |
+
This project was created with [DeepSite](https://huggingface.co/deepsite).
|
about.html
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>About - Web Scraper Pro</title>
  <link rel="stylesheet" href="style.css">
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Feather Icons: loaded once. A second feather-icons build from unpkg was
       previously loaded here as well; both define the same `feather` global. -->
  <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
  <script>
    // Adds the brand colours behind the text-primary / bg-primary utility classes.
    tailwind.config = {
      theme: {
        extend: {
          colors: {
            primary: '#3B82F6',
            secondary: '#10B981'
          }
        }
      }
    }
  </script>
</head>
<body class="bg-gray-50 min-h-screen flex flex-col">
  <custom-navbar></custom-navbar>

  <main class="flex-1 container mx-auto px-4 py-12">
    <div class="max-w-4xl mx-auto">
      <!-- Header -->
      <div class="text-center mb-12">
        <h1 class="text-4xl md:text-5xl font-bold text-gray-900 mb-4">
          About Web Scraper Pro
          <span class="text-primary" aria-hidden="true">🕷️</span>
        </h1>
        <p class="text-xl text-gray-600">
          Powerful web content extraction made simple
        </p>
      </div>

      <!-- Features Grid -->
      <!-- Card titles are h2 (not h3) so the outline does not skip a level after the h1;
           the visual size comes from the utility classes, not the tag. -->
      <div class="grid md:grid-cols-2 lg:grid-cols-3 gap-8 mb-12">
        <div class="bg-white p-6 rounded-2xl shadow-lg text-center">
          <i data-feather="zap" class="w-12 h-12 text-primary mx-auto mb-4" aria-hidden="true"></i>
          <h2 class="text-lg font-semibold text-gray-900 mb-2">Fast Extraction</h2>
          <p class="text-gray-600">Quickly extract content and images from any web page</p>
        </div>
        <div class="bg-white p-6 rounded-2xl shadow-lg text-center">
          <i data-feather="shield" class="w-12 h-12 text-primary mx-auto mb-4" aria-hidden="true"></i>
          <h2 class="text-lg font-semibold text-gray-900 mb-2">Secure</h2>
          <p class="text-gray-600">Your data and privacy are our top priority</p>
        </div>
        <div class="bg-white p-6 rounded-2xl shadow-lg text-center">
          <i data-feather="code" class="w-12 h-12 text-primary mx-auto mb-4" aria-hidden="true"></i>
          <h2 class="text-lg font-semibold text-gray-900 mb-2">Developer Friendly</h2>
          <p class="text-gray-600">Easy-to-use API for integration into your applications</p>
        </div>
      </div>

      <!-- How It Works -->
      <div class="bg-white rounded-2xl shadow-xl p-8">
        <h2 class="text-3xl font-bold text-gray-900 mb-6">How It Works</h2>
        <div class="space-y-6">
          <div class="flex items-start gap-4">
            <div class="bg-primary text-white rounded-full w-8 h-8 flex items-center justify-center flex-shrink-0">
              <span class="font-semibold">1</span>
            </div>
            <div>
              <h3 class="text-xl font-semibold text-gray-900 mb-2">Enter URL</h3>
              <p class="text-gray-600">Provide the web page URL you want to extract content from</p>
            </div>
          </div>
          <div class="flex items-start gap-4">
            <div class="bg-primary text-white rounded-full w-8 h-8 flex items-center justify-center flex-shrink-0">
              <span class="font-semibold">2</span>
            </div>
            <div>
              <h3 class="text-xl font-semibold text-gray-900 mb-2">Content Extraction</h3>
              <p class="text-gray-600">Our system analyzes the page and extracts relevant content and images</p>
            </div>
          </div>
          <div class="flex items-start gap-4">
            <div class="bg-primary text-white rounded-full w-8 h-8 flex items-center justify-center flex-shrink-0">
              <span class="font-semibold">3</span>
            </div>
            <div>
              <h3 class="text-xl font-semibold text-gray-900 mb-2">View Results</h3>
              <p class="text-gray-600">Browse through the extracted text content and image gallery</p>
            </div>
          </div>
        </div>
      </div>
    </div>
  </main>

  <custom-footer></custom-footer>

  <!-- Component Scripts -->
  <script src="components/navbar.js"></script>
  <script src="components/footer.js"></script>

  <!-- Main Script -->
  <script src="script.js"></script>

  <!-- Feather Icons: swap every [data-feather] placeholder in the light DOM for its SVG -->
  <script>
    feather.replace();
  </script>
</body>
</html>
|
components/footer.js
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * <custom-footer> — site footer rendered inside an open shadow root.
 *
 * The markup and styles are self-contained: page stylesheets do not reach
 * into the shadow tree, so every rule the footer needs is declared here.
 */
class CustomFooter extends HTMLElement {
  /**
   * Builds the footer the first time the element is inserted into a document.
   *
   * connectedCallback runs on every insertion (including moves), and
   * attachShadow() throws when the host already has a shadow root, so the
   * render is skipped once a root exists.
   */
  connectedCallback() {
    if (this.shadowRoot) {
      return;
    }

    this.attachShadow({ mode: 'open' });
    this.shadowRoot.innerHTML = `
      <style>
        footer {
          background: #1a202c;
          color: white;
          padding: 2rem;
          text-align: center;
          margin-top: auto;
        }
        .footer-content {
          max-width: 1200px;
          margin: 0 auto;
        }
        .footer-links {
          display: flex;
          justify-content: center;
          gap: 2rem;
          margin-bottom: 1rem;
        }
        .footer-links a {
          color: #cbd5e0;
          text-decoration: none;
          transition: color 0.2s;
        }
        .footer-links a:hover {
          color: white;
        }
        .copyright {
          color: #718096;
          font-size: 0.875rem;
        }
      </style>
      <footer>
        <div class="footer-content">
          <div class="footer-links">
            <a href="/privacy.html">Privacy</a>
            <a href="/terms.html">Terms</a>
            <a href="/contact.html">Contact</a>
          </div>
          <div class="copyright">
            © 2024 Web Scraper Pro. All rights reserved.
          </div>
        </div>
      </footer>
    `;
  }
}

// define() throws if the tag name is already registered (e.g. the script is included twice).
if (!customElements.get('custom-footer')) {
  customElements.define('custom-footer', CustomFooter);
}
|
components/navbar.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * <custom-navbar> — top navigation bar rendered inside an open shadow root.
 *
 * Styles are declared inside the shadow tree because page stylesheets do not
 * apply there. For the same reason the page-level feather.replace() call does
 * not see the logo's [data-feather] placeholder, so this component swaps it
 * for an SVG itself.
 */
class CustomNavbar extends HTMLElement {
  /**
   * Builds the navbar the first time the element is inserted into a document.
   *
   * connectedCallback runs on every insertion (including moves), and
   * attachShadow() throws when the host already has a shadow root, so the
   * render is skipped once a root exists.
   */
  connectedCallback() {
    if (this.shadowRoot) {
      return;
    }

    this.attachShadow({ mode: 'open' });
    this.shadowRoot.innerHTML = `
      <style>
        nav {
          background: linear-gradient(135deg, #3B82F6 0%, #1D4ED8 100%);
          padding: 1rem 2rem;
          display: flex;
          justify-content: space-between;
          align-items: center;
          box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
        }
        .logo {
          color: white;
          font-weight: bold;
          font-size: 1.5rem;
          display: flex;
          align-items: center;
          gap: 0.5rem;
        }
        ul {
          display: flex;
          gap: 2rem;
          list-style: none;
          margin: 0;
          padding: 0;
        }
        a {
          color: white;
          text-decoration: none;
          font-weight: 500;
          padding: 0.5rem 1rem;
          border-radius: 0.5rem;
          transition: all 0.2s;
        }
        a:hover {
          background: rgba(255, 255, 255, 0.1);
        }
        .nav-container {
          width: 100%;
          max-width: 1200px;
          margin: 0 auto;
          display: flex;
          justify-content: space-between;
          align-items: center;
        }
        @media (max-width: 768px) {
          nav {
            padding: 1rem;
          }
          ul {
            gap: 1rem;
          }
        }
      </style>
      <nav>
        <div class="nav-container">
          <div class="logo">
            <i data-feather="globe"></i>
            Web Scraper Pro
          </div>
          <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/about.html">About</a></li>
            <li><a href="/api.html">API</a></li>
          </ul>
        </div>
      </nav>
    `;

    this._renderIcons();
  }

  /**
   * Replaces each [data-feather] placeholder inside the shadow root with the
   * matching Feather SVG. Does nothing when the Feather library is not
   * available or the icon name is unknown, leaving the placeholder in place.
   */
  _renderIcons() {
    const lib = window.feather;
    if (!lib || !lib.icons) {
      return;
    }

    this.shadowRoot.querySelectorAll('[data-feather]').forEach((placeholder) => {
      const icon = lib.icons[placeholder.getAttribute('data-feather')];
      if (icon) {
        // The icon is decorative next to the text logo, so hide it from assistive tech.
        placeholder.outerHTML = icon.toSvg({ 'aria-hidden': 'true' });
      }
    });
  }
}

// define() throws if the tag name is already registered (e.g. the script is included twice).
if (!customElements.get('custom-navbar')) {
  customElements.define('custom-navbar', CustomNavbar);
}
|
index.html
CHANGED
|
@@ -1,19 +1,135 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Web Scraper Pro - Extract Content from URLs</title>
  <link rel="stylesheet" href="style.css">
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Feather Icons: loaded once. A second feather-icons build from unpkg was
       previously loaded here as well; both define the same `feather` global. -->
  <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
  <script>
    // Adds the brand colours behind the text-primary / bg-primary utility classes.
    tailwind.config = {
      theme: {
        extend: {
          colors: {
            primary: '#3B82F6',
            secondary: '#10B981'
          }
        }
      }
    }
  </script>
</head>
<body class="bg-gray-50 min-h-screen flex flex-col">
  <custom-navbar></custom-navbar>

  <main class="flex-1 container mx-auto px-4 py-8">
    <!-- Hero Section -->
    <section class="text-center mb-12">
      <h1 class="text-4xl md:text-6xl font-bold text-gray-900 mb-4">
        Web Scraper Pro
        <span class="text-primary" aria-hidden="true">🕷️</span>
      </h1>
      <p class="text-xl text-gray-600 max-w-2xl mx-auto">
        Extract article content and images from any URL with our powerful web scraping tool
      </p>
    </section>

    <!-- Main Scraper Interface -->
    <section class="max-w-4xl mx-auto bg-white rounded-2xl shadow-xl p-8 mb-8">
      <div class="flex items-center gap-3 mb-6">
        <i data-feather="link" class="text-primary" aria-hidden="true"></i>
        <h2 class="text-2xl font-bold text-gray-900">Enter URL to Scrape</h2>
      </div>

      <!-- script.js intercepts submit (preventDefault), so the form never navigates -->
      <form id="scraperForm" class="space-y-6">
        <div class="space-y-2">
          <label for="urlInput" class="block text-sm font-medium text-gray-700">
            Website URL
          </label>
          <input
            class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-primary focus:border-transparent transition-all duration-200"
            id="urlInput"
            name="url"
            type="url"
            placeholder="https://example.com/article"
            autocomplete="url"
            required
          >
        </div>

        <div class="flex gap-4">
          <button
            type="submit"
            class="flex-1 bg-primary hover:bg-blue-600 text-white font-semibold py-3 px-6 rounded-lg transition-all duration-200 flex items-center justify-center gap-2"
          >
            <i data-feather="download" class="w-5 h-5" aria-hidden="true"></i>
            Extract Content
          </button>
          <!-- clearResults() is a global function declared in script.js -->
          <button
            type="button"
            onclick="clearResults()"
            class="px-6 py-3 border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-all duration-200"
          >
            Clear
          </button>
        </div>
      </form>
    </section>

    <!-- Loading State: role="status" lets assistive tech announce it when it is shown -->
    <div id="loading" class="hidden max-w-4xl mx-auto text-center p-8" role="status" aria-live="polite">
      <div class="animate-spin rounded-full h-12 w-12 border-b-2 border-primary mx-auto mb-4" aria-hidden="true"></div>
      <p class="text-gray-600">Extracting content from website...</p>
    </div>

    <!-- Results Section -->
    <section id="results" class="hidden max-w-4xl mx-auto">
      <!-- Article Content -->
      <div class="bg-white rounded-2xl shadow-xl p-8 mb-6">
        <div class="flex items-center gap-3 mb-6">
          <i data-feather="file-text" class="text-primary" aria-hidden="true"></i>
          <h2 class="text-2xl font-bold text-gray-900">Extracted Content</h2>
        </div>
        <div id="articleContent" class="prose max-w-none text-gray-700">
          <!-- Content will be inserted here -->
        </div>
      </div>

      <!-- Images Gallery -->
      <div class="bg-white rounded-2xl shadow-xl p-8">
        <div class="flex items-center gap-3 mb-6">
          <i data-feather="image" class="text-primary" aria-hidden="true"></i>
          <h2 class="text-2xl font-bold text-gray-900">Extracted Images</h2>
        </div>
        <div id="imageGallery" class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
          <!-- Images will be inserted here -->
        </div>
      </div>
    </section>

    <!-- Error Message: role="alert" so the failure text is announced when it appears -->
    <div id="error" class="hidden max-w-4xl mx-auto bg-red-50 border border-red-200 rounded-2xl p-6 mb-6" role="alert">
      <div class="flex items-center gap-3 text-red-800">
        <i data-feather="alert-triangle" class="w-6 h-6" aria-hidden="true"></i>
        <h3 class="font-semibold">Error</h3>
      </div>
      <p id="errorMessage" class="text-red-700 mt-2"></p>
    </div>
  </main>

  <custom-footer></custom-footer>

  <!-- Component Scripts -->
  <script src="components/navbar.js"></script>
  <script src="components/footer.js"></script>

  <!-- Main Script -->
  <script src="script.js"></script>

  <!-- Feather Icons: swap every [data-feather] placeholder in the light DOM for its SVG -->
  <script>
    feather.replace();
  </script>
  <script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>
</body>
</html>
|
script.js
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Shared JavaScript across all pages
console.log('Web Scraper Pro loaded');

// DOM Elements
// Looked up once at load time. Each constant is null on a page that does not
// contain an element with the corresponding id.
const [
  scraperForm,
  urlInput,
  loading,
  results,
  error,
  errorMessage,
  articleContent,
  imageGallery
] = [
  'scraperForm',
  'urlInput',
  'loading',
  'results',
  'error',
  'errorMessage',
  'articleContent',
  'imageGallery'
].map((elementId) => document.getElementById(elementId));
|
| 13 |
+
|
| 14 |
+
// Form submission handler
|
| 15 |
+
// This script is loaded on every page, but only the scraper page contains
// #scraperForm. Without the guard, pages lacking the form throw a TypeError
// here and the rest of the script never runs.
if (scraperForm) {
  scraperForm.addEventListener('submit', async (e) => {
    // Keep the browser from navigating; the request is handled in-page.
    e.preventDefault();

    const url = urlInput.value.trim();
    if (!url) return;

    // Show loading state and clear whatever the previous run left on screen
    showLoading();
    hideResults();
    hideError();

    try {
      // Note: In a real implementation, you would call a backend API
      // For demo purposes, we'll simulate the scraping process
      await simulateScraping(url);
    } catch (err) {
      // Keep the cause visible to developers; the user sees a generic message.
      console.error('Content extraction failed:', err);
      showError('Failed to extract content. Please check the URL and try again.');
    }
  });
}
|
| 34 |
+
|
| 35 |
+
// Simulate scraping process (replace with actual API call)
|
| 36 |
+
/**
 * Stand-in for a real scraping request: waits two seconds, hides the loading
 * indicator, then renders the sample results.
 *
 * @param {string} url - URL the user submitted; not used by the simulation.
 * @returns {Promise<void>} Resolves after the sample results have been rendered.
 */
async function simulateScraping(url) {
  // Simulate API delay
  const simulatedLatencyMs = 2000;
  await new Promise((done) => {
    setTimeout(done, simulatedLatencyMs);
  });

  // The wait is over, so the spinner goes away before content appears.
  hideLoading();

  // Demo only: a real implementation would call the scraping API here.
  displaySampleResults();
}
|
| 47 |
+
|
| 48 |
+
// Display sample results for demo
|
| 49 |
+
/**
 * Fills the article and gallery containers with fixed sample content, then
 * reveals the results section.
 *
 * The submitted URL is written with textContent rather than interpolated into
 * the HTML template, so markup characters in the URL are shown literally
 * instead of being parsed as HTML.
 */
function displaySampleResults() {
  // Sample article content. The "Extracted from" paragraph is left empty here
  // and filled in below with the user-supplied URL.
  articleContent.innerHTML = `
    <h1>Sample Article Title</h1>
    <p class="text-gray-500 mb-6" data-role="source-url"></p>

    <h2>Introduction</h2>
    <p>This is a sample demonstration of how extracted content would appear. In a real implementation, this would be the actual content from the provided URL.</p>

    <h2>Main Content</h2>
    <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</p>

    <ul>
      <li>First extracted paragraph</li>
      <li>Second extracted paragraph</li>
      <li>Third extracted paragraph with more details</li>
    </ul>

    <h2>Conclusion</h2>
    <p>This demonstrates the web scraping capability. The actual implementation would connect to a backend service that extracts real content.</p>
  `;

  // Untrusted input: assign as text so it can never become markup.
  articleContent.querySelector('[data-role="source-url"]').textContent =
    `Extracted from: ${urlInput.value}`;

  // Sample images. Requested over https so they are not mixed content on an
  // https-served page.
  imageGallery.innerHTML = `
    <div class="gallery-image">
      <img src="https://static.photos/technology/640x360/1" alt="Sample image 1" class="w-full h-48 object-cover">
    </div>
    <div class="gallery-image">
      <img src="https://static.photos/office/640x360/2" alt="Sample image 2" class="w-full h-48 object-cover">
    </div>
    <div class="gallery-image">
      <img src="https://static.photos/nature/640x360/3" alt="Sample image 3" class="w-full h-48 object-cover">
    </div>
  `;

  // Show results with animation
  showResults();
}
|
| 87 |
+
|
| 88 |
+
// UI Control Functions
|
| 89 |
+
/** Reveals the loading indicator by clearing its `hidden` class. */
function showLoading() {
  loading.classList.toggle('hidden', false);
}
|
| 92 |
+
|
| 93 |
+
/** Conceals the loading indicator by setting its `hidden` class. */
function hideLoading() {
  loading.classList.toggle('hidden', true);
}
|
| 96 |
+
|
| 97 |
+
/** Reveals the results section and applies the `fade-in` class to it. */
function showResults() {
  const { classList } = results;
  classList.remove('hidden');
  classList.add('fade-in');
}
|
| 101 |
+
|
| 102 |
+
/**
 * Conceals the results section and drops its `fade-in` class, so the class is
 * freshly added the next time showResults() runs.
 */
function hideResults() {
  const { classList } = results;
  classList.add('hidden');
  classList.remove('fade-in');
}
|
| 106 |
+
|
| 107 |
+
/**
 * Displays the error panel with the given text and hides the loading indicator.
 *
 * @param {string} message - Text to show; assigned via textContent, so it is not parsed as HTML.
 */
function showError(message) {
  errorMessage.textContent = message;
  error.classList.toggle('hidden', false);
  hideLoading();
}
|
| 112 |
+
|
| 113 |
+
/** Conceals the error panel by setting its `hidden` class. */
function hideError() {
  error.classList.toggle('hidden', true);
}
|
| 116 |
+
|
| 117 |
+
/**
 * Resets the scraper UI: empties the URL field, then hides the results,
 * error and loading sections. Wired to the "Clear" button via an inline
 * onclick, so it must remain a global function.
 */
function clearResults() {
  urlInput.value = '';
  [hideResults, hideError, hideLoading].forEach((hide) => hide());
}
|
| 123 |
+
|
| 124 |
+
// Initialize the application
|
| 125 |
+
document.addEventListener('DOMContentLoaded', function() {
  console.log('Web Scraper Pro initialized');

  // `feather` is a global provided by a CDN <script>. If that script did not
  // load, skip icon replacement instead of throwing a ReferenceError.
  if (typeof feather !== 'undefined') {
    feather.replace();
  }
});
|
style.css
CHANGED
|
@@ -1,28 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
body {
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
}
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
}
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
margin-top: 5px;
|
| 16 |
}
|
| 17 |
|
| 18 |
-
.
|
| 19 |
-
|
| 20 |
-
margin: 0 auto;
|
| 21 |
-
padding: 16px;
|
| 22 |
-
border: 1px solid lightgray;
|
| 23 |
-
border-radius: 16px;
|
| 24 |
}
|
| 25 |
|
| 26 |
-
.
|
| 27 |
-
|
|
|
|
| 28 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Shared styles across all pages */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

body {
  font-family: 'Inter', sans-serif;
}

/* Custom scrollbar (WebKit-prefixed pseudo-elements only) */
::-webkit-scrollbar {
  width: 6px;
}

::-webkit-scrollbar-track {
  background: #f1f1f1;
}

::-webkit-scrollbar-thumb {
  background: #c1c1c1;
  border-radius: 3px;
}

::-webkit-scrollbar-thumb:hover {
  background: #a8a8a8;
}

/* Smooth transitions.
   Limited to paint/compositing properties: a universal `transition: all`
   also animates layout properties (width, margin, padding, ...) on every
   element whenever they change. Elements that opt into more via their own
   classes still override this rule. */
* {
  transition-property: color, background-color, border-color, box-shadow, opacity, transform;
  transition-duration: 0.2s;
  transition-timing-function: ease-in-out;
}

/* Custom animations */
@keyframes fadeIn {
  from {
    opacity: 0;
    transform: translateY(10px);
  }
  to {
    opacity: 1;
    transform: translateY(0);
  }
}

/* Applied by script.js when the results section is revealed */
.fade-in {
  animation: fadeIn 0.5s ease-out;
}

/* Prose styles for extracted content */
.prose {
  line-height: 1.6;
}

.prose h1, .prose h2, .prose h3, .prose h4, .prose h5, .prose h6 {
  margin-top: 1.5em;
  margin-bottom: 0.5em;
  font-weight: 600;
}

.prose p {
  margin-bottom: 1em;
}

.prose ul, .prose ol {
  margin-bottom: 1em;
  padding-left: 1.5em;
}

.prose li {
  margin-bottom: 0.5em;
}

/* Image gallery styles */
.gallery-image {
  border-radius: 8px;
  overflow: hidden;
  box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
  transition: transform 0.3s ease;
}

.gallery-image:hover {
  transform: scale(1.05);
}

/* Respect the user's reduced-motion preference: drop the decorative
   transitions, the fade-in and the hover zoom defined in this file. */
@media (prefers-reduced-motion: reduce) {
  *,
  .gallery-image {
    transition: none;
  }

  .fade-in {
    animation: none;
  }

  .gallery-image:hover {
    transform: none;
  }
}
|