Spaces:
Sleeping
Sleeping
Update ingest.py
Browse files
ingest.py
CHANGED
|
@@ -12,7 +12,12 @@ WEAVIATE_KEY = os.environ["WEAVIATE_KEY"]
|
|
| 12 |
|
| 13 |
print("Connecting to Weaviate:", WEAVIATE_URL)
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
print("Loading 10K ArXiv papers from CShorten/ML-ArXiv-Papers...")
|
| 17 |
dataset = load_dataset("CShorten/ML-ArXiv-Papers", split="train", streaming=True)
|
| 18 |
dataset = list(dataset.take(10000))
|
|
@@ -20,12 +25,6 @@ dataset = list(dataset.take(10000))
|
|
| 20 |
# EMBEDDER
|
| 21 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 22 |
|
| 23 |
-
# === WEAVIATE CLIENT ===
|
| 24 |
-
client = weaviate.Client(
|
| 25 |
-
url=WEAVIATE_URL,
|
| 26 |
-
auth_client_secret=weaviate.AuthApiKey(WEAVIATE_KEY)
|
| 27 |
-
)
|
| 28 |
-
|
| 29 |
# Reset
|
| 30 |
try:
|
| 31 |
client.schema.delete_class("Paper")
|
|
|
|
| 12 |
|
| 13 |
print("Connecting to Weaviate:", WEAVIATE_URL)
|
| 14 |
|
| 15 |
+
client = weaviate.Client(
|
| 16 |
+
url=WEAVIATE_URL,
|
| 17 |
+
auth_client_secret=weaviate.AuthApiKey(WEAVIATE_KEY) # ← v3 syntax
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# LOAD DATASET
|
| 21 |
print("Loading 10K ArXiv papers from CShorten/ML-ArXiv-Papers...")
|
| 22 |
dataset = load_dataset("CShorten/ML-ArXiv-Papers", split="train", streaming=True)
|
| 23 |
dataset = list(dataset.take(10000))
|
|
|
|
| 25 |
# EMBEDDER
|
| 26 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Reset
|
| 29 |
try:
|
| 30 |
client.schema.delete_class("Paper")
|