Spaces:
Sleeping
Sleeping
Commit
·
65997b1
1
Parent(s):
b96eea7
add delete_rows endpoint
Browse files
- .gitignore +2 -1
- src/api/models/embedding_models.py +5 -0
- src/api/services/huggingface_service.py +26 -0
- src/main.py +24 -0
.gitignore
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
*pycache*
|
| 2 |
-
*.env*
|
|
|
|
|
|
| 1 |
*pycache*
|
| 2 |
+
*.env*
|
| 3 |
+
.python-version
|
src/api/models/embedding_models.py
CHANGED
|
@@ -42,6 +42,11 @@ class DeleteEmbeddingRequest(BaseModel):
|
|
| 42 |
dataset_name: str
|
| 43 |
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Request model for the /embed endpoint
|
| 46 |
class EmbedRequest(BaseModel):
|
| 47 |
texts: List[str] # List of strings to generate embeddings for
|
|
|
|
| 42 |
dataset_name: str
|
| 43 |
|
| 44 |
|
| 45 |
+
class DeleteRowsRequest(BaseModel):
    """Request body for the /delete_rows endpoint."""

    # Name of the dataset the rows are removed from.
    dataset_name: str
    # Rows whose product_type appears in this list are removed.
    product_types_to_delete: List[str]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
# Request model for the /embed endpoint
|
| 51 |
class EmbedRequest(BaseModel):
|
| 52 |
texts: List[str] # List of strings to generate embeddings for
|
src/api/services/huggingface_service.py
CHANGED
|
@@ -100,3 +100,29 @@ class HuggingFaceService:
|
|
| 100 |
except Exception as e:
|
| 101 |
logger.error(f"Failed to delete dataset: {e}")
|
| 102 |
raise DatasetDeleteError(f"Failed to delete dataset: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
logger.error(f"Failed to delete dataset: {e}")
|
| 102 |
raise DatasetDeleteError(f"Failed to delete dataset: {e}")
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
async def delete_rows_from_dataset(self, dataset_name: str, product_types_to_delete: List[str]):
    """
    Load a dataset, drop the rows whose ``product_type`` is listed, and push it back.

    Args:
        dataset_name: Name handed to ``self.read_dataset`` and ``self.push_to_hub``.
        product_types_to_delete: ``product_type`` values whose rows are removed.

    Returns:
        The filtered dataset, or ``None`` when ``product_types_to_delete`` is
        empty (in that case nothing is loaded or pushed).
    """
    if not product_types_to_delete:
        return

    # Step 1: Load the existing dataset
    logger.info(f"Loading dataset {dataset_name} to delete rows.")
    dataset = await self.read_dataset(dataset_name)

    # Step 2: Filter the dataset to EXCLUDE the rows with the given product_types
    logger.info(f"Filtering out product_types: {product_types_to_delete}")
    initial_row_count = len(dataset)

    # Build the lookup set once so each row costs one hash lookup instead of
    # a scan over the whole list.
    types_to_delete = set(product_types_to_delete)
    filtered_dataset = dataset.filter(
        lambda product: product['product_type'] not in types_to_delete
    )

    final_row_count = len(filtered_dataset)
    # NOTE(review): this count assumes len() of the loaded object is its row
    # count — confirm against what read_dataset returns.
    logger.info(f"{initial_row_count - final_row_count} rows deleted.")

    # Step 3: Push the modified dataset back to the hub
    await self.push_to_hub(filtered_dataset, dataset_name)

    return filtered_dataset
|
src/main.py
CHANGED
|
@@ -9,6 +9,7 @@ from src.api.models.embedding_models import (
|
|
| 9 |
CreateEmbeddingRequest,
|
| 10 |
ReadEmbeddingRequest,
|
| 11 |
UpdateEmbeddingRequest,
|
|
|
|
| 12 |
DeleteEmbeddingRequest,
|
| 13 |
EmbedRequest,
|
| 14 |
SearchEmbeddingRequest,
|
|
@@ -230,6 +231,29 @@ async def delete_embeddings(
|
|
| 230 |
raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
| 231 |
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
@app.post("/search_embeddings")
|
| 234 |
async def search_embedding(
|
| 235 |
request: SearchEmbeddingRequest,
|
|
|
|
| 9 |
CreateEmbeddingRequest,
|
| 10 |
ReadEmbeddingRequest,
|
| 11 |
UpdateEmbeddingRequest,
|
| 12 |
+
DeleteRowsRequest,
|
| 13 |
DeleteEmbeddingRequest,
|
| 14 |
EmbedRequest,
|
| 15 |
SearchEmbeddingRequest,
|
|
|
|
| 231 |
raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
| 232 |
|
| 233 |
|
| 234 |
+
@app.post("/delete_rows")
async def delete_rows(
    request: DeleteRowsRequest,
    huggingface_service: HuggingFaceService = Depends(get_huggingface_service)
):
    """
    Delete specific rows from a Hugging Face dataset based on their product_types.

    Responds with 404 when the service raises DatasetNotFoundError and with
    500 for any other failure; on success returns a confirmation payload
    echoing the dataset name and the requested product types.
    """
    # Keep the try body limited to the call that can actually fail.
    try:
        await huggingface_service.delete_rows_from_dataset(
            request.dataset_name, request.product_types_to_delete
        )
    except DatasetNotFoundError as e:
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        logger.error(f"An error occurred while deleting rows: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "message": "Rows deleted successfully from dataset.",
        "dataset_name": request.dataset_name,
        "deleted_product_types": request.product_types_to_delete,
    }
|
| 255 |
+
|
| 256 |
+
|
| 257 |
@app.post("/search_embeddings")
|
| 258 |
async def search_embedding(
|
| 259 |
request: SearchEmbeddingRequest,
|