import gradio as gr
import json
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
from pathlib import Path
import tempfile
from auth import verify_hf_token, get_user_from_request_headers

# HuggingFace configuration
HF_TOKEN = os.getenv("HF_TOKEN")  # Required for writing to dataset
DATASET_REPO = "Fraser/piclets"  # Public dataset repository
DATASET_TYPE = "dataset"

# Initialize HuggingFace API with token if available
api = HfApi(token=HF_TOKEN) if HF_TOKEN else HfApi()

# Cache directory for local operations
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)


class PicletDiscoveryService:
    """Manages Piclet discovery using HuggingFace datasets"""

    @staticmethod
    def _download_json(file_path: str) -> dict:
        """
        Download one JSON file from the dataset repo and parse it.

        Args:
            file_path: Path of the file inside the dataset repository

        Returns:
            The parsed JSON content

        Raises:
            Whatever hf_hub_download / open / json.load raise; callers decide
            how to degrade.
        """
        local_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=file_path,
            repo_type=DATASET_TYPE,
            token=HF_TOKEN,
            cache_dir=str(CACHE_DIR)
        )
        with open(local_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def _upload_json(file_path: str, data: dict, commit_message: str) -> None:
        """
        Serialize data to a temporary file and upload it to the dataset repo.

        The temporary file is removed even when serialization or the upload
        fails, so failed saves do not leave files behind.

        Args:
            file_path: Destination path inside the dataset repository
            data: JSON-serializable payload
            commit_message: Commit message for the upload
        """
        tmp = tempfile.NamedTemporaryFile(
            mode='w', suffix='.json', delete=False, encoding='utf-8'
        )
        temp_path = tmp.name
        try:
            with tmp:
                json.dump(data, tmp, indent=2)
            api.upload_file(
                path_or_fileobj=temp_path,
                path_in_repo=file_path,
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                commit_message=commit_message
            )
        finally:
            os.unlink(temp_path)

    @staticmethod
    def normalize_object_name(name: str) -> str:
        """
        Normalize object names for consistent storage and lookup

        The result is used as the file name under "piclets/", so its output
        must stay stable for already-stored data.

        Examples: "The Blue Pillow" -> "blue_pillow",
                  "wooden chairs" -> "wooden_chair",
                  "berries" -> "berry", "leaves" -> "leaf"

        Args:
            name: Free-form object name

        Returns:
            Lowercase, underscore-separated name, or "unknown" for empty input
        """
        if not name:
            return "unknown"

        # Convert to lowercase and strip
        name = name.lower().strip()

        # Remove a leading article (the, a, an)
        name = re.sub(r'^(the|a|an)\s+', '', name)

        # Remove special characters except spaces.
        # NOTE(review): this also removes underscores, so feeding an already
        # normalized multi-word name back in ("wooden_chair") yields
        # "woodenchair". Not changed here because it would alter storage keys.
        name = re.sub(r'[^a-z0-9\s]', '', name)

        # Handle common plurals (basic rules, applied to the end of the whole
        # string only).
        if name.endswith('ies') and len(name) > 4:
            name = name[:-3] + 'y'  # berries -> berry
        elif name.endswith('ves') and len(name) > 4:
            name = name[:-3] + 'f'  # leaves -> leaf
        elif name.endswith('es') and len(name) > 3:
            # Names ending in ses/xes/zes/ches/shes are kept unchanged
            # (glasses, boxes); every other "...es" loses both letters.
            # NOTE(review): that turns "apples" into "appl" and "tables" into
            # "tabl". Left as-is because existing files are keyed by this
            # output; fixing it needs a data migration.
            if not name.endswith(('ses', 'xes', 'zes', 'ches', 'shes')):
                name = name[:-2]  # tomatoes -> tomato
        elif name.endswith('s') and len(name) > 2 and not name.endswith('ss'):
            name = name[:-1]  # chairs -> chair (but keep glass)

        # Replace spaces with underscores
        name = re.sub(r'\s+', '_', name.strip())

        return name

    @staticmethod
    def load_piclet_data(object_name: str) -> Optional[dict]:
        """
        Load Piclet data from HuggingFace dataset

        Args:
            object_name: Object name; it is normalized before lookup

        Returns:
            Parsed piclet record, or None if it could not be loaded for any
            reason (missing file, network error, invalid JSON)
        """
        try:
            normalized_name = PicletDiscoveryService.normalize_object_name(object_name)
            return PicletDiscoveryService._download_json(f"piclets/{normalized_name}.json")
        except Exception as e:
            print(f"Could not load piclet data for {object_name}: {e}")
            return None

    @staticmethod
    def save_piclet_data(object_name: str, data: dict) -> bool:
        """
        Save Piclet data to HuggingFace dataset

        Args:
            object_name: Object name; it is normalized to build the file path
            data: Piclet record to store

        Returns:
            True if successful, False otherwise
        """
        try:
            normalized_name = PicletDiscoveryService.normalize_object_name(object_name)
            PicletDiscoveryService._upload_json(
                f"piclets/{normalized_name}.json",
                data,
                f"Update piclet: {normalized_name}"
            )
            return True
        except Exception as e:
            print(f"Failed to save piclet data: {e}")
            return False

    @staticmethod
    def load_user_data(sub: str) -> dict:
        """
        Load user profile from dataset by HF user ID (sub)

        Args:
            sub: HuggingFace user ID (stable identifier)

        Returns:
            User profile dict or default profile if not found
        """
        # NOTE(review): sub is interpolated into the repo path without
        # validation, and some callers pass user-supplied text. Confirm that
        # path separators cannot reach this point.
        try:
            return PicletDiscoveryService._download_json(f"users/{sub}.json")
        except Exception:
            # Any failure (not only "file missing") falls back to a default
            # profile. It will be populated with actual data on first save.
            now = datetime.now().isoformat()
            return {
                "sub": sub,
                "preferred_username": None,
                "name": None,
                "picture": None,
                "joinedAt": now,
                "lastSeen": now,
                "discoveries": [],
                "uniqueFinds": 0,
                "totalFinds": 0,
                "rarityScore": 0,
                "visibility": "public"
            }

    @staticmethod
    def save_user_data(sub: str, data: dict) -> bool:
        """
        Save user profile to dataset by HF user ID (sub)

        Updates data["lastSeen"] in place before uploading.

        Args:
            sub: HuggingFace user ID (stable identifier)
            data: User profile dict

        Returns:
            True if successful, False otherwise
        """
        try:
            # Update lastSeen timestamp
            data["lastSeen"] = datetime.now().isoformat()

            # Fall back to sub when the username is missing or None so the
            # commit message never reads "None".
            display_name = data.get('preferred_username') or sub
            PicletDiscoveryService._upload_json(
                f"users/{sub}.json",
                data,
                f"Update user profile: {display_name}"
            )
            return True
        except Exception as e:
            print(f"Failed to save user data: {e}")
            return False

    @staticmethod
    def get_or_create_user_profile(user_info: dict) -> dict:
        """
        Get existing user profile or create new one from OAuth user_info
        Refreshes cached profile data on each call

        Args:
            user_info: OAuth user info from HF (sub, preferred_username, name, picture)

        Returns:
            User profile dict (not yet saved)
        """
        sub = user_info['sub']

        # Load existing profile (or the default one)
        profile = PicletDiscoveryService.load_user_data(sub)

        # Update cached profile fields from OAuth
        profile['sub'] = sub
        profile['preferred_username'] = user_info.get('preferred_username')
        profile['name'] = user_info.get('name')
        profile['picture'] = user_info.get('picture')
        # NOTE(review): the profile is later uploaded to DATASET_REPO, which
        # is marked public above. Confirm that storing the email is intended.
        profile['email'] = user_info.get('email')

        # Set joinedAt only if this is a new profile
        if not profile.get('joinedAt'):
            profile['joinedAt'] = datetime.now().isoformat()

        return profile

    @staticmethod
    def update_global_stats() -> dict:
        """
        Return global statistics

        Reads "metadata/stats.json" from the dataset. Despite the name,
        nothing is recomputed or written back here.

        Returns:
            Stored stats dict, or zeroed default stats if the file cannot be
            loaded
        """
        try:
            return PicletDiscoveryService._download_json("metadata/stats.json")
        except Exception:
            return {
                "totalDiscoveries": 0,
                "uniqueObjects": 0,
                "totalVariations": 0,
                "lastUpdated": datetime.now().isoformat()
            }


# Shared helpers for the API endpoints

def _parse_attributes(raw: Optional[str]) -> List[str]:
    """Split a comma-separated attribute string, dropping blank entries."""
    if not raw:
        return []
    return [part.strip() for part in raw.split(",") if part.strip()]


def _resolve_user(token_or_username: str) -> Tuple[Optional[dict], Optional[str]]:
    """
    Turn an OAuth token or legacy username into a user_info dict.

    Returns:
        (user_info, None) on success, (None, error message) on failure
    """
    if not token_or_username or not token_or_username.strip():
        return None, "Missing OAuth token or username"

    if token_or_username.startswith('hf_'):
        # OAuth token - verify it
        user_info = verify_hf_token(token_or_username)
        if not user_info:
            return None, "Invalid OAuth token"
        return user_info, None

    # Legacy username mode (for testing)
    return {
        "sub": f"legacy_{token_or_username}",
        "preferred_username": token_or_username,
        "name": token_or_username,
        "picture": None
    }, None


def _discoverer_fields(user_info: dict) -> dict:
    """Build the discoverer attribution fields stored on every Piclet entry."""
    return {
        "discoveredBy": user_info['preferred_username'],
        "discovererSub": user_info['sub'],
        "discovererUsername": user_info['preferred_username'],
        "discovererName": user_info.get('name'),
        "discovererPicture": user_info.get('picture'),
    }


# API Endpoints

def search_piclet(object_name: str, attributes: List[str]) -> dict:
    """
    Search for canonical Piclet or variations

    Args:
        object_name: Object name to look up
        attributes: Attributes to match; empty means "canonical only"

    Returns:
        Dict with "status" ("new", "existing", "variation" or
        "new_variation"), "message" and "piclet" (None when nothing matched)
    """
    piclet_data = PicletDiscoveryService.load_piclet_data(object_name)

    if not piclet_data:
        return {
            "status": "new",
            "message": f"No Piclet found for '{object_name}'",
            "piclet": None
        }

    # Check if searching for canonical (no attributes)
    if not attributes:
        return {
            "status": "existing",
            "message": f"Found canonical Piclet for '{object_name}'",
            "piclet": piclet_data.get("canonical")
        }

    # Search for matching variation
    search_attrs = set(attributes)
    required_overlap = len(search_attrs) * 0.5
    for variation in piclet_data.get("variations", []):
        var_attrs = set(variation.get("attributes", []))

        # Check for close match (at least 50% of the searched attributes)
        if len(var_attrs & search_attrs) >= required_overlap:
            return {
                "status": "variation",
                "message": f"Found variation of '{object_name}'",
                "piclet": variation,
                "canonicalId": piclet_data["canonical"]["typeId"]
            }

    # No variation found, suggest creating one
    return {
        "status": "new_variation",
        "message": f"No variation found for '{object_name}' with attributes {attributes}",
        "canonicalId": piclet_data["canonical"]["typeId"],
        "piclet": None
    }


def create_canonical(object_name: str, piclet_data: str, token_or_username: str) -> dict:
    """
    Create a new canonical Piclet

    Refuses to overwrite an existing canonical record, because the record
    also holds that object's variations.

    Args:
        object_name: The normalized object name (e.g., "pillow")
        piclet_data: JSON string of Piclet instance data
        token_or_username: Either OAuth token (starts with "hf_") or username for testing

    Returns:
        Dict with success status and piclet data
    """
    try:
        piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data

        # Determine if this is a token or username
        user_info, error = _resolve_user(token_or_username)
        if error:
            return {
                "success": False,
                "error": error
            }

        # Each object has exactly one canonical Piclet
        existing = PicletDiscoveryService.load_piclet_data(object_name)
        if existing and existing.get("canonical"):
            return {
                "success": False,
                "error": f"Canonical Piclet already exists for '{object_name}'",
                "piclet": existing["canonical"]
            }

        # Get or create user profile
        user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)

        # Create canonical entry with full discoverer info
        type_id = f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical"
        canonical_data = {
            "canonical": {
                "objectName": object_name,
                "typeId": type_id,
                **_discoverer_fields(user_info),
                "discoveredAt": datetime.now().isoformat(),
                "scanCount": 1,
                "picletData": piclet_json
            },
            "variations": []
        }

        # Save to dataset
        if not PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
            return {
                "success": False,
                "error": "Failed to save canonical Piclet"
            }

        # Update user profile (tolerate stored profiles with missing keys)
        user_profile.setdefault("discoveries", []).append(type_id)
        user_profile["uniqueFinds"] = user_profile.get("uniqueFinds", 0) + 1
        user_profile["totalFinds"] = user_profile.get("totalFinds", 0) + 1
        # Bonus for canonical discovery
        user_profile["rarityScore"] = user_profile.get("rarityScore", 0) + 100
        PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)

        return {
            "success": True,
            "message": f"Created canonical Piclet for '{object_name}'",
            "piclet": canonical_data["canonical"]
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }


def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, token_or_username: str, object_name: str) -> dict:
    """
    Create a variation of an existing canonical Piclet with OAuth verification

    Args:
        canonical_id: ID of the canonical Piclet (accepted for API
            compatibility; the record is located via object_name)
        attributes: List of variation attributes
        piclet_data: JSON data for the Piclet
        token_or_username: Either OAuth token (starts with "hf_") or username for testing
        object_name: Normalized object name

    Returns:
        Success/error dict with variation data
    """
    try:
        piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data

        # Verify token or use legacy mode
        user_info, error = _resolve_user(token_or_username)
        if error:
            return {"success": False, "error": error}

        # Get or create user profile
        user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)

        # Load existing data
        existing_data = PicletDiscoveryService.load_piclet_data(object_name)
        if not existing_data:
            return {
                "success": False,
                "error": f"Canonical Piclet not found for '{object_name}'"
            }

        # Create variation entry; IDs are numbered sequentially per object
        variations = existing_data.setdefault("variations", [])
        normalized_name = PicletDiscoveryService.normalize_object_name(object_name)
        variation_id = f"{normalized_name}_{len(variations) + 1:03d}"

        variation = {
            "typeId": variation_id,
            "attributes": attributes,
            **_discoverer_fields(user_info),
            "discoveredAt": datetime.now().isoformat(),
            "scanCount": 1,
            "picletData": piclet_json
        }

        # Add to variations
        variations.append(variation)

        # Save updated data
        if not PicletDiscoveryService.save_piclet_data(object_name, existing_data):
            return {
                "success": False,
                "error": "Failed to save variation"
            }

        # Update user profile (tolerate stored profiles with missing keys)
        user_profile.setdefault("discoveries", []).append(variation_id)
        user_profile["totalFinds"] = user_profile.get("totalFinds", 0) + 1
        # Bonus for variation discovery
        user_profile["rarityScore"] = user_profile.get("rarityScore", 0) + 50
        PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)

        return {
            "success": True,
            "message": f"Created variation of '{object_name}'",
            "piclet": variation
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }


def increment_scan_count(piclet_id: str, object_name: str) -> dict:
    """
    Increment the scan count for a Piclet

    Args:
        piclet_id: typeId of the canonical Piclet or one of its variations
        object_name: Object name whose record contains the Piclet

    Returns:
        {"success": True, "scanCount": n} or an error dict
    """
    try:
        data = PicletDiscoveryService.load_piclet_data(object_name)
        if not data:
            return {
                "success": False,
                "error": "Piclet not found"
            }

        # Check canonical first, then the variations
        canonical = data["canonical"]
        if canonical["typeId"] == piclet_id:
            target = canonical
        else:
            target = next(
                (v for v in data.get("variations", []) if v["typeId"] == piclet_id),
                None
            )
            if target is None:
                return {
                    "success": False,
                    "error": "Piclet ID not found"
                }

        target["scanCount"] = target.get("scanCount", 0) + 1
        scan_count = target["scanCount"]

        # Save updated data
        if PicletDiscoveryService.save_piclet_data(object_name, data):
            return {
                "success": True,
                "scanCount": scan_count
            }
        return {
            "success": False,
            "error": "Failed to update scan count"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }


def get_recent_activity(limit: int = 20) -> dict:
    """
    Get recent discoveries across all users

    Args:
        limit: Maximum number of activities to return (also the number of
            piclet files inspected)

    Returns:
        {"success": True, "activities": [...]} sorted newest first, or an
        error dict with an empty "activities" list
    """
    try:
        # UI sliders may deliver a float; slices need a non-negative int
        limit = max(int(limit), 0)
        activities = []

        # List all piclet files
        try:
            files = list_repo_files(
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                token=HF_TOKEN
            )
            piclet_files = [f for f in files if f.startswith("piclets/") and f.endswith(".json")]
        except Exception:
            piclet_files = []

        # Load recent piclets (simplified - in production, maintain a separate activity log)
        # "[-0:]" would select every file, so handle limit == 0 explicitly.
        recent_files = piclet_files[-limit:] if limit else []
        for file_path in recent_files:
            try:
                object_name = file_path.replace("piclets/", "").replace(".json", "")
                # Load by repo path: re-normalizing the file stem strips
                # underscores and would miss multi-word objects.
                data = PicletDiscoveryService._download_json(file_path)
                if not data:
                    continue

                # Add canonical discovery
                canonical = data["canonical"]
                activities.append({
                    "type": "discovery",
                    "objectName": object_name,
                    "typeId": canonical["typeId"],
                    "discoveredBy": canonical["discoveredBy"],
                    "discoveredAt": canonical["discoveredAt"],
                    "scanCount": canonical.get("scanCount", 1)
                })

                # Add recent variations
                for variation in data.get("variations", [])[-5:]:
                    activities.append({
                        "type": "variation",
                        "objectName": object_name,
                        "typeId": variation["typeId"],
                        "attributes": variation["attributes"],
                        "discoveredBy": variation["discoveredBy"],
                        "discoveredAt": variation["discoveredAt"],
                        "scanCount": variation.get("scanCount", 1)
                    })
            except Exception:
                # Best effort: skip files that are unreadable or malformed
                continue

        # Sort by discovery date
        activities.sort(key=lambda x: x.get("discoveredAt", ""), reverse=True)

        return {
            "success": True,
            "activities": activities[:limit]
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "activities": []
        }


def get_leaderboard(limit: int = 10) -> dict:
    """
    Get top discoverers

    Args:
        limit: Maximum number of entries to return

    Returns:
        {"success": True, "leaderboard": [...]} ranked by rarityScore, or an
        error dict with an empty "leaderboard" list
    """
    try:
        # UI sliders may deliver a float; slices need a non-negative int
        limit = max(int(limit), 0)
        leaderboard = []

        # List all user files
        try:
            files = list_repo_files(
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                token=HF_TOKEN
            )
            user_files = [f for f in files if f.startswith("users/") and f.endswith(".json")]
        except Exception:
            user_files = []

        # Load user data
        for file_path in user_files:
            try:
                # The file stem is the key the profile was saved under
                username = file_path.replace("users/", "").replace(".json", "")
                user_data = PicletDiscoveryService.load_user_data(username)

                leaderboard.append({
                    "username": username,
                    "totalFinds": user_data.get("totalFinds", 0),
                    "uniqueFinds": user_data.get("uniqueFinds", 0),
                    "rarityScore": user_data.get("rarityScore", 0)
                })
            except Exception:
                continue

        # Sort by rarity score
        leaderboard.sort(key=lambda x: x["rarityScore"], reverse=True)

        # Add ranks
        top_entries = leaderboard[:limit]
        for rank, entry in enumerate(top_entries, start=1):
            entry["rank"] = rank

        return {
            "success": True,
            "leaderboard": top_entries
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "leaderboard": []
        }


def get_user_profile(username: str) -> dict:
    """
    Get user's discovery profile

    Args:
        username: Key the profile is stored under ("users/<username>.json")

    Returns:
        {"success": True, "profile": {...}}; the profile is a default one
        when nothing is stored for that key
    """
    try:
        user_data = PicletDiscoveryService.load_user_data(username)
        return {
            "success": True,
            "profile": user_data
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "profile": None
        }


# Create Gradio interface
# NOTE: the click handlers below stay lambdas on purpose; replacing them with
# named functions would change the auto-derived endpoint names.
with gr.Blocks(title="Piclets Discovery Server") as app:
    gr.Markdown("""
    # 🔍 Piclets Discovery Server

    Backend service for the Piclets discovery game.
    Each real-world object has ONE canonical Piclet!
    """)

    with gr.Tab("Search Piclet"):
        with gr.Row():
            with gr.Column():
                search_object = gr.Textbox(label="Object Name", placeholder="e.g., pillow")
                search_attrs = gr.Textbox(label="Attributes (comma-separated)", placeholder="e.g., velvet, blue")
                search_btn = gr.Button("Search", variant="primary")
            with gr.Column():
                search_result = gr.JSON(label="Search Result")

        search_btn.click(
            fn=lambda obj, attrs: search_piclet(obj, _parse_attributes(attrs)),
            inputs=[search_object, search_attrs],
            outputs=search_result
        )

    with gr.Tab("Create Canonical"):
        with gr.Row():
            with gr.Column():
                canonical_object = gr.Textbox(label="Object Name")
                canonical_data = gr.Textbox(label="Piclet Data (JSON)", lines=10)
                canonical_user = gr.Textbox(label="Username")
                canonical_btn = gr.Button("Create Canonical", variant="primary")
            with gr.Column():
                canonical_result = gr.JSON(label="Creation Result")

        canonical_btn.click(
            fn=create_canonical,
            inputs=[canonical_object, canonical_data, canonical_user],
            outputs=canonical_result
        )

    with gr.Tab("Create Variation"):
        with gr.Row():
            with gr.Column():
                var_object = gr.Textbox(label="Object Name")
                var_canonical = gr.Textbox(label="Canonical ID")
                var_attrs = gr.Textbox(label="Variation Attributes (comma-separated)")
                var_data = gr.Textbox(label="Piclet Data (JSON)", lines=10)
                var_user = gr.Textbox(label="Username")
                var_btn = gr.Button("Create Variation", variant="primary")
            with gr.Column():
                var_result = gr.JSON(label="Creation Result")

        var_btn.click(
            fn=lambda obj, cid, attrs, data, user: create_variation(
                cid, _parse_attributes(attrs), data, user, obj
            ),
            inputs=[var_object, var_canonical, var_attrs, var_data, var_user],
            outputs=var_result
        )

    with gr.Tab("Activity Feed"):
        activity_limit = gr.Slider(5, 50, value=20, label="Number of Activities")
        activity_btn = gr.Button("Get Recent Activity")
        activity_result = gr.JSON(label="Recent Discoveries")

        activity_btn.click(
            fn=get_recent_activity,
            inputs=activity_limit,
            outputs=activity_result
        )

    with gr.Tab("Leaderboard"):
        leader_limit = gr.Slider(5, 20, value=10, label="Top N Discoverers")
        leader_btn = gr.Button("Get Leaderboard")
        leader_result = gr.JSON(label="Top Discoverers")

        leader_btn.click(
            fn=get_leaderboard,
            inputs=leader_limit,
            outputs=leader_result
        )

    with gr.Tab("User Profile"):
        profile_user = gr.Textbox(label="Username")
        profile_btn = gr.Button("Get Profile")
        profile_result = gr.JSON(label="User Profile")

        profile_btn.click(
            fn=get_user_profile,
            inputs=profile_user,
            outputs=profile_result
        )

    # API Documentation
    gr.Markdown("""
    ## API Endpoints

    All endpoints accept JSON and return JSON responses.

    - **search_piclet**: Search for canonical or variation Piclets
    - **create_canonical**: Register a new canonical Piclet
    - **create_variation**: Add a variation to existing canonical
    - **increment_scan_count**: Track discovery popularity
    - **get_recent_activity**: Global discovery feed
    - **get_leaderboard**: Top discoverers
    - **get_user_profile**: Individual discovery stats

    See API_DOCUMENTATION.md for detailed usage.
    """)


if __name__ == "__main__":
    app.launch()