import gradio as gr from datetime import datetime from typing import Any, Dict, Iterable, List, Optional, Tuple from collections import Counter import json import os import html as html_lib import base64 from pathlib import Path from huggingface_hub import HfApi, InferenceClient import requests def _created_year(obj): if hasattr(obj, "created_at"): dt = getattr(obj, "created_at") return dt.year def _year_from_iso(value: Any) -> Optional[int]: if not value or not isinstance(value, str): return None try: # e.g. 2025-12-12T18:40:13.000Z dt = datetime.fromisoformat(value.replace("Z", "+00:00")) return dt.year except Exception: return None _ASSET_CACHE: Dict[str, str] = {} def _asset_data_uri(filename: str) -> str: """ Returns a data URI (base64) for a local asset in this repo. Cached in-memory to avoid re-reading files every render. """ if filename in _ASSET_CACHE: return _ASSET_CACHE[filename] path = Path(__file__).resolve().parent / filename try: raw = path.read_bytes() b64 = base64.b64encode(raw).decode("ascii") ext = path.suffix.lower() mime = "image/png" if ext == ".gif": mime = "image/gif" elif ext in (".jpg", ".jpeg"): mime = "image/jpeg" elif ext == ".webp": mime = "image/webp" uri = f"data:{mime};base64,{b64}" _ASSET_CACHE[filename] = uri return uri except Exception: # If missing, return empty string to avoid breaking HTML return "" def _http_get_json(url: str, *, token: Optional[str] = None, params: Optional[Dict[str, Any]] = None) -> Any: headers: Dict[str, str] = {} if token: headers["Authorization"] = f"Bearer {token}" r = requests.get(url, headers=headers, params=params, timeout=25) r.raise_for_status() return r.json() def fetch_likes_left_2025(username: str, token: Optional[str] = None) -> int: """ Count likes the user left in 2025 via /api/users/{username}/likes. Endpoint returns a list with `createdAt` descending. 
""" url = f"https://huggingface.co/api/users/{username}/likes" try: data = _http_get_json(url, token=token) except Exception: return 0 if not isinstance(data, list): return 0 total = 0 for item in data: if not isinstance(item, dict): continue yr = _year_from_iso(item.get("createdAt")) if yr is None: continue if yr < 2025: break if yr == 2025: total += 1 return total def _repo_id(obj: Any) -> str: if isinstance(obj, dict): return obj.get("id") or obj.get("modelId") or obj.get("repoId") or "N/A" return ( getattr(obj, "id", None) or getattr(obj, "modelId", None) or getattr(obj, "repoId", None) or getattr(obj, "repo_id", None) or "N/A" ) def _repo_likes(obj: Any) -> int: return int(getattr(obj, "likes", 0) or 0) def _repo_tags(obj: Any) -> List[str]: tags = getattr(obj, "tags", None) or [] return [t for t in tags if isinstance(t, str)] def _repo_pipeline_tag(obj: Any) -> Optional[str]: val = getattr(obj, "pipeline_tag", None) return val def _repo_library_name(obj: Any) -> Optional[str]: val = getattr(obj, "library_name", None) if isinstance(val, str) and val.strip(): return val.strip() val = getattr(obj, "libraryName", None) if isinstance(val, str) and val.strip(): return val.strip() return None def _collect_2025_sorted_desc(items: Iterable[Any]) -> List[Any]: """ We rely on API-side sorting (createdAt desc) + early-stop once we hit < 2025. This avoids pulling a user's entire history. 
""" out: List[Any] = [] for item in items: yr = _created_year(item) if yr is None: continue if yr < 2025: break if yr == 2025: out.append(item) return out def fetch_user_data_2025(username: str, token: Optional[str] = None) -> Dict[str, List[Any]]: """Fetch user's models/datasets/spaces created in 2025 (API-side sort + paginated early-stop).""" api = HfApi(token=token) data: Dict[str, List[Any]] = {"models": [], "datasets": [], "spaces": []} try: data["models"] = _collect_2025_sorted_desc( api.list_models(author=username, full=True, sort="createdAt", direction=-1) ) except Exception: data["models"] = [] try: data["datasets"] = _collect_2025_sorted_desc( api.list_datasets(author=username, full=True, sort="createdAt", direction=-1) ) except Exception: data["datasets"] = [] # list_spaces full=True isn't supported in some versions; fall back if needed try: data["spaces"] = _collect_2025_sorted_desc( api.list_spaces(author=username, full=True, sort="createdAt", direction=-1) ) except Exception: try: data["spaces"] = _collect_2025_sorted_desc( api.list_spaces(author=username, sort="createdAt", direction=-1) ) except Exception: data["spaces"] = [] return data def _normalize_task_tag(tag: str) -> Optional[str]: t = (tag or "").strip() if not t: return None for prefix in ("task_categories:", "task_ids:", "pipeline_tag:"): if t.startswith(prefix): t = t[len(prefix):].strip() t = t.strip().lower() return t or None def _suggested_nickname_for_task(task: Optional[str]) -> Optional[str]: if not task: return None t = task.strip().lower() mapping = { "text-generation": "LLM Whisperer ๐ฃ๏ธ", "image-text-to-text": "VLM Nerd ๐ค", "text-to-speech": "Fullโtime Yapper ๐ฃ๏ธ", "automatic-speech-recognition": "Subtitle Goblin ๐ง", "text-to-image": "Diffusion Gremlin ๐จ", "image-classification": "Pixel Judge ๐๏ธ", "token-classification": "NERd Lord ๐ค", "text-classification": "Opinion Machine ๐ง ", "translation": "Language Juggler ๐บ๏ธ", "summarization": "TL;DR Dealer โ๏ธ", "image-to-text": 
"Caption Connoisseur ๐ผ๏ธ", "zero-shot-classification": "Label Wizard ๐ช", } return mapping.get(t) def infer_task_and_modality(models: List[Any], datasets: List[Any], spaces: List[Any]) -> Tuple[Optional[str], Counter]: """ Returns: (most_common_task, task_counter) - Task is primarily inferred from model `pipeline_tag`, then from task-ish tags on all artifacts. """ model_tasks: List[str] = [] for m in models: pt = _repo_pipeline_tag(m) if pt: model_tasks.append(pt.strip().lower()) tag_tasks: List[str] = [] for obj in (models + datasets + spaces): for tag in _repo_tags(obj): nt = _normalize_task_tag(tag) if nt: tag_tasks.append(nt) counts = Counter(model_tasks if model_tasks else tag_tasks) top_task = counts.most_common(1)[0][0] if counts else None return top_task, counts def infer_most_common_library(models: List[Any]) -> Optional[str]: libs: List[str] = [] for m in models: ln = _repo_library_name(m) if ln: libs.append(ln) if not libs: return None return Counter(libs).most_common(1)[0][0] def _k2_model_candidates() -> List[str]: """ Kimi K2 repo IDs can vary; allow override via env and try a small list. 
""" env_model = (os.getenv("KIMI_K2_MODEL") or "moonshotai/Kimi-K2-Instruct").strip() candidates = [env_model] # de-dupe while preserving order seen = set() out = [] for c in candidates: if c and c not in seen: out.append(c) seen.add(c) return out def _esc(value: Any) -> str: if value is None: return "" return html_lib.escape(str(value), quote=True) def _profile_username(profile: Any) -> Optional[str]: if profile is None: return None for key in ("username", "preferred_username", "name", "user", "handle"): val = getattr(profile, key, None) if isinstance(val, str) and val.strip(): return val.strip().lstrip("@") data = getattr(profile, "data", None) if isinstance(data, dict): for key in ("username", "preferred_username", "name"): val = data.get(key) if isinstance(val, str) and val.strip(): return val.strip().lstrip("@") for container in ("profile", "user"): blob = data.get(container) if isinstance(blob, dict): val = blob.get("username") or blob.get("preferred_username") or blob.get("name") if isinstance(val, str) and val.strip(): return val.strip().lstrip("@") if isinstance(profile, dict): val = profile.get("username") or profile.get("preferred_username") or profile.get("name") if isinstance(val, str) and val.strip(): return val.strip().lstrip("@") return None def _profile_token(profile: Any) -> Optional[str]: """ Gradio's OAuth payload varies by version. We try common attribute names and `.data` shapes. 
""" if profile is None: return None for key in ("token", "access_token", "hf_token", "oauth_token", "oauth_access_token"): val = getattr(profile, key, None) if isinstance(val, str) and val.strip(): return val.strip() data = getattr(profile, "data", None) if isinstance(data, dict): for key in ("token", "access_token", "hf_token", "oauth_token", "oauth_access_token"): val = data.get(key) if isinstance(val, str) and val.strip(): return val.strip() # Common nested objects oauth_info = data.get("oauth_info") or data.get("oauth") or data.get("oauthInfo") or {} if isinstance(oauth_info, dict): val = oauth_info.get("access_token") or oauth_info.get("token") if isinstance(val, str) and val.strip(): return val.strip() if isinstance(profile, dict): val = profile.get("token") or profile.get("access_token") if isinstance(val, str) and val.strip(): return val.strip() return None def generate_roast_and_nickname_with_k2( *, username: str, total_artifacts_2025: int, models_2025: int, datasets_2025: int, spaces_2025: int, top_task: Optional[str], ) -> Tuple[Optional[str], Optional[str]]: """ Calls Kimi K2 via Hugging Face Inference Providers (via huggingface_hub InferenceClient). Returns (nickname, roast). If call fails, returns (None, None). """ token = (os.getenv("HF_TOKEN") or "").strip() if not token: return None, None vibe = top_task or "mysterious vibes" above_below = "above" if total_artifacts_2025 > 20 else "below" suggested = _suggested_nickname_for_task(top_task) system = ( "You are a witty, playful roast-comedian. Keep it fun, not cruel. " "No slurs, no hate, no harassment. Avoid profanity. Keep it short." ) user = f""" Create TWO things about this Hugging Face user, based on their 2025 activity stats. User: @{username} Artifacts created in 2025: {total_artifacts_2025} (models={models_2025}, datasets={datasets_2025}, spaces={spaces_2025}) which is {above_below} 20. 
Top task (pipeline_tag): {top_task or "unknown"} Nickname guidance (examples you SHOULD follow when applicable): - text-generation -> LLM Whisperer ๐ฃ๏ธ - image-text-to-text -> VLM Nerd ๐ค - text-to-speech -> Fullโtime Yapper ๐ฃ๏ธ If top task is known and you have a strong matching idea, pick a nickname like the examples. {f'If unsure, you may use this suggested nickname: {suggested}' if suggested else ''} Roast should reference the task and whether they are above/below 20 artifacts. Most common vibe: {vibe} Return ONLY valid JSON with exactly these keys: {{ "nickname": "...", // short, funny, can include 1 emoji "roast": "..." // 1-2 sentences max, playful, no bullying }} """.strip() client = InferenceClient(model="moonshotai/Kimi-K2-Instruct", token=token) resp = client.chat.completions.create( model="moonshotai/Kimi-K2-Instruct", messages=[ {"role": "system", "content": system}, {"role": "user", "content": user}, ], max_tokens=180, temperature=0.8, ) content = (resp.choices[0].message.content or "").strip() payload = json.loads(content) nickname = payload.get("nickname") roast = payload.get("roast") nickname_out = nickname.strip() if isinstance(nickname, str) else None roast_out = roast.strip() if isinstance(roast, str) else None return nickname_out, roast_out def generate_wrapped_report(profile: gr.OAuthProfile) -> str: """Generate the HF Wrapped 2025 report""" username = _profile_username(profile) or "unknown" token = _profile_token(profile) # Fetch 2025 data (API-side sort + early stop) user_data_2025 = fetch_user_data_2025(username, token) models_2025 = user_data_2025["models"] datasets_2025 = user_data_2025["datasets"] spaces_2025 = user_data_2025["spaces"] most_liked_model = max(models_2025, key=_repo_likes) if models_2025 else None most_liked_dataset = max(datasets_2025, key=_repo_likes) if datasets_2025 else None most_liked_space = max(spaces_2025, key=_repo_likes) if spaces_2025 else None total_likes = sum(_repo_likes(x) for x in (models_2025 + 
datasets_2025 + spaces_2025)) top_task, _task_counts = infer_task_and_modality(models_2025, datasets_2025, spaces_2025) top_library = infer_most_common_library(models_2025) total_artifacts_2025 = len(models_2025) + len(datasets_2025) + len(spaces_2025) nickname, roast = generate_roast_and_nickname_with_k2( username=username, total_artifacts_2025=total_artifacts_2025, models_2025=len(models_2025), datasets_2025=len(datasets_2025), spaces_2025=len(spaces_2025), top_task=top_task, ) # New 2025 engagement stats likes_left_2025 = fetch_likes_left_2025(username, token) # Inline icons (local assets) like_icon = _asset_data_uri("like_logo.png") likes_received_icon = _asset_data_uri("likes_received.png") model_icon = _asset_data_uri("model_logo.png") dataset_icon = _asset_data_uri("dataset_logo.png") spaces_icon = _asset_data_uri("spaces_logo.png") vibe_icon = _asset_data_uri("vibe_logo.gif") # Create HTML report html = f"""
Please log in with your Hugging Face account to see your personalized report!
Click the "Sign in with Hugging Face" button above 👆
Discover your Hugging Face journey this year!