import json
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import parse_qs, urlparse

from datasets import load_dataset, Dataset

# Preprocessed metrics for the tracked libraries, one DatasetDict per metric.
datasets = {
    "stars": load_dataset("open-source-metrics/preprocessed_stars"),
    "issues": load_dataset("open-source-metrics/preprocessed_issues"),
    "pip": load_dataset("open-source-metrics/preprocessed_pip").sort('day'),
}

# Equivalent metrics for comparable external libraries.
external_datasets = {
    "pip": load_dataset("open-source-metrics/pip-external").sort('day'),
    "stars": load_dataset("open-source-metrics/stars-external"),
    "issues": load_dataset("open-source-metrics/issues-external"),
}

# Expose the external pip 'openai' split under the 'openai_python' key so it
# lines up with the naming used elsewhere.
external_datasets['pip']['openai_python'] = external_datasets['pip']['openai']
del external_datasets['pip']['openai']
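
# Assumed data layout (inferred from the handlers below): the preprocessed
# datasets map split names ('raw', 'wow', and for issues also 'eom'/'eom_wow')
# to tables with one column per library plus a shared 'day' column, while the
# external datasets hold one split per library, e.g.
# external_datasets['pip']['openai_python'].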


def cut_output(full_output: Dataset, library_names: list):
    """Keep only the requested libraries (plus 'day') and truncate every
    column where the longest requested series hits its first missing value."""
    output = full_output.to_dict().items()
    output = {k: v + [None] for k, v in output if k in library_names + ['day']}
    # The appended None is a sentinel, so .index(None) is defined even for
    # columns without missing values.
    last_value = max(output[k].index(None) for k in output.keys() if k != 'day')
    return {k: v[:last_value] for k, v in output.items()}
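
# For illustration (hypothetical values): given columns
#   {'day': [0, 1, 2], 'a': [1, None, None], 'b': [4, 5, None]}
# cut_output keeps the rows up to the longest series' first gap:
#   {'day': [0, 1], 'a': [1, None], 'b': [4, 5]}.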


def parse_name_and_options(path):
    """Extract the requested library names and option flags from the query string."""
    url = urlparse(path)
    query = parse_qs(url.query)
    # Fall back to an empty value so a request missing a parameter doesn't
    # raise a TypeError on the [0] indexing.
    library_names = query.get("input", [''])[0]
    library_names = library_names.split(',')
    options = query.get("options", [''])[0]
    options = options.split(',')
    return library_names, options
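
# For illustration (hypothetical request): parsing
#   /retrieveStars?input=transformers,datasets&options=1,2
# returns (['transformers', 'datasets'], ['1', '2']).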


def sum_of_lists(lists):
    """Element-wise sum of several aligned series, ignoring None entries;
    used to build the 'Cumulated' series returned by the handlers."""
    def _sum(items):
        return sum(item for item in items if item is not None)
    return [_sum(a) for a in zip(*lists)]
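
# For illustration: sum_of_lists([[1, 2, None], [3, None, 4]]) == [4, 2, 4].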


class RequestHandler(SimpleHTTPRequestHandler):
    def do_GET(self):
        print(self.path)

        # Serve the dashboard page itself.
        if self.path == "/":
            self.path = "index.html"
            return SimpleHTTPRequestHandler.do_GET(self)
        if self.path.startswith("/initialize"):
            # The set of internal libraries is taken from the split with the
            # most columns; drop the 'day' column and the external libraries.
            dataset_with_most_splits = max(datasets['stars'].column_names.values(), key=len)

            if 'day' in dataset_with_most_splits:
                dataset_with_most_splits.remove('day')

            external_dataset_keys = {k: set(v.keys()) for k, v in external_datasets.items()}
            external_dataset_with_most_splits = max(external_dataset_keys.values(), key=len)

            for external in external_dataset_with_most_splits:
                # Guard against external names absent from the preprocessed
                # columns, which would otherwise raise a ValueError.
                if external in dataset_with_most_splits:
                    dataset_with_most_splits.remove(external)

            warnings = []

            print("Initializing ...")

            for k, v in external_dataset_keys.items():
                if len(v) < len(external_dataset_with_most_splits):
                    warnings.append(
                        f"The {k} external dataset does not contain all splits. Missing: {external_dataset_with_most_splits - v}"
                        f".\nSelecting that split to show the pip install numbers will not work."
                    )

            dataset_with_most_splits = sorted(dataset_with_most_splits)
            external_dataset_with_most_splits = sorted(external_dataset_with_most_splits)
            res = {
                'internal': dataset_with_most_splits,
                'external': external_dataset_with_most_splits,
                'warnings': warnings
            }
            print(f"Returning: {res}")
            return self.response(res)
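
        # Shape of the JSON returned above (values illustrative):
        #   {"internal": ["datasets", ...], "external": ["pytorch", ...], "warnings": []}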

        if self.path.startswith("/retrievePipInstalls"):
            library_names, options = parse_name_and_options(self.path)
            # Option '1' asks for a single cumulated series, option '2' for
            # week-over-week numbers instead of the raw series.
            cumulated = '1' in options
            week_over_week = '2' in options

            data = datasets['pip']['wow' if week_over_week else 'raw'].to_dict()

            if cumulated:
                return self.response({
                    'Cumulated': sum_of_lists([v for k, v in data.items() if k in library_names]),
                    'day': data['day']
                })
            return self.response({k: v for k, v in data.items() if k in library_names + ['day']})

        if self.path.startswith("/retrieveStars"):
            library_names, options = parse_name_and_options(self.path)
            # Note the option numbering differs from the pip endpoint:
            # '1' is week-over-week and '2' is cumulated.
            week_over_week = '1' in options
            cumulated = '2' in options

            data = datasets['stars']['wow' if week_over_week else 'raw'].to_dict()

            if cumulated:
                return self.response({
                    'Cumulated': sum_of_lists([v for k, v in data.items() if k in library_names]),
                    'day': data['day']
                })
            return self.response({k: v for k, v in data.items() if k in library_names + ['day']})

        if self.path.startswith("/retrieveIssues"):
            library_names, options = parse_name_and_options(self.path)
            # '1' excludes issues opened by org members (the 'eom' splits),
            # '2' selects week-over-week data, '3' cumulates the series.
            exclude_org_members = '1' in options
            week_over_week = '2' in options
            cumulated = '3' in options

            if week_over_week:
                split = 'eom_wow' if exclude_org_members else 'wow'
                data = datasets['issues'][split].to_dict()
                if cumulated:
                    return self.response({
                        'Cumulated': sum_of_lists([v for k, v in data.items() if k in library_names]),
                        'day': data['day']
                    })
                if exclude_org_members:
                    # Truncate trailing missing values so all series end together.
                    return self.response(cut_output(datasets['issues']['eom_wow'], library_names))
                return self.response({k: v for k, v in data.items() if k in library_names + ['day']})

            # Cumulation is only offered for week-over-week data.
            split = 'eom' if exclude_org_members else 'raw'
            data = datasets['issues'][split].to_dict()
            return self.response({k: v for k, v in data.items() if k in library_names + ['day']})

        # Anything else falls through to static file serving.
        return SimpleHTTPRequestHandler.do_GET(self)

    def response(self, output):
        # Serialize the payload as a JSON 200 response.
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(output).encode("utf-8"))


server = ThreadingHTTPServer(("", 7860), RequestHandler)

print("Running on port 7860")

server.serve_forever()
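
# Example queries against a running instance (library names illustrative):
#   curl 'http://localhost:7860/initialize'
#   curl 'http://localhost:7860/retrievePipInstalls?input=transformers,datasets&options=1'
#   curl 'http://localhost:7860/retrieveIssues?input=transformers&options=1,2'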