nam pham committed
Commit ad042b1 · 1 Parent(s): 090dddd

feat: update app

Files changed (5)
  1. app.py +327 -54
  2. data/annotated_data.json +0 -0
  3. pyproject.toml +1 -0
  4. requirements.txt +2 -1
  5. uv.lock +11 -0
app.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from huggingface_hub import HfApi
+ from huggingface_hub import HfApi, create_repo
  import os
  import re
  import json
@@ -8,6 +8,11 @@ import random
  from typing import List, Dict, Union, Tuple
  from gliner import GLiNER
  from datasets import load_dataset
+ from dotenv import load_dotenv
+
+ # Load environment variables from .env
+ load_dotenv()
+ HF_TOKEN = os.getenv("HUGGINGFACE_ACCESS_TOKEN")

  # Available models for annotation
  AVAILABLE_MODELS = [
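The commit now reads the Hub token from a local .env file instead of hard-coding credentials. A minimal sketch of the lookup, assuming a .env file next to app.py; the hf_xxx value is a placeholder:

import os
from dotenv import load_dotenv

# .env is expected to contain a line such as:
#   HUGGINGFACE_ACCESS_TOKEN=hf_xxx   (placeholder value)
load_dotenv()  # copies .env entries into the process environment
HF_TOKEN = os.getenv("HUGGINGFACE_ACCESS_TOKEN")  # None if the variable is absent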
@@ -138,8 +143,58 @@ def extract_tokens_and_labels(data: List[Dict[str, Union[str, None]]]) -> Dict[s
  # Global variables for dataset viewer
  dynamic_dataset = None

+ def load_dataset():
+     global dynamic_dataset
+     try:
+         with open("data/annotated_data.json", 'rt') as dataset:
+             ANNOTATED_DATA = json.load(dataset)
+         dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
+         max_value = len(dynamic_dataset.data) - 1 if dynamic_dataset.data else 0
+         return prepare_for_highlight(dynamic_dataset.load_current_example()), gr.update(value=0, maximum=max_value)
+     except Exception as e:
+         return [("Error loading dataset: " + str(e), None)], gr.update(value=0, maximum=1)
+
+ def example_by_id(id):
+     global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)], gr.update(value=0, maximum=1)
+     try:
+         id = int(id)  # Ensure id is an integer
+         dynamic_dataset.example_by_id(id)
+         current = dynamic_dataset.current
+         max_value = len(dynamic_dataset.data) - 1
+         return prepare_for_highlight(dynamic_dataset.load_current_example()), gr.update(value=current, maximum=max_value)
+     except Exception as e:
+         return [("Error navigating to example: " + str(e), None)], gr.update(value=0, maximum=1)
+
+ def next_example():
+     global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)], gr.update(value=0, maximum=1)
+     try:
+         dynamic_dataset.next_example()
+         current = dynamic_dataset.current
+         max_value = len(dynamic_dataset.data) - 1
+         return prepare_for_highlight(dynamic_dataset.load_current_example()), gr.update(value=current, maximum=max_value)
+     except Exception as e:
+         return [("Error navigating to next example: " + str(e), None)], gr.update(value=0, maximum=1)
+
+ def previous_example():
+     global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)], gr.update(value=0, maximum=1)
+     try:
+         dynamic_dataset.previous_example()
+         current = dynamic_dataset.current
+         max_value = len(dynamic_dataset.data) - 1
+         return prepare_for_highlight(dynamic_dataset.load_current_example()), gr.update(value=current, maximum=max_value)
+     except Exception as e:
+         return [("Error navigating to previous example: " + str(e), None)], gr.update(value=0, maximum=1)
+
  def update_example(data):
      global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)]
      tokens, ner = extract_tokens_and_labels(data)
      dynamic_dataset.data[dynamic_dataset.current]["tokenized_text"] = tokens
      dynamic_dataset.data[dynamic_dataset.current]["ner"] = ner
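Each navigation helper returns gr.update(...) for the progress slider rather than a bare integer: one return value can then move the handle (value) and resize the track (maximum) when a dataset of a different size is loaded. A small sketch of the pattern; jump_to is an illustrative name, not part of the app:

import gradio as gr

def jump_to(index: int, dataset_size: int):
    # value moves the slider handle; maximum stretches its range
    return gr.update(value=index, maximum=max(dataset_size - 1, 1))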
@@ -147,36 +202,19 @@ def update_example(data):

  def validate_example():
      global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)]
      dynamic_dataset.data[dynamic_dataset.current]["validated"] = True
      return [("The example was validated!", None)]

- def next_example():
-     global dynamic_dataset
-     dynamic_dataset.next_example()
-     return prepare_for_highlight(dynamic_dataset.load_current_example()), dynamic_dataset.current
-
- def previous_example():
-     global dynamic_dataset
-     dynamic_dataset.previous_example()
-     return prepare_for_highlight(dynamic_dataset.load_current_example()), dynamic_dataset.current
-
  def save_dataset(inp):
      global dynamic_dataset
+     if dynamic_dataset is None:
+         return [("Please load a dataset first", None)]
      with open("data/annotated_data.json", "wt") as file:
          json.dump(dynamic_dataset.data, file)
      return [("The validated dataset was saved as data/annotated_data.json", None)]

- def load_dataset():
-     global dynamic_dataset
-     try:
-         with open("data/annotated_data.json", 'rt') as dataset:
-             ANNOTATED_DATA = json.load(dataset)
-         dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
-         max_value = len(dynamic_dataset.data) - 1 if dynamic_dataset.data else 0
-         return prepare_for_highlight(dynamic_dataset.load_current_example()), 0, max_value
-     except Exception as e:
-         return [("Error loading dataset: " + str(e), None)], 0, 0
-
  # Original annotation functions
  def transform_data(data):
      tokens = tokenize_text(data['text'])
@@ -209,8 +247,9 @@ def merge_entities(entities):
      merged.append(current)
      return merged

- def annotate_text(model, text, labels: List[str], threshold: float, nested_ner: bool) -> Dict:
+ def annotate_text(model: GLiNER, text, labels: List[str], threshold: float, nested_ner: bool) -> Dict:
      labels = [label.strip() for label in labels]
+     entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
      r = {
          "text": text,
          "entities": [
@@ -221,14 +260,36 @@ def annotate_text(model, text, labels: List[str], threshold: float, nested_ner:
                  "end": entity["end"],
                  "score": 0,
              }
-             for entity in model.predict_entities(
-                 text, labels, flat_ner=not nested_ner, threshold=threshold
-             )
+             for entity in entities
          ],
      }
      r["entities"] = merge_entities(r["entities"])
      return transform_data(r)

+ def batch_annotate_text(model: GLiNER, texts: List[str], labels: List[str], threshold: float, nested_ner: bool) -> List[Dict]:
+     """Annotate multiple texts in batch"""
+     labels = [label.strip() for label in labels]
+     batch_entities = model.batch_predict_entities(texts, labels, flat_ner=not nested_ner, threshold=threshold)
+
+     results = []
+     for text, entities in zip(texts, batch_entities):
+         r = {
+             "text": text,
+             "entities": [
+                 {
+                     "entity": entity["label"],
+                     "word": entity["text"],
+                     "start": entity["start"],
+                     "end": entity["end"],
+                     "score": 0,
+                 }
+                 for entity in entities
+             ],
+         }
+         r["entities"] = merge_entities(r["entities"])
+         results.append(transform_data(r))
+     return results
+
  class AutoAnnotator:
      def __init__(
          self, model: str = "knowledgator/gliner-multitask-large-v0.5",
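batch_annotate_text wraps GLiNER's batch_predict_entities, which takes a list of texts and returns one entity list per text. A hedged usage sketch; the checkpoint matches the default above, while the texts, labels, and threshold are illustrative:

from gliner import GLiNER

model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
texts = ["Alice moved to Paris.", "Bob works at Acme Corp."]
labels = ["person", "location", "organization"]
# one list of entity dicts per input text
per_text = model.batch_predict_entities(texts, labels, flat_ner=True, threshold=0.5)
for text, entities in zip(texts, per_text):
    print(text, [(e["text"], e["label"]) for e in entities])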
@@ -248,20 +309,29 @@ class AutoAnnotator:
      ) -> List[Dict]:
          self.stat["total"] = len(data)
          self.stat["current"] = -1 # Reset current progress
-         for text in data:
-             self.stat["current"] += 1
+
+         # Process texts in batches
+         batch_size = 32 # Adjust based on your GPU memory
+         processed_data = []
+
+         for i in range(0, len(data), batch_size):
+             batch_texts = data[i:i + batch_size]
              if isinstance(prompt, list):
                  prompt_text = random.choice(prompt)
              else:
                  prompt_text = prompt
-             text = f"{prompt_text}\n{text}" if prompt_text else text
-
-             annotation = annotate_text(self.model, text, labels, threshold, nested_ner)
-
-             if not annotation["ner"]: # If no entities identified
-                 annotation = {"tokenized_text": tokenize_text(text), "ner": [], "validated": False}
-
-             self.annotated_data.append(annotation)
+
+             # Add prompt to each text in batch
+             batch_texts = [f"{prompt_text}\n{text}" if prompt_text else text for text in batch_texts]
+
+             # Process batch
+             batch_results = batch_annotate_text(self.model, batch_texts, labels, threshold, nested_ner)
+             processed_data.extend(batch_results)
+
+             # Update progress
+             self.stat["current"] = min(i + batch_size, len(data))
+
+         self.annotated_data = processed_data
          return self.annotated_data

  # Global variables
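The rewritten auto_annotate walks the corpus in fixed-size windows, and min(i + batch_size, len(data)) clamps the progress counter on the final, possibly short batch. The chunking arithmetic in isolation (the 70-item list is a stand-in corpus):

data = [f"text {n}" for n in range(70)]  # stand-in for 70 input texts
batch_size = 32
for i in range(0, len(data), batch_size):
    batch = data[i:i + batch_size]
    done = min(i + batch_size, len(data))
    print(f"{done}/{len(data)} after a batch of {len(batch)}")
# prints: 32/70, 64/70, 70/70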
@@ -281,31 +351,61 @@ def process_uploaded_file(file_obj):
      except Exception as e:
          return f"Error reading file: {str(e)}"

- def annotate(model, labels, threshold, prompt):
+ def is_valid_repo_name(repo_name):
+     # Hugging Face repo names must not contain slashes or spaces
+     return bool(re.match(r'^[A-Za-z0-9_.-]+$', repo_name))
+
+ def create_hf_repo(repo_name: str, repo_type: str = "dataset", private: bool = False):
+     """Create a new repository on Hugging Face Hub"""
+     if not is_valid_repo_name(repo_name):
+         raise Exception("Invalid repo name: must not contain slashes, spaces, or special characters except '-', '_', '.'")
+     try:
+         api = HfApi(token=HF_TOKEN)
+         user = api.whoami()['name']
+         repo_id = f"{user}/{repo_name}"
+         create_repo(
+             repo_id=repo_id,
+             repo_type=repo_type,
+             private=private,
+             exist_ok=True,
+             token=HF_TOKEN
+         )
+         return repo_id
+     except Exception as e:
+         raise Exception(f"Error creating repository: {str(e)}")
+
+ def annotate(model, labels, threshold, prompt, save_to_hub, repo_name, repo_type, is_private):
      global annotator
      try:
          if not sentences:
              return "Please upload a file with text first!"
-
+         if save_to_hub and not is_valid_repo_name(repo_name):
+             return "Error: Invalid repo name. Only use letters, numbers, '-', '_', or '.' (no slashes or spaces)."
          labels = [label.strip() for label in labels.split(",")]
          annotator = AutoAnnotator(model)
          annotated_data = annotator.auto_annotate(sentences, labels, prompt, threshold)
-
-         # Save annotated data
+         # Save annotated data locally
          os.makedirs("data", exist_ok=True)
-         with open("data/annotated_data.json", "wt") as file:
+         local_path = "data/annotated_data.json"
+         with open(local_path, "wt") as file:
              json.dump(annotated_data, file, ensure_ascii=False)
-
-         # Upload to Hugging Face Hub
-         api = HfApi()
-         api.upload_file(
-             path_or_fileobj="data/annotated_data.json",
-             path_in_repo="annotated_data.json",
-             repo_id="YOUR_USERNAME/YOUR_REPO_NAME", # Replace with your repo
-             repo_type="dataset"
-         )
-
-         return "Successfully annotated and saved to Hugging Face Hub!"
+         status_messages = [f"Successfully annotated and saved locally to {local_path}"]
+         # Upload to Hugging Face Hub if requested
+         if save_to_hub:
+             try:
+                 repo_id = create_hf_repo(repo_name, repo_type, is_private)
+                 api = HfApi(token=HF_TOKEN)
+                 api.upload_file(
+                     path_or_fileobj=local_path,
+                     path_in_repo="annotated_data.json",
+                     repo_id=repo_id,
+                     repo_type=repo_type,
+                     token=HF_TOKEN
+                 )
+                 status_messages.append(f"Successfully uploaded to Hugging Face Hub repository: {repo_id}")
+             except Exception as e:
+                 status_messages.append(f"Error with Hugging Face Hub: {str(e)}")
+         return "\n".join(status_messages)
      except Exception as e:
          return f"Error during annotation: {str(e)}"

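is_valid_repo_name anchors its regex at both ends, so any slash or space fails the match; the user namespace is prepended later as f"{user}/{repo_name}" in create_hf_repo. A few illustrative checks:

import re

def is_valid_repo_name(repo_name: str) -> bool:
    # letters, digits, '-', '_', '.' only; no slashes or spaces
    return bool(re.match(r'^[A-Za-z0-9_.-]+$', repo_name))

assert is_valid_repo_name("my-ner-dataset")
assert not is_valid_repo_name("user/my-ner-dataset")  # slash rejected
assert not is_valid_repo_name("my dataset")           # space rejected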
@@ -504,6 +604,76 @@ def process_local_file(file_obj, file_format):
      except Exception as e:
          return f"Error processing file: {str(e)}"

+ # Add a function to download the annotated data
+
+ def download_annotated_data():
+     file_path = "data/annotated_data.json"
+     if os.path.exists(file_path):
+         return file_path
+     else:
+         return None
+
+ def download_to_folder():
+     """Download annotated data to a local folder"""
+     try:
+         source_path = "data/annotated_data.json"
+         if not os.path.exists(source_path):
+             return "No annotated data found!"
+
+         # Create downloads directory if it doesn't exist
+         download_dir = os.path.expanduser("~/Downloads")
+         os.makedirs(download_dir, exist_ok=True)
+
+         # Copy file to downloads folder
+         import shutil
+         dest_path = os.path.join(download_dir, "annotated_data.json")
+         shutil.copy2(source_path, dest_path)
+         return f"Successfully downloaded to {dest_path}"
+     except Exception as e:
+         return f"Error downloading file: {str(e)}"
+
+ def update_hf_dataset(repo_name: str, repo_type: str = "dataset", is_private: bool = False):
+     """Update or create a Hugging Face dataset with the current annotated data"""
+     try:
+         if not dynamic_dataset or not dynamic_dataset.data:
+             return "No data to upload! Please load or annotate data first."
+
+         # Save current data to local file
+         os.makedirs("data", exist_ok=True)
+         local_path = "data/annotated_data.json"
+         with open(local_path, "wt") as file:
+             json.dump(dynamic_dataset.data, file, ensure_ascii=False)
+
+         # Create or update repository
+         api = HfApi(token=HF_TOKEN)  # bind the client before the try so the fallback below can reuse it
+         try:
+             repo_id = create_hf_repo(repo_name, repo_type, is_private)
+             api.upload_file(
+                 path_or_fileobj=local_path,
+                 path_in_repo="annotated_data.json",
+                 repo_id=repo_id,
+                 repo_type=repo_type,
+                 token=HF_TOKEN
+             )
+             return f"Successfully uploaded to Hugging Face Hub repository: {repo_id}"
+         except Exception as e:
+             if "already exists" in str(e):
+                 # If repo exists, just update the file
+                 user = api.whoami()['name']
+                 repo_id = f"{user}/{repo_name}"
+                 api.upload_file(
+                     path_or_fileobj=local_path,
+                     path_in_repo="annotated_data.json",
+                     repo_id=repo_id,
+                     repo_type=repo_type,
+                     token=HF_TOKEN
+                 )
+                 return f"Successfully updated existing repository: {repo_id}"
+             else:
+                 raise e
+     except Exception as e:
+         return f"Error updating Hugging Face dataset: {str(e)}"
+
  # Create the main interface with tabs
  with gr.Blocks() as demo:
      gr.Markdown("# NER Annotation Tool")
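update_hf_dataset performs the same create-then-upload round-trip as annotate; since create_repo is called with exist_ok=True, an existing repository does not raise, so the "already exists" fallback is rarely reached. The happy path in isolation, with a placeholder token and repository name:

from huggingface_hub import HfApi, create_repo

token = "hf_xxx"  # placeholder; the app reads this from .env
api = HfApi(token=token)
repo_id = f"{api.whoami()['name']}/my-ner-dataset"  # placeholder repo name
create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True, token=token)
api.upload_file(
    path_or_fileobj="data/annotated_data.json",
    path_in_repo="annotated_data.json",
    repo_id=repo_id,
    repo_type="dataset",
    token=token,
)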
@@ -539,14 +709,77 @@ with gr.Blocks() as demo:
                  placeholder="Enter your annotation prompt (optional)",
                  scale=2
              )
+
+             with gr.Group():
+                 gr.Markdown("### Save Options")
+                 save_to_hub = gr.Checkbox(
+                     label="Save to Hugging Face Hub",
+                     value=False
+                 )
+
+                 with gr.Group(visible=False) as hub_settings:
+                     gr.Markdown("#### Hugging Face Hub Settings")
+                     repo_name = gr.Textbox(
+                         label="Repository Name",
+                         placeholder="Enter repository name (e.g., my-ner-dataset)",
+                         scale=2
+                     )
+                     repo_type = gr.Dropdown(
+                         choices=["dataset", "model", "space"],
+                         value="dataset",
+                         label="Repository Type"
+                     )
+                     is_private = gr.Checkbox(
+                         label="Private Repository",
+                         value=False
+                     )
+
              annotate_btn = gr.Button("Annotate Data")
              output_info = gr.Textbox(label="Processing Status")

+             # Add download buttons for annotated data
+             with gr.Row():
+                 download_btn_annot = gr.Button("Download Annotated Data", visible=False)
+                 download_file_annot = gr.File(label="Download", interactive=False, visible=False)
+                 download_status = gr.Textbox(label="Download Status", visible=False)
+
+             def toggle_hub_settings(save_to_hub):
+                 return {
+                     hub_settings: gr.update(visible=save_to_hub)
+                 }
+
+             save_to_hub.change(
+                 fn=toggle_hub_settings,
+                 inputs=[save_to_hub],
+                 outputs=[hub_settings]
+             )
+
+             def show_download_buttons(status):
+                 # Show download buttons only if annotation was successful
+                 if status and status.startswith("Successfully annotated and saved locally"):
+                     return gr.update(visible=True), gr.update(visible=True)
+                 return gr.update(visible=False), gr.update(visible=False)
+
              annotate_btn.click(
                  fn=annotate,
-                 inputs=[model, labels, threshold, prompt],
+                 inputs=[
+                     model, labels, threshold, prompt,
+                     save_to_hub, repo_name, repo_type, is_private
+                 ],
                  outputs=[output_info]
              )
+             output_info.change(
+                 fn=show_download_buttons,
+                 inputs=[output_info],
+                 outputs=[download_btn_annot, download_status]
+             )
+             def handle_download_annot():
+                 file_path = download_annotated_data()
+                 if file_path:
+                     return gr.update(value=file_path, visible=True)
+                 else:
+                     return gr.update(visible=False)
+             download_btn_annot.click(fn=handle_download_annot, inputs=None, outputs=[download_file_annot])

          with gr.TabItem("Dataset Viewer"):
              with gr.Row():
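The Save Options group relies on Gradio's show/hide idiom: a container created with visible=False is revealed by returning gr.update(visible=...) from the checkbox's change handler, exactly as toggle_hub_settings does above. The idiom reduced to its core:

import gradio as gr

with gr.Blocks() as ui:
    opt_in = gr.Checkbox(label="Save to Hugging Face Hub", value=False)
    with gr.Group(visible=False) as settings:
        gr.Textbox(label="Repository Name")
    # checkbox state in, visibility update out
    opt_in.change(lambda on: gr.update(visible=on), inputs=opt_in, outputs=settings)

Launching this sketch shows the textbox only after the box is ticked.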
@@ -576,7 +809,14 @@
                  visible=False
              )

-             bar = gr.Slider(minimum=0, maximum=1, step=1, label="Progress", interactive=False)
+             bar = gr.Slider(
+                 minimum=0,
+                 maximum=1,
+                 step=1,
+                 label="Progress",
+                 interactive=True,
+                 info="Use slider to navigate through examples"
+             )

              with gr.Row():
                  previous_btn = gr.Button("Previous example")
@@ -586,6 +826,26 @@
              validate_btn = gr.Button("Validate")
              save_btn = gr.Button("Save validated dataset")

+             # Add Hugging Face upload section
+             with gr.Group():
+                 gr.Markdown("### Upload to Hugging Face")
+                 hf_repo_name = gr.Textbox(
+                     label="Repository Name",
+                     placeholder="Enter repository name (e.g., my-ner-dataset)",
+                     scale=2
+                 )
+                 hf_repo_type = gr.Dropdown(
+                     choices=["dataset", "model", "space"],
+                     value="dataset",
+                     label="Repository Type"
+                 )
+                 hf_is_private = gr.Checkbox(
+                     label="Private Repository",
+                     value=False
+                 )
+                 upload_to_hf_btn = gr.Button("Upload to Hugging Face")
+                 hf_upload_status = gr.Textbox(label="Upload Status")
+
              inp_box = gr.HighlightedText(value=None, interactive=True)

          def toggle_local_inputs():
@@ -647,5 +907,18 @@ with gr.Blocks() as demo:
      validate_btn.click(fn=validate_example, inputs=None, outputs=inp_box)
      next_btn.click(fn=next_example, inputs=None, outputs=[inp_box, bar])
      previous_btn.click(fn=previous_example, inputs=None, outputs=[inp_box, bar])
+     bar.change(
+         fn=example_by_id,
+         inputs=[bar],
+         outputs=[inp_box, bar],
+         api_name="example_by_id"
+     )
+
+     # Add Hugging Face upload functionality
+     upload_to_hf_btn.click(
+         fn=update_hf_dataset,
+         inputs=[hf_repo_name, hf_repo_type, hf_is_private],
+         outputs=[hf_upload_status]
+     )

  demo.launch()
 
data/annotated_data.json CHANGED
The diff for this file is too large to render.
 
pyproject.toml CHANGED
@@ -9,4 +9,5 @@ dependencies = [
      "gliner>=0.2.20",
      "gradio>=5.31.0",
      "huggingface-hub>=0.32.1",
+     "python-dotenv>=1.1.0",
  ]
requirements.txt CHANGED
@@ -1,4 +1,5 @@
  gradio==5.31.0
  datasets>=3.6.0
  gliner>=0.2.20
- huggingface-hub>=0.32.1
+ huggingface-hub>=0.32.1
+ python-dotenv>=1.1.0
uv.lock CHANGED
@@ -902,6 +902,7 @@ dependencies = [
      { name = "gliner" },
      { name = "gradio" },
      { name = "huggingface-hub" },
+     { name = "python-dotenv" },
  ]

  [package.metadata]
@@ -910,6 +911,7 @@ requires-dist = [
      { name = "gliner", specifier = ">=0.2.20" },
      { name = "gradio", specifier = ">=5.31.0" },
      { name = "huggingface-hub", specifier = ">=0.32.1" },
+     { name = "python-dotenv", specifier = ">=1.1.0" },
  ]

  [[package]]
@@ -1646,6 +1648,15 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
  ]

+ [[package]]
+ name = "python-dotenv"
+ version = "1.1.0"
+ source = { registry = "https://pypi.org/simple" }
+ sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920, upload-time = "2025-03-25T10:14:56.835Z" }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256, upload-time = "2025-03-25T10:14:55.034Z" },
+ ]
+
  [[package]]
  name = "python-multipart"
  version = "0.0.20"