"""Compare MBR decoding against beam search using average ChrF scores.

The script reads reference translations, beam-search translations and
per-sentence sample sets from the ``mt_data`` directory next to this file,
selects one hypothesis per sample set with ``select_best_hypothesis``,
scores both systems against the references with ``calculate_chrf`` and
prints the two averages together with a verdict.
"""

import json
from pathlib import Path

from assignment5.chrf import calculate_chrf
from assignment5.mbr import select_best_hypothesis


def _read_lines(path):
    """Return the lines of the text file at *path*, without line endings.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.
    """
    return path.read_text(encoding="utf-8").splitlines()


def _read_samples(path):
    """Return the ``"samples"`` entry of every non-blank JSON line in *path*.

    Blank lines (e.g. a trailing empty line) are skipped instead of being
    handed to ``json.loads``, which would raise on them.
    """
    with open(path, encoding="utf-8") as f:
        return [json.loads(line)["samples"] for line in f if line.strip()]


def _require_aligned(label, items, references):
    """Raise ``ValueError`` unless *items* has one entry per reference.

    ``zip`` stops at the shorter input, so without this check a length
    mismatch would silently average over a subset of the sentences.
    """
    if len(items) != len(references):
        raise ValueError(
            f"{label} has {len(items)} entries but there are "
            f"{len(references)} reference translations"
        )


# Load data
data_dir = Path(__file__).parent / "mt_data"
source_sentences = _read_lines(data_dir / "source_sentences.txt")
reference_translations = _read_lines(data_dir / "reference_translations.txt")
beam_search_translations = _read_lines(data_dir / "beam_search_translations.txt")
samples = _read_samples(data_dir / "samples.jsonl")

# Fail early, before the decoding step, if the inputs cannot be compared.
if not reference_translations:
    raise ValueError("reference_translations.txt contains no translations")
_require_aligned("samples.jsonl", samples, reference_translations)
_require_aligned(
    "beam_search_translations.txt", beam_search_translations, reference_translations
)

# Step 1: Select the best hypothesis for each source sentence using MBR decoding
mbr_translations = [select_best_hypothesis(sample_set) for sample_set in samples]

# Step 2: Calculate ChrF scores for MBR translations
mbr_chrf_scores = [
    calculate_chrf(mbr_translation, reference)
    for mbr_translation, reference in zip(mbr_translations, reference_translations)
]
average_mbr_chrf = sum(mbr_chrf_scores) / len(mbr_chrf_scores)

# Step 3: Calculate ChrF scores for beam search translations
beam_chrf_scores = [
    calculate_chrf(beam_translation, reference)
    for beam_translation, reference in zip(
        beam_search_translations, reference_translations
    )
]
average_beam_chrf = sum(beam_chrf_scores) / len(beam_chrf_scores)

# Step 4: Print the results
print(f"Average ChrF score for MBR decoding: {average_mbr_chrf:.2f}")
print(f"Average ChrF score for beam search: {average_beam_chrf:.2f}")

# An exact tie is reported as such rather than being credited to beam search.
if average_mbr_chrf > average_beam_chrf:
    print("MBR decoding produced better translations.")
elif average_mbr_chrf < average_beam_chrf:
    print("Beam search produced better translations.")
else:
    print("MBR decoding and beam search produced equally good translations.")