File size: 3,580 Bytes
922c3ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
"""

Encoding Fix Script for Legal Dashboard OCR

==========================================



This script fixes Unicode encoding issues that can occur on Windows systems.

Based on solutions from: https://docs.appseed.us/content/how-to-fix/unicodedecodeerror-charmap-codec-cant-decode-byte-0x9d/

"""

import os
import sys
import codecs


def fix_file_encoding(file_path, target_encoding='utf-8'):
    """Re-encode a text file in place using ``target_encoding``.

    The file is decoded with the first candidate encoding that succeeds
    (UTF-8, then cp1252, then latin-1), a leading byte-order mark is
    dropped, and the text is written back in ``target_encoding``.

    Args:
        file_path: Path of the file to rewrite.
        target_encoding: Codec used for the rewritten file
            (default ``'utf-8'``).

    Returns:
        True if the file was rewritten. False if it could not be read,
        decoded or encoded; the reason is printed and, for encoding
        failures, the file on disk is left untouched.
    """
    # Order matters: strict UTF-8 first, then cp1252 (which rejects a few
    # undefined bytes), and finally latin-1, which maps every byte and is
    # therefore the catch-all.
    encodings_to_try = ('utf-8', 'cp1252', 'latin-1')

    try:
        content = None

        for encoding in encodings_to_try:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            print(
                f"βœ… Successfully read {file_path} with {encoding} encoding")
            break

        if content is None:
            print(f"❌ Could not read {file_path} with any encoding")
            return False

        # Plain 'utf-8' decodes a BOM as U+FEFF instead of discarding it;
        # drop it so the rewritten file does not carry a stray marker.
        if content.startswith('\ufeff'):
            content = content[1:]

        # Encode once up front: opening with 'w' truncates the file, so an
        # unknown codec or unencodable character must surface before that.
        content.encode(target_encoding)

        with open(file_path, 'w', encoding=target_encoding) as f:
            f.write(content)

        print(f"βœ… Fixed encoding for {file_path}")
        return True

    except Exception as e:
        # Best-effort tool: report the failure and let the caller continue
        # with the remaining files.
        print(f"❌ Error fixing {file_path}: {e}")
        return False


def fix_project_encoding():
    """Run the encoding fix over a fixed list of project files.

    Paths are relative to the current working directory. A missing file is
    reported and skipped, but still counts towards the total.

    Returns:
        True only when every listed file was fixed successfully.
    """
    print("πŸ”§ Fixing encoding issues in Legal Dashboard OCR project...")

    # Files that might have encoding issues
    candidates = (
        "huggingface_space/app.py",
        "huggingface_space/README.md",
        "requirements.txt",
        "README.md",
        "DEPLOYMENT_INSTRUCTIONS.md",
        "FINAL_DEPLOYMENT_INSTRUCTIONS.md",
        "DEPLOYMENT_SUMMARY.md",
    )

    repaired = 0
    for path in candidates:
        if not os.path.exists(path):
            print(f"⚠️ File not found: {path}")
            continue
        if fix_file_encoding(path):
            repaired += 1

    expected = len(candidates)
    print("\nπŸ“Š Encoding Fix Results:")
    print(f"βœ… Fixed: {repaired}/{expected} files")

    return repaired == expected


def set_environment_encoding():
    """Set ``PYTHONUTF8=1`` in this process's environment and print shell hints.

    NOTE(review): Python reads PYTHONUTF8 at interpreter startup, so this
    assignment presumably only affects child processes started afterwards,
    not the running interpreter - confirm against the Python UTF-8 mode docs.
    """
    print("\nπŸ”§ Setting environment variables for encoding...")

    # Set UTF-8 environment variable for Windows
    os.environ.update({'PYTHONUTF8': '1'})

    # Equivalent commands for the two common Windows shells.
    shell_hints = (
        "For Windows CMD, run: set PYTHONUTF8=1",
        "For PowerShell, run: $env:PYTHONUTF8=1",
    )
    for hint in shell_hints:
        print(hint)

    print("βœ… Environment encoding variables set")


def main():
    """Run the file fix and the environment setup, then report the outcome.

    Returns:
        Process exit code: 0 when every file was fixed, otherwise 1.
    """
    rule = "=" * 50

    print("πŸš€ Legal Dashboard OCR - Encoding Fix")
    print(rule)

    # Fix the files first, then set the environment variable.
    all_fixed = fix_project_encoding()
    set_environment_encoding()

    print("\n" + rule)
    if not all_fixed:
        print("⚠️ Some encoding issues remain")
        print("Please check the files manually")
        return 1

    print("πŸŽ‰ All encoding issues fixed!")
    print("βœ… Project is ready for deployment")
    return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)