Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| from .zip_extractor import ZipExtractor | |
| import logging | |
class DataIngestion:
    """Extract a zip archive and load one of its CSV files into a DataFrame."""

    def data_ingestion(self, zip_path: str) -> pd.DataFrame:
        """
        Extract a zip file and read a CSV from it into a DataFrame.

        The archive is unpacked with ``ZipExtractor``; the CSV that comes
        first in sorted file-name order inside ``extractor.output_folder``
        is then loaded with ``pandas.read_csv``. Progress and failures are
        reported through the root logger.

        Parameters:
            zip_path : str
                Path to the zip file containing the CSV files.

        Returns:
            pd.DataFrame
                A pandas DataFrame containing the data from the CSV file.

        Raises:
            FileNotFoundError
                If the extracted folder contains no ``.csv`` file (or a
                path involved does not exist).
            pd.errors.EmptyDataError
                If the selected CSV file is empty.
            pd.errors.ParserError
                If the selected CSV file cannot be parsed.
            Exception
                Any other error is logged and re-raised unchanged.
        """
        # basicConfig() is a no-op once the root logger has handlers, but the
        # handlers passed to it would still be constructed on every call --
        # re-opening (and truncating) "logging.log" and leaking the file
        # handle. Only build them when logging has not been configured yet.
        if not logging.getLogger().handlers:
            logging.basicConfig(
                level=logging.INFO,
                format='%(asctime)s - %(levelname)s - %(message)s',
                handlers=[
                    logging.FileHandler("logging.log", 'w+'),
                    logging.StreamHandler(),
                ],
            )

        try:
            # Initialize ZipExtractor with specified zip path
            extractor = ZipExtractor(zip_path=zip_path)
            logging.info(f"Initialized ZipExtractor with path: {zip_path}")

            # Extract files and ensure CSVs are in the specified folder
            extractor.extract_files()
            logging.info(f"CSV Files are extracted from {zip_path}.")

            # Get the output folder where CSV files are extracted
            output_folder = extractor.output_folder
            logging.info(f"Extracted files are located in: {output_folder}")

            # os.listdir() returns entries in arbitrary order; sort so the
            # "first" CSV is the same file on every run and platform.
            csv_files = sorted(
                name for name in os.listdir(output_folder)
                if name.endswith('.csv')
            )
            if not csv_files:
                logging.error("No CSV files found in the extracted folder.")
                raise FileNotFoundError("No CSV files found in the extracted folder.")

            # Read the first CSV file found into a DataFrame
            csv_path = os.path.join(output_folder, csv_files[0])
            data = pd.read_csv(csv_path)
            logging.info(f"Successfully loaded data from {csv_files[0]}")
            return data
        except FileNotFoundError as e:
            logging.error(f"Error: {e}")
            raise
        except pd.errors.EmptyDataError:
            logging.error("Error: The CSV file is empty.")
            raise
        except pd.errors.ParserError:
            logging.error("Error: The CSV file contains parsing errors.")
            raise
        except Exception as e:
            logging.error(f"An unexpected error occurred during data ingestion: {e}")
            raise
if __name__ == "__main__":
    # Running this module directly intentionally does nothing.
    #
    # Sketch of how the class is meant to be used from calling code:
    #
    #     ingestor = DataIngestion()
    #     try:
    #         df = ingestor.data_ingestion(
    #             "data/raw/customer_churn_dataset-training-master.csv.zip"
    #         )
    #     except Exception as e:
    #         logging.error(f"Data ingestion failed: {e}")
    pass