.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/database_report_analysis.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        You can download the full example code
        :ref:`below <sphx_glr_download_auto_examples_database_report_analysis.py>`
        and run it online in
        `Codespaces <https://codespaces.new/spark-cleantech-l3/sda-copy?quickstart=1>`__.

        .. image:: https://github.com/codespaces/badge.svg
            :target: https://codespaces.new/spark-cleantech-l3/sda-copy?quickstart=1

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_database_report_analysis.py:


SDA Database Report Analysis Script
===================================

This script analyzes all test data available on the machine and reports
comprehensive database metrics: test and file counts, table detection,
worksheet structure, and processing success rates.

.. GENERATED FROM PYTHON SOURCE LINES 9-224

.. code-block:: Python


    import time

    import pandas as pd

    from sda.api import list_all_files, list_all_tests
    from sda.api.database_report import (
        analyze_database_files,
        create_summary_dataframe,
        save_excel_with_formatting,
    )

    # Main analysis execution
    print("šŸ” SDA Test Data Comprehensive Analysis")
    print("=" * 60)
    print("Analyzing all available test data on this machine...\n")

    start_time = time.time()

    # 1. Discover all tests and files
    print("šŸ“Š DISCOVERY PHASE")
    print("-" * 30)

    all_tests = list_all_tests()
    print(f"āœ… Found {len(all_tests)} tests")

    all_files = list_all_files(verbose=0)
    print(f"āœ… Found {len(all_files)} total files")

    # Filter Excel files for analysis
    excel_files = [f for f in all_files if f.suffix.lower() in [".xlsx", ".xlsm", ".xls"]]
    print(f"šŸ“‹ Excel files for analysis: {len(excel_files)}")

    if not excel_files:
        print("āŒ No Excel files found for analysis. Exiting.")
        exit()

    print()

    # 2. Perform comprehensive database analysis
    print("⚔ DATABASE ANALYSIS PHASE")
    print("-" * 40)

    results_df = analyze_database_files(
        file_filter="*.xls*",
        max_files=None,  # Analyze ALL files
        verbose=True,
        include_details=False,
    )

    if results_df.empty:
        print("āŒ No files were successfully analyzed.")
        exit()
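
    # --- Illustrative aside (not part of the original example) ---------------
    # The metric computations below rely on a handful of columns in
    # ``results_df``. The exact schema returned by analyze_database_files() is
    # an assumption here, inferred from the fields this script uses; a cheap
    # sanity check before computing metrics might look like this:
    expected_cols = {
        "status", "file_name", "num_points", "parse_time_seconds",
        "points_per_second", "tables_detected", "table_openable", "file_size_mb",
    }
    missing_cols = expected_cols - set(results_df.columns)
    if missing_cols:
        print(f"āš ļø Missing expected columns: {sorted(missing_cols)}")
    # --------------------------------------------------------------------------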

    # 3. Generate comprehensive metrics
    print("\nšŸ“ˆ COMPREHENSIVE METRICS")
    print("=" * 60)

    # Basic counts
    total_files = len(results_df)
    successful_files = len(results_df[results_df["status"] == "Success"])
    failed_files = len(results_df[results_df["status"] == "Error"])

    # Table detection counts (Excel files only)
    excel_files_df = results_df[
        results_df["file_name"].str.endswith((".xlsx", ".xlsm", ".xls"), na=False)
    ]
    total_excel_files = len(excel_files_df)
    tables_detected_count = (
        len(excel_files_df[excel_files_df["tables_detected"]])
        if total_excel_files > 0
        else 0
    )
    tables_openable_count = (
        len(excel_files_df[excel_files_df["table_openable"]])
        if total_excel_files > 0
        else 0
    )

    # Data metrics
    successful_df = results_df[results_df["status"] == "Success"]
    total_data_points = successful_df["num_points"].sum() if len(successful_df) > 0 else 0

    # Performance metrics
    total_parse_time = (
        successful_df["parse_time_seconds"].sum() if len(successful_df) > 0 else 0
    )
    avg_parse_time = (
        successful_df["parse_time_seconds"].mean() if len(successful_df) > 0 else 0
    )
    avg_parsing_rate = (
        successful_df["points_per_second"].mean() if len(successful_df) > 0 else 0
    )

    print("šŸ“Š FILE ANALYSIS:")
    print(f"   • Total tests discovered: {len(all_tests)}")
    print(f"   • Total files discovered: {len(all_files)}")
    print(f"   • Excel files analyzed: {total_files}")
    print(
        f"   • Files successfully read: {successful_files} ({successful_files / total_files * 100:.1f}%)"
    )
    print(
        f"   • Files that failed to read: {failed_files} ({failed_files / total_files * 100:.1f}%)"
    )
    print()

    print("šŸ“‹ TABLE DETECTION:")
    if total_excel_files > 0:
        print(
            f"   • Tests with tables detected: {tables_detected_count} "
            f"({tables_detected_count / total_excel_files * 100:.1f}%)"
        )
        # Every successfully read file should also have an openable table
        assert successful_files == tables_openable_count
        if tables_detected_count > 0:  # Guard against division by zero
            print(
                f"   • Tests with tables openable: {tables_openable_count} "
                f"({tables_openable_count / tables_detected_count * 100:.1f}% of detected)"
            )
    else:
        print("   • No Excel files to analyze for table detection")
    print()

    print("šŸ“ˆ DATA METRICS:")
    print(f"   • Total data points (run points): {total_data_points:,}")
    print(
        f"   • Average data points per file: {total_data_points / successful_files:.0f}"
        if successful_files > 0
        else "   • Average data points per file: 0"
    )
    print(
        f"   • Total file size: {successful_df['file_size_mb'].sum():.1f} MB"
        if len(successful_df) > 0
        else "   • Total file size: 0 MB"
    )
    print()

    print("ā±ļø PERFORMANCE METRICS:")
    print(f"   • Total parsing time: {total_parse_time:.1f} seconds")
    print(f"   • Average opening time per file: {avg_parse_time:.3f} seconds")
    print(f"   • Average parsing rate: {avg_parsing_rate:.0f} points/second")

    if len(successful_df) > 0:
        fastest_file = successful_df.loc[successful_df["parse_time_seconds"].idxmin()]
        slowest_file = successful_df.loc[successful_df["parse_time_seconds"].idxmax()]
        largest_file = successful_df.loc[successful_df["num_points"].idxmax()]
        print("šŸ† Performance Highlights:")
        print(
            f"   • Fastest file: {fastest_file['file_name']} ({fastest_file['parse_time_seconds']:.3f}s)"
        )
        print(
            f"   • Slowest file: {slowest_file['file_name']} ({slowest_file['parse_time_seconds']:.3f}s)"
        )
        print(
            f"   • Largest file: {largest_file['file_name']} ({largest_file['num_points']:,} points)"
        )
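
    # --- Illustrative aside (not part of the original example) ---------------
    # A per-extension breakdown can make the aggregate parse-time figures
    # easier to interpret; the column names are the same assumptions as above.
    if len(successful_df) > 0:
        by_ext = (
            successful_df.assign(
                ext=successful_df["file_name"].str.rsplit(".", n=1).str[-1].str.lower()
            )
            .groupby("ext")[["parse_time_seconds", "num_points"]]
            .sum()
        )
        print("\nšŸ“‚ Per-extension totals (illustrative):")
        print(by_ext.to_string())
    # --------------------------------------------------------------------------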

    # 4. Create summary DataFrame using library function
    print("\nšŸ“‹ SUMMARY DATAFRAME")
    print("=" * 60)

    summary_df = create_summary_dataframe(results_df, all_tests, all_files)

    # Format the DataFrame for display
    summary_display = summary_df.copy()
    summary_display["Value"] = summary_display["Value"].apply(
        lambda x: f"{x:,.1f}" if isinstance(x, float) else f"{x:,}"
    )
    summary_display["Percentage"] = summary_display["Percentage"].apply(
        lambda x: f"{x:.1f}%" if x is not None else ""
    )
    print(summary_display.to_string(index=False))

    # 5. Save comprehensive results
    analysis_time = time.time() - start_time

    print("\nšŸ’¾ SAVING RESULTS")
    print("=" * 60)

    # Prepare data for Excel export
    output_file = "sda_comprehensive_analysis.xlsx"

    # Create a copy for Excel export and convert URLs to hyperlinks
    excel_export_df = results_df.copy()
    if "plume_url" in excel_export_df.columns:
        excel_export_df["plume_url"] = excel_export_df["plume_url"].apply(
            lambda url: f'=HYPERLINK("{url}", "{url}")' if pd.notna(url) and url else ""
        )

    # Create data dictionary for Excel export
    excel_data = {"Summary": summary_df, "Detailed Results": excel_export_df}

    # Add failed files analysis
    if failed_files > 0:
        failed_analysis = excel_export_df[excel_export_df["status"] == "Error"][
            ["test_name", "file_name", "plume_url", "error_message", "file_size_mb"]
        ]
        excel_data["Failed Files"] = failed_analysis

    # Save with formatting using library function
    success = save_excel_with_formatting(excel_data, output_file)
    if not success:
        print("āŒ Failed to save Excel report")

    # Final summary
    print("\nšŸŽÆ ANALYSIS COMPLETE")
    print("=" * 60)
    print(f"ā±ļø Total analysis time: {analysis_time:.1f} seconds")
    print(f"šŸ“Š Files analyzed: {total_files}")
    print(f"šŸ“Š Test files (with tables) found: {tables_detected_count}")
    print(
        f"āœ… Success rate: {successful_files / tables_detected_count * 100:.1f}%"
        if tables_detected_count > 0  # Guard matches the divisor, not total_files
        else "āœ… Success rate: 0%"
    )
    print(f"šŸ“ˆ Total data points: {total_data_points:,}")
    print(f"šŸ’¾ Results saved to: {output_file}")


.. _sphx_glr_download_auto_examples_database_report_analysis.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: database_report_analysis.ipynb <database_report_analysis.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: database_report_analysis.py <database_report_analysis.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: database_report_analysis.zip <database_report_analysis.zip>`