#!/usr/bin/env python3
"""
GPU Booster Performance Report Generator
Comprehensive system for analyzing and reporting on DeepSeek R1 performance metrics
"""
import base64
import io
import json
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from jinja2 import Template
from prometheus_api_client import PrometheusConnect
class GPUBoosterReportGenerator:
    """
    Report generator for GPU Booster performance metrics.

    Pulls metrics from a Prometheus server, scores them against fixed
    performance targets, and renders markdown/JSON reports with charts.
    """

    def __init__(self, prometheus_url: str = "http://localhost:9090"):
        """
        Create a generator wired to a Prometheus server.

        Args:
            prometheus_url: Base URL of the Prometheus HTTP API.
        """
        self.prom = PrometheusConnect(url=prometheus_url)
        # SLA targets each report is scored against.
        self.performance_targets = dict(
            throughput=20000,     # tokens/sec
            ttft_p95=200,         # ms (lower is better)
            cache_hit_rate=50,    # %
            gpu_utilization=90,   # %
            gpu_memory=90,        # %
        )
def fetch_metrics(self, time_range: str = "1h") -> Dict[str, Any]:
"""
Fetch all relevant metrics from Prometheus for the specified time range.
Args:
time_range: Time range for queries (e.g., "1h", "24h", "7d")
Returns:
Dictionary containing all fetched metrics
"""
print(f"Fetching metrics for the last {time_range}...")
queries = {
# Throughput metrics
'avg_throughput': f'avg_over_time(rate(tokens_generated_total[5m])[{time_range}:5m])',
'max_throughput': f'max_over_time(rate(tokens_generated_total[5m])[{time_range}:5m])',
'min_throughput': f'min_over_time(rate(tokens_generated_total[5m])[{time_range}:5m])',
# Latency percentiles
'ttft_p50': f'histogram_quantile(0.50, sum(rate(vllm_time_to_first_token_seconds_bucket[5m])) by (le))',
'ttft_p95': f'histogram_quantile(0.95, sum(rate(vllm_time_to_first_token_seconds_bucket[5m])) by (le))',
'ttft_p99': f'histogram_quantile(0.99, sum(rate(vllm_time_to_first_token_seconds_bucket[5m])) by (le))',
# Inter-token latency
'itl_p50': f'histogram_quantile(0.50, sum(rate(vllm_inter_token_latency_seconds_bucket[5m])) by (le))',
'itl_p95': f'histogram_quantile(0.95, sum(rate(vllm_inter_token_latency_seconds_bucket[5m])) by (le))',
'itl_p99': f'histogram_quantile(0.99, sum(rate(vllm_inter_token_latency_seconds_bucket[5m])) by (le))',
# Cache metrics
'cache_hit_rate': f'avg_over_time(vllm_kv_cache_hit_rate[{time_range}:5m])',
'cache_evictions': f'sum_over_time(vllm_cache_evictions_total[{time_range}])',
# GPU metrics
'avg_gpu_utilization': f'avg_over_time(avg(gpu_utilization_percent)[{time_range}:1m])',
'max_gpu_utilization': f'max_over_time(max(gpu_utilization_percent)[{time_range}:1m])',
'avg_gpu_memory': f'avg_over_time(avg(gpu_memory_usage_percent)[{time_range}:1m])',
# Request metrics
'total_requests': f'increase(vllm_request_count[{time_range}])',
'avg_batch_size': f'avg_over_time(vllm_batch_size[{time_range}:1m])',
'request_rate': f'avg_over_time(rate(vllm_request_count[5m])[{time_range}:5m])',
# Error metrics
'error_rate': f'rate(vllm_request_errors_total[{time_range}])',
'timeout_rate': f'rate(vllm_request_timeouts_total[{time_range}])'
}
metrics = {}
for name, query in queries.items():
try:
result = self.prom.custom_query(query)
if result and len(result) > 0:
# Extract the metric value
value = float(result[0]['value'][1])
metrics[name] = value
else:
metrics[name] = 0
except Exception as e:
print(f"Error fetching {name}: {e}")
metrics[name] = 0
# Convert latency metrics from seconds to milliseconds
for key in ['ttft_p50', 'ttft_p95', 'ttft_p99', 'itl_p50', 'itl_p95', 'itl_p99']:
if key in metrics:
metrics[key] *= 1000 # Convert to ms
# Convert cache hit rate to percentage
if 'cache_hit_rate' in metrics:
metrics['cache_hit_rate'] *= 100
return metrics
def analyze_performance(self, metrics: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyze metrics against performance targets and generate insights.
Args:
metrics: Dictionary of fetched metrics
Returns:
Analysis results including target compliance and recommendations
"""
analysis = {
'timestamp': datetime.utcnow().isoformat(),
'targets_met': {},
'performance_score': 0,
'insights': [],
'recommendations': []
}
# Check performance targets
checks = [
('throughput', metrics.get('avg_throughput', 0), self.performance_targets['throughput'], False),
('ttft_p95', metrics.get('ttft_p95', 0), self.performance_targets['ttft_p95'], True),
('cache_hit_rate', metrics.get('cache_hit_rate', 0), self.performance_targets['cache_hit_rate'], False),
('gpu_utilization', metrics.get('avg_gpu_utilization', 0), self.performance_targets['gpu_utilization'], False),
('gpu_memory', metrics.get('avg_gpu_memory', 0), self.performance_targets['gpu_memory'], False)
]
targets_met_count = 0
for metric_name, value, target, inverse in checks:
if inverse:
met = value <= target
else:
met = value >= target
analysis['targets_met'][metric_name] = {
'value': value,
'target': target,
'met': met,
'percentage': (target / value * 100) if inverse else (value / target * 100)
}
if met:
targets_met_count += 1
# Calculate overall performance score
analysis['performance_score'] = (targets_met_count / len(checks)) * 100
# Generate insights based on the data
if metrics['avg_throughput'] > 25000:
analysis['insights'].append("Exceptional throughput performance - exceeding target by over 25%")
if metrics['ttft_p95'] < 150:
analysis['insights'].append("Excellent first token latency - well below the 200ms threshold")
if metrics['cache_hit_rate'] > 70:
analysis['insights'].append("High cache efficiency - BoosterCache is performing optimally")
# Identify performance bottlenecks
if metrics['avg_throughput'] < self.performance_targets['throughput']:
throughput_deficit = self.performance_targets['throughput'] - metrics['avg_throughput']
analysis['recommendations'].append(
f"Increase batch size or enable more aggressive prefetching to improve throughput by {throughput_deficit:.0f} tokens/sec"
)
if metrics['ttft_p95'] > self.performance_targets['ttft_p95']:
analysis['recommendations'].append(
"Enable prefix caching and reduce max sequence length to improve TTFT latency"
)
if metrics['cache_hit_rate'] < self.performance_targets['cache_hit_rate']:
analysis['recommendations'].append(
f"Increase BoosterCache chunk size from current setting. Current hit rate: {metrics['cache_hit_rate']:.1f}%"
)
if metrics['avg_gpu_utilization'] < 85:
analysis['recommendations'].append(
"GPU underutilized - consider increasing concurrent requests or batch size"
)
# Check for imbalanced GPU usage
if metrics.get('max_gpu_utilization', 0) - metrics.get('avg_gpu_utilization', 0) > 15:
analysis['recommendations'].append(
"GPU utilization imbalance detected - review tensor parallel configuration"
)
return analysis
    def generate_visualizations(self, metrics: Dict[str, Any]) -> Dict[str, str]:
        """
        Generate visualization charts for the report.

        Builds three charts and returns each as a base64-encoded PNG so they
        can be inlined into the markdown report as data URIs:

        - 'overview': actual-vs-target bar chart of the headline metrics
        - 'latency': TTFT and ITL percentile bars with SLA threshold lines
        - 'gpu_heatmap': simulated per-GPU utilization over time

        Args:
            metrics: Dictionary of metrics; reads the keys produced by
                ``fetch_metrics`` (latencies already in ms, rates in %).

        Returns:
            Dictionary of base64-encoded chart images keyed by chart name.
        """
        visualizations = {}
        # Set the style for all plots.
        # NOTE(review): this mutates global matplotlib/seaborn state for the
        # whole process, not just this method.
        plt.style.use('seaborn-v0_8-darkgrid')
        sns.set_palette("husl")
        # 1. Performance Overview Bar Chart
        fig, ax = plt.subplots(figsize=(10, 6))
        # Throughput is rescaled to thousands so all five bars share one axis.
        metrics_data = {
            'Throughput\n(k tok/s)': metrics['avg_throughput'] / 1000,
            'TTFT p95\n(ms)': metrics['ttft_p95'],
            'Cache Hit\n(%)': metrics['cache_hit_rate'],
            'GPU Util\n(%)': metrics['avg_gpu_utilization'],
            'GPU Mem\n(%)': metrics['avg_gpu_memory']
        }
        # Hard-coded copies of self.performance_targets (throughput in k tok/s).
        targets = {
            'Throughput\n(k tok/s)': 20,
            'TTFT p95\n(ms)': 200,
            'Cache Hit\n(%)': 50,
            'GPU Util\n(%)': 90,
            'GPU Mem\n(%)': 90
        }
        x = np.arange(len(metrics_data))
        width = 0.35
        # Paired bars: actual to the left of each tick, target to the right.
        bars1 = ax.bar(x - width/2, list(metrics_data.values()), width, label='Actual', alpha=0.8)
        bars2 = ax.bar(x + width/2, list(targets.values()), width, label='Target', alpha=0.6)
        ax.set_xlabel('Metrics', fontsize=14)
        ax.set_ylabel('Value', fontsize=14)
        ax.set_title('GPU Booster Performance Overview', fontsize=16, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(metrics_data.keys())
        ax.legend()
        # Add value labels on bars
        for bars in [bars1, bars2]:
            for bar in bars:
                height = bar.get_height()
                ax.annotate(f'{height:.1f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3),
                            textcoords="offset points",
                            ha='center', va='bottom',
                            fontsize=10)
        plt.tight_layout()
        visualizations['overview'] = self._fig_to_base64(fig)
        plt.close()
        # 2. Latency Distribution Chart (TTFT left, ITL right)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
        # TTFT percentiles (values already converted to ms by fetch_metrics)
        ttft_data = {
            'p50': metrics['ttft_p50'],
            'p95': metrics['ttft_p95'],
            'p99': metrics['ttft_p99']
        }
        ax1.bar(ttft_data.keys(), ttft_data.values(), color=['green', 'orange', 'red'], alpha=0.7)
        # Dashed line marks the 200 ms TTFT SLA threshold.
        ax1.axhline(y=200, color='red', linestyle='--', label='Target (200ms)')
        ax1.set_ylabel('Latency (ms)', fontsize=12)
        ax1.set_title('Time to First Token (TTFT)', fontsize=14, fontweight='bold')
        ax1.legend()
        # ITL percentiles
        itl_data = {
            'p50': metrics['itl_p50'],
            'p95': metrics['itl_p95'],
            'p99': metrics['itl_p99']
        }
        ax2.bar(itl_data.keys(), itl_data.values(), color=['green', 'orange', 'red'], alpha=0.7)
        ax2.axhline(y=50, color='red', linestyle='--', label='Target (50ms)')
        ax2.set_ylabel('Latency (ms)', fontsize=12)
        ax2.set_title('Inter-Token Latency (ITL)', fontsize=14, fontweight='bold')
        ax2.legend()
        plt.tight_layout()
        visualizations['latency'] = self._fig_to_base64(fig)
        plt.close()
        # 3. GPU Utilization Heatmap (simulated for 8 GPUs)
        fig, ax = plt.subplots(figsize=(10, 6))
        # Generate sample GPU utilization data.
        # NOTE(review): this heatmap is synthetic — normal noise around the
        # average, clipped to 70-100, with no RNG seed — so it varies per run
        # and does NOT reflect real per-GPU telemetry. Confirm this is intended.
        gpu_data = np.random.normal(metrics['avg_gpu_utilization'], 5, (8, 10))
        gpu_data = np.clip(gpu_data, 70, 100)
        im = ax.imshow(gpu_data, cmap='RdYlGn', aspect='auto', vmin=70, vmax=100)
        # Set ticks and labels: columns are minutes ago (t-9 .. t-0), rows GPUs.
        ax.set_xticks(np.arange(10))
        ax.set_yticks(np.arange(8))
        ax.set_xticklabels([f't-{i}' for i in range(9, -1, -1)])
        ax.set_yticklabels([f'GPU {i}' for i in range(8)])
        # Add colorbar
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label('Utilization (%)', rotation=270, labelpad=20)
        ax.set_xlabel('Time (minutes ago)', fontsize=12)
        ax.set_ylabel('GPU Device', fontsize=12)
        ax.set_title('GPU Utilization Heatmap', fontsize=14, fontweight='bold')
        plt.tight_layout()
        visualizations['gpu_heatmap'] = self._fig_to_base64(fig)
        plt.close()
        return visualizations
def _fig_to_base64(self, fig) -> str:
"""Convert matplotlib figure to base64 string."""
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
buf.seek(0)
return base64.b64encode(buf.read()).decode('utf-8')
def generate_markdown_report(self, metrics: Dict[str, Any], analysis: Dict[str, Any],
visualizations: Dict[str, str]) -> str:
"""
Generate a comprehensive markdown report.
Args:
metrics: Dictionary of metrics
analysis: Analysis results
visualizations: Dictionary of base64-encoded charts
Returns:
Markdown formatted report
"""
template = Template("""
# 🚀 GPU Booster Performance Report
**System**: DeepSeek R1 671B on 8× NVIDIA H100 80GB GPUs
**Report Generated**: {{ timestamp }}
**Performance Score**: {{ performance_score }}%
---
## 📊 Executive Summary
The GPU Booster system has achieved a **{{ performance_score }}%** performance score against defined targets.
{{ summary_statement }}
### Key Metrics at a Glance
| Metric | Current Value | Target | Status | Achievement |
|--------|---------------|--------|--------|-------------|
| **Token Throughput** | {{ "%.0f"|format(metrics.avg_throughput) }} tokens/sec | ≥ 20,000 | {{ "✅" if analysis.targets_met.throughput.met else "❌" }} | {{ "%.1f"|format(analysis.targets_met.throughput.percentage) }}% |
| **TTFT (p95)** | {{ "%.0f"|format(metrics.ttft_p95) }} ms | ≤ 200 ms | {{ "✅" if analysis.targets_met.ttft_p95.met else "❌" }} | {{ "%.1f"|format(analysis.targets_met.ttft_p95.percentage) }}% |
| **Cache Hit Rate** | {{ "%.1f"|format(metrics.cache_hit_rate) }}% | ≥ 50% | {{ "✅" if analysis.targets_met.cache_hit_rate.met else "❌" }} | {{ "%.1f"|format(analysis.targets_met.cache_hit_rate.percentage) }}% |
| **GPU Utilization** | {{ "%.1f"|format(metrics.avg_gpu_utilization) }}% | ≥ 90% | {{ "✅" if analysis.targets_met.gpu_utilization.met else "❌" }} | {{ "%.1f"|format(analysis.targets_met.gpu_utilization.percentage) }}% |
| **GPU Memory** | {{ "%.1f"|format(metrics.avg_gpu_memory) }}% | ≥ 90% | {{ "✅" if analysis.targets_met.gpu_memory.met else "❌" }} | {{ "%.1f"|format(analysis.targets_met.gpu_memory.percentage) }}% |
---
## 📈 Detailed Performance Analysis
### Throughput Performance
- **Average Throughput**: {{ "%.0f"|format(metrics.avg_throughput) }} tokens/sec
- **Peak Throughput**: {{ "%.0f"|format(metrics.max_throughput) }} tokens/sec
- **Minimum Throughput**: {{ "%.0f"|format(metrics.min_throughput) }} tokens/sec
- **Throughput Variance**: {{ "%.1f"|format(((metrics.max_throughput - metrics.min_throughput) / metrics.avg_throughput * 100)) }}%
### Latency Analysis
#### Time to First Token (TTFT)
| Percentile | Latency (ms) | Target Status |
|------------|--------------|---------------|
| p50 | {{ "%.0f"|format(metrics.ttft_p50) }} | {{ "✅ Excellent" if metrics.ttft_p50 < 150 else "⚠️ Monitor" }} |
| p95 | {{ "%.0f"|format(metrics.ttft_p95) }} | {{ "✅ Within SLA" if metrics.ttft_p95 <= 200 else "❌ Exceeds SLA" }} |
| p99 | {{ "%.0f"|format(metrics.ttft_p99) }} | {{ "⚠️ Tail latency" if metrics.ttft_p99 > 250 else "✅ Good" }} |
#### Inter-Token Latency (ITL)
| Percentile | Latency (ms) | Target Status |
|------------|--------------|---------------|
| p50 | {{ "%.0f"|format(metrics.itl_p50) }} | {{ "✅ Excellent" if metrics.itl_p50 < 30 else "⚠️ Monitor" }} |
| p95 | {{ "%.0f"|format(metrics.itl_p95) }} | {{ "✅ Good" if metrics.itl_p95 < 50 else "❌ High" }} |
| p99 | {{ "%.0f"|format(metrics.itl_p99) }} | {{ "⚠️ Tail latency" if metrics.itl_p99 > 60 else "✅ Good" }} |
### Cache Performance
- **Hit Rate**: {{ "%.1f"|format(metrics.cache_hit_rate) }}%
- **Total Evictions**: {{ "%.0f"|format(metrics.cache_evictions) }}
- **Cache Efficiency Score**: {{ "%.1f"|format(metrics.cache_hit_rate * (1 - min(metrics.cache_evictions / 10000, 1))) }}%
### Request Processing
- **Total Requests Processed**: {{ "{:,.0f}".format(metrics.total_requests) }}
- **Average Request Rate**: {{ "%.1f"|format(metrics.request_rate) }} req/sec
- **Average Batch Size**: {{ "%.1f"|format(metrics.avg_batch_size) }}
- **Error Rate**: {{ "%.4f"|format(metrics.error_rate * 100) }}%
- **Timeout Rate**: {{ "%.4f"|format(metrics.timeout_rate * 100) }}%
---
## 💡 Insights
{% for insight in analysis.insights %}
- {{ insight }}
{% endfor %}
---
## 🔧 Recommendations
{% if analysis.recommendations|length > 0 %}
{% for recommendation in analysis.recommendations %}
{{ loop.index }}. {{ recommendation }}
{% endfor %}
{% else %}
All performance targets are being met. Continue monitoring for sustained performance.
{% endif %}
---
## 📊 Performance Visualizations
### Overall Performance

### Latency Distribution

### GPU Utilization Heatmap

---
## 🎯 Next Steps
1. **Immediate Actions**:
- Monitor the metrics that are close to threshold values
- Implement recommended optimizations for underperforming areas
2. **Short-term Goals** (1-2 weeks):
- Fine-tune BoosterCache parameters based on workload patterns
- Optimize batch processing for better GPU utilization
3. **Long-term Goals** (1-3 months):
- Evaluate model quantization options for improved throughput
- Consider implementing speculative decoding for latency reduction
---
## 📋 Configuration Summary
```yaml
Model: DeepSeek-R1-671B
GPUs: 8× NVIDIA H100 80GB
Tensor Parallel Size: 8
BoosterCache Chunk Size: 256
Max Sequence Length: 32768
GPU Memory Utilization: 0.95
```
---
*This report was automatically generated by the GPU Booster Performance Monitoring System*
""")
# Determine summary statement based on performance
if analysis['performance_score'] >= 100:
summary_statement = "All performance targets have been met or exceeded. The system is operating at peak efficiency."
elif analysis['performance_score'] >= 80:
summary_statement = "Most performance targets are being met, with minor optimization opportunities identified."
elif analysis['performance_score'] >= 60:
summary_statement = "Several performance targets need attention. Please review the recommendations section."
else:
summary_statement = "Critical performance issues detected. Immediate action required to meet SLA targets."
return template.render(
metrics=metrics,
analysis=analysis,
visualizations=visualizations,
timestamp=datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
performance_score=analysis['performance_score'],
summary_statement=summary_statement
)
def generate_json_report(self, metrics: Dict[str, Any], analysis: Dict[str, Any]) -> str:
"""
Generate a JSON report suitable for automated processing.
Args:
metrics: Dictionary of metrics
analysis: Analysis results
Returns:
JSON formatted report
"""
report = {
"report_version": "2.0",
"system": {
"name": "GPU Booster for DeepSeek R1 671B",
"hardware": "8× NVIDIA H100 80GB GPUs",
"configuration": {
"model": "deepseek-ai/DeepSeek-R1-671B",
"tensor_parallel_size": 8,
"boostercache_chunk_size": 256,
"max_sequence_length": 32768,
"gpu_memory_utilization": 0.95
}
},
"timestamp": datetime.utcnow().isoformat(),
"performance_score": analysis['performance_score'],
"summary": {
"total_tps": metrics['avg_throughput'],
"peak_tps": metrics['max_throughput'],
"ttft_p50": metrics['ttft_p50'],
"ttft_p95": metrics['ttft_p95'],
"ttft_p99": metrics['ttft_p99'],
"itl_p50": metrics['itl_p50'],
"itl_p95": metrics['itl_p95'],
"itl_p99": metrics['itl_p99'],
"kv_cache_hit_rate": metrics['cache_hit_rate'],
"gpu_utilization": metrics['avg_gpu_utilization'],
"gpu_memory_usage": metrics['avg_gpu_memory'],
"total_requests": metrics['total_requests'],
"error_rate": metrics['error_rate'],
"timeout_rate": metrics['timeout_rate']
},
"targets_compliance": analysis['targets_met'],
"insights": analysis['insights'],
"recommendations": analysis['recommendations'],
"detailed_metrics": metrics
}
return json.dumps(report, indent=2, default=str)
def generate_full_report(self, time_range: str = "1h",
output_format: str = "both") -> Tuple[str, str]:
"""
Generate a complete performance report.
Args:
time_range: Time range for analysis
output_format: "json", "markdown", or "both"
Returns:
Tuple of (json_report, markdown_report) or None for excluded format
"""
# Fetch metrics
metrics = self.fetch_metrics(time_range)
# Analyze performance
analysis = self.analyze_performance(metrics)
# Generate visualizations
visualizations = self.generate_visualizations(metrics)
# Generate reports
json_report = None
markdown_report = None
if output_format in ["json", "both"]:
json_report = self.generate_json_report(metrics, analysis)
if output_format in ["markdown", "both"]:
markdown_report = self.generate_markdown_report(metrics, analysis, visualizations)
return json_report, markdown_report
# =====================================================
# USAGE EXAMPLE AND CLI INTERFACE
# =====================================================
def main():
    """
    Command-line interface for the report generator.

    Parses CLI flags, generates the requested report format(s), and either
    saves them to timestamped files (--save) or prints them to stdout.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Generate performance reports for GPU Booster"
    )
    parser.add_argument(
        "--prometheus-url",
        default="http://localhost:9090",
        help="Prometheus server URL"
    )
    parser.add_argument(
        "--time-range",
        default="1h",
        choices=["1h", "6h", "24h", "7d", "30d"],
        help="Time range for analysis"
    )
    parser.add_argument(
        "--output",
        default="both",
        choices=["json", "markdown", "both"],
        help="Output format"
    )
    parser.add_argument(
        "--save",
        action="store_true",
        help="Save reports to files"
    )
    args = parser.parse_args()
    # Initialize report generator
    generator = GPUBoosterReportGenerator(args.prometheus_url)
    print(f"Generating GPU Booster performance report for the last {args.time_range}...")
    # Generate reports
    json_report, markdown_report = generator.generate_full_report(
        time_range=args.time_range,
        output_format=args.output
    )
    # Save or print reports
    if args.save:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        if json_report:
            filename = f"gpu_booster_report_{timestamp}.json"
            with open(filename, "w") as f:
                f.write(json_report)
            # Bug fix: previously printed a literal placeholder instead of
            # interpolating the actual output path.
            print(f"JSON report saved to: {filename}")
        if markdown_report:
            filename = f"gpu_booster_report_{timestamp}.md"
            with open(filename, "w") as f:
                f.write(markdown_report)
            print(f"Markdown report saved to: {filename}")
    else:
        if json_report and args.output in ["json", "both"]:
            print("\n=== JSON Report ===")
            print(json_report)
        if markdown_report and args.output in ["markdown", "both"]:
            print("\n=== Markdown Report ===")
            print(markdown_report)


if __name__ == "__main__":
    main()