Polish infrastructure portfolio projects

2026-04-29 23:30:30 +00:00
parent b0537b4bff
commit 8783892241
34 changed files with 762 additions and 1226 deletions
@@ -0,0 +1,10 @@
+.PHONY: run test demo
+
+run:
+	python3 cli.py --help
+
+test:
+	python3 -m py_compile cli.py collectors/*.py validators/*.py reports/*.py
+
+demo:
+	python3 cli.py compare examples/before.json examples/after.json --output /tmp/migration-diff.json
@@ -1,389 +1,56 @@
 # Migration Validation Framework

-A comprehensive Python CLI tool for validating system migrations through data collection, snapshot comparison, and automated reporting. Designed for enterprise migration workflows where system consistency and data integrity are critical.
+## Problem Statement

-## Overview
+Infrastructure migrations often fail in small, expensive ways: a mount option changes, a service is disabled, or disk usage moves past an operational threshold. Teams need structured evidence that the migrated host still matches the expected operating profile.

-The Migration Validation Framework provides a systematic approach to validating system migrations by:
+## Solution Overview

- Collecting comprehensive system data before and after migration
- Generating structured JSON snapshots for comparison
- Performing intelligent diff analysis between snapshots
- Generating detailed HTML reports with change visualization
- Providing CLI interface for integration into migration pipelines
+This project provides a Python CLI that collects system state into JSON snapshots and compares before/after files. The output is designed for change records, migration gates, and post-cutover validation.

-## Architecture
+## Architecture Overview

 ```
-┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
-│   CLI Interface │    │   Data          │    │   Validation    │
-│   (cli.py)      │◄──►│   Collectors    │◄──►│   Engine         │
-│                 │    │                 │    │                 │
-│ - Command       │    │ - mounts.py     │    │ - compare.py    │
-│   parsing       │    │ - services.py   │    │ - diff.py       │
-│ - Workflow      │    │ - disk_usage.py │    │ - validate.py   │
-│   orchestration │    │ - network.py    │    │                 │
-└─────────────────┘    └─────────────────┘    └─────────────────┘
-         │                       │                       │
-         ▼                       ▼                       ▼
-┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
-│   JSON          │    │   Comparison    │    │   HTML          │
-│   Snapshots     │    │   Results       │    │   Reports       │
-│                 │    │                 │    │                 │
-│ - Pre-migration │    │ - Differences   │    │ - Summary       │
-│ - Post-migration│    │ - Risk levels   │    │ - Details       │
-│ - Metadata      │    │ - Validation    │    │ - Charts        │
-└─────────────────┘    └─────────────────┘    └─────────────────┘
+Operator -> CLI -> Collectors -> JSON Snapshot -> Comparator -> Diff/Report
 ```

-## Quick Start
+Core components:

-### Prerequisites
+- `cli.py` provides collect, compare, snapshot, list, and report commands.
+- `collectors/` gathers mounts, services, and disk usage.
+- `validators/compare.py` identifies drift and validation failures.
+- `reports/` contains report generation helpers.
+- `examples/` contains realistic before/after evidence.

- Python 3.8+
- SSH access to target systems
- Appropriate permissions for data collection
-
-### Installation
+## How to Run

 ```bash
 cd migration-validation-framework
-pip install -r requirements.txt
+python3 cli.py collect --output before.json --systems web01,db01
+python3 cli.py collect --output after.json --systems web01,db01
+python3 cli.py compare before.json after.json --output diff.json
+python3 cli.py compare examples/before.json examples/after.json --output /tmp/migration-diff.json
 ```

-### Basic Usage
+Legacy snapshot IDs are still supported:

 ```bash
-# Create pre-migration snapshot
-python cli.py snapshot --env production --label pre-migration --systems web01,db01
-
-# Perform migration...
-
-# Create post-migration snapshot
-python cli.py snapshot --env production --label post-migration --systems web01,db01
-
-# Compare snapshots
-python cli.py compare pre-migration post-migration --output comparison_001
-
-# Generate HTML report
-python cli.py report --comparison comparison_001 --format html --output migration_report.html
+python3 cli.py snapshot --env prod --label pre --systems web01,db01
+python3 cli.py compare prod-pre-20260429_020000 prod-post-20260429_030000 --output change-0429
 ```

-## Project Structure
+## Example Output

-```
-migration-validation-framework/
-├── cli.py                 # Main CLI interface
-├── collectors/           # Data collection modules
-│   ├── mounts.py        # Filesystem mount collection
-│   ├── services.py      # System services collection
-│   ├── disk_usage.py    # Disk usage statistics
-│   ├── network.py       # Network configuration
-│   └── processes.py     # Running processes
-├── validators/          # Validation and comparison logic
-│   ├── compare.py       # Snapshot comparison engine
-│   ├── diff.py          # Difference calculation
-│   └── validate.py      # Validation rules
-├── reports/             # Report generation
-│   ├── html_report.py   # HTML report generator
-│   ├── json_report.py   # JSON report generator
-│   └── summary.py       # Summary calculations
-├── config/              # Configuration files
-│   ├── collectors.yaml  # Collector configurations
-│   └── validators.yaml  # Validation rules
-├── tests/               # Unit and integration tests
-├── logs/                # Application logs
-└── snapshots/           # Stored snapshots
+```text
+Comparison completed: diff.json (FAIL)
+Overall risk: high
+Total changes: 7
+Failed checks: critical_services_running
+Recommendation: restore sshd before production cutover
 ```

-## Data Collectors
+Sample inputs and output are available in [examples/before.json](examples/before.json), [examples/after.json](examples/after.json), and [examples/diff.json](examples/diff.json).

-### Mounts Collector (`collectors/mounts.py`)
-Collects filesystem mount information including:
- Mount points and devices
- Filesystem types
- Mount options
- Capacity and usage statistics
+## Real-World Use Case

-### Services Collector (`collectors/services.py`)
-Gathers system service status:
- Running services
- Service states (active, inactive, failed)
- Startup configuration
- Dependencies
-
-### Disk Usage Collector (`collectors/disk_usage.py`)
-Analyzes disk space utilization:
- Directory size statistics
- File system usage
- Inode usage
- Largest files and directories
-
-### Network Collector (`collectors/network.py`)
-Captures network configuration:
- Interface configurations
- Routing tables
- DNS settings
- Firewall rules
-
-### Processes Collector (`collectors/processes.py`)
-Documents running processes:
- Process lists with PIDs
- Memory and CPU usage
- Process owners
- Command lines
-
-## Validation Engine
-
-### Comparison Logic (`validators/compare.py`)
-Performs intelligent comparison of snapshots:
- Structural differences detection
- Semantic change analysis
- Risk level assessment
- Change categorization
-
-### Difference Calculator (`validators/diff.py`)
-Calculates detailed differences:
- Added/removed/modified items
- Quantitative changes
- Configuration drift detection
- Anomaly identification
-
-### Validation Rules (`validators/validate.py`)
-Applies validation rules:
- Critical change detection
- Compliance checking
- Threshold validation
- Custom rule engine
-
-## Reporting
-
-### HTML Reports (`reports/html_report.py`)
-Generates comprehensive HTML reports featuring:
- Executive summary dashboard
- Detailed change logs
- Risk assessment visualizations
- Interactive charts and graphs
- Export capabilities
-
-### JSON Reports (`reports/json_report.py`)
-Provides structured JSON output for:
- API integration
- Automated processing
- Audit trails
- Compliance reporting
-
-## CLI Interface
-
-### Commands
-
-```bash
-# Snapshot management
-python cli.py snapshot --env <env> --label <label> [--systems <hosts>]
-python cli.py list-snapshots [--env <env>]
-python cli.py delete-snapshot <snapshot-id>
-
-# Comparison operations
-python cli.py compare <snapshot1> <snapshot2> [--output <comparison-id>]
-python cli.py list-comparisons
-python cli.py show-comparison <comparison-id>
-
-# Reporting
-python cli.py report --comparison <comparison-id> --format <format> [--output <file>]
-python cli.py export --comparison <comparison-id> --format <format>
-
-# Configuration
-python cli.py config --show
-python cli.py config --set <key> <value>
-```
-
-### Options
-
- `--env`: Target environment (production, staging, development)
- `--systems`: Comma-separated list of target systems
- `--parallel`: Number of parallel collection threads
- `--timeout`: Collection timeout in seconds
- `--verbose`: Enable verbose output
- `--dry-run`: Preview actions without execution
-
-## Configuration
-
-### Collector Configuration (`config/collectors.yaml`)
-
-```yaml
-collectors:
-  mounts:
-    enabled: true
-    timeout: 30
-    exclude_patterns:
-      - "/proc/*"
-      - "/sys/*"
-
-  services:
-    enabled: true
-    include_disabled: false
-    service_manager: systemd
-
-  disk_usage:
-    enabled: true
-    max_depth: 3
-    exclude_paths:
-      - "/tmp"
-      - "/var/log"
-```
-
-### Validation Rules (`config/validators.yaml`)
-
-```yaml
-rules:
-  critical_services:
-    - sshd
-    - systemd
-    - network
-
-  filesystem_thresholds:
-    warning: 80
-    critical: 95
-
-  network_changes:
-    allow_new_interfaces: false
-    allow_route_changes: false
-```
-
-## Examples
-
-### Complete Migration Validation Workflow
-
-```bash
-# 1. Pre-migration snapshot
-python cli.py snapshot --env production --label "migration-pre-20241201" \
-    --systems web01,web02,db01,lb01 --parallel 4
-
-# 2. Execute migration process
-# ... migration steps ...
-
-# 3. Post-migration snapshot
-python cli.py snapshot --env production --label "migration-post-20241201" \
-    --systems web01,web02,db01,lb01 --parallel 4
-
-# 4. Compare snapshots
-python cli.py compare migration-pre-20241201 migration-post-20241201 \
-    --output migration-dec2024
-
-# 5. Generate reports
-python cli.py report --comparison migration-dec2024 --format html \
-    --output migration_validation_report.html
-
-python cli.py report --comparison migration-dec2024 --format json \
-    --output migration_validation_data.json
-```
-
-### Automated Validation in CI/CD
-
-```bash
-#!/bin/bash
-# CI/CD validation script
-
-ENVIRONMENT=$1
-SNAPSHOT_LABEL="ci-${BUILD_NUMBER}"
-
-# Create snapshot
-python cli.py snapshot --env $ENVIRONMENT --label $SNAPSHOT_LABEL
-
-# Compare with baseline
-python cli.py compare baseline-$ENVIRONMENT $SNAPSHOT_LABEL --output ci-$BUILD_NUMBER
-
-# Generate report
-python cli.py report --comparison ci-$BUILD_NUMBER --format html
-
-# Check for critical changes
-if python cli.py check-critical --comparison ci-$BUILD_NUMBER; then
-    echo "Migration validation passed"
-    exit 0
-else
-    echo "Critical changes detected - review required"
-    exit 1
-fi
-```
-
-## Security Considerations
-
- SSH key-based authentication only
- Encrypted snapshot storage
- Access control for sensitive data
- Audit logging of all operations
- Data sanitization and filtering
-
-## Performance Optimization
-
- Parallel data collection
- Incremental snapshots
- Compressed storage
- Memory-efficient processing
- Timeout handling
-
-## Monitoring and Logging
-
- Comprehensive logging to `logs/validation.log`
- Performance metrics collection
- Error tracking and alerting
- Audit trail generation
-
-## Troubleshooting
-
-### Common Issues
-
-**Connection Failures:**
-```bash
-# Check SSH connectivity
-ssh -i ~/.ssh/id_rsa user@target-host
-
-# Verify Python availability
-python cli.py --test-connection --systems target-host
-```
-
-**Collection Timeouts:**
-```bash
-# Increase timeout
-python cli.py snapshot --timeout 300 --systems slow-host
-
-# Check system load
-ssh user@target-host uptime
-```
-
-**Permission Errors:**
-```bash
-# Verify sudo access
-ssh user@target-host sudo -l
-
-# Check file permissions
-ssh user@target-host ls -la /etc/
-```
-
-## Development
-
-### Adding New Collectors
-
-1. Create collector module in `collectors/`
-2. Implement collection logic
-3. Add configuration schema
-4. Update CLI interface
-5. Add unit tests
-
-### Custom Validation Rules
-
-1. Define rules in `config/validators.yaml`
-2. Implement validation logic in `validators/`
-3. Update report generation
-4. Test with sample data
-
-## Contributing
-
-1. Follow existing code structure and naming conventions
-2. Add comprehensive tests for new functionality
-3. Update documentation for API changes
-4. Ensure backward compatibility
-
-## License
-
-Enterprise Internal Use Only
+During a data center migration, a platform team can collect baseline state before cutover, collect the same evidence after DNS or workload migration, and attach the diff to the change ticket. The framework gives reviewers a compact signal on whether the host is ready for production traffic.
@@ -29,8 +29,8 @@ class MigrationValidator:

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
-        self.setup_logging()
        self.ensure_directories()
+        self.setup_logging()

    def setup_logging(self):
        """Configure logging."""
@@ -97,13 +97,23 @@ class MigrationValidator:

    def load_snapshot(self, snapshot_id: str) -> Dict[str, Any]:
        """Load snapshot from disk."""
-        snapshot_file = SNAPSHOTS_DIR / f"{snapshot_id}.json"
+        snapshot_path = Path(snapshot_id)
+        snapshot_file = snapshot_path if snapshot_path.exists() else SNAPSHOTS_DIR / f"{snapshot_id}.json"
        if not snapshot_file.exists():
            raise FileNotFoundError(f"Snapshot {snapshot_id} not found")

        with open(snapshot_file, 'r') as f:
            return json.load(f)

+    def collect_to_file(self, output_file: str, systems: List[str]) -> str:
+        """Collect a snapshot and write it to an explicit file path."""
+        snapshot = self.collect_system_data(systems)
+        with open(output_file, 'w') as f:  # 'w' truncates any existing file at this path
+            json.dump(snapshot, f, indent=2)
+            f.write("\n")  # json.dump emits no trailing newline; add one so the file ends cleanly
+        self.logger.info(f"Snapshot written: {output_file}")
+        return output_file  # the same path that was passed in, returned unchanged
+
    def create_snapshot(self, env: str, label: str, systems: List[str]) -> str:
        """Create and save a system snapshot."""
        self.logger.info(f"Creating snapshot for environment: {env}, label: {label}")
@@ -136,6 +146,27 @@ class MigrationValidator:
        self.logger.info(f"Comparison saved: {output_id}")
        return comparison

+    def compare_files(self, before_file: str, after_file: str, output_file: Optional[str] = None) -> Dict[str, Any]:
+        """Compare two explicit JSON snapshot files."""
+        self.logger.info(f"Comparing files: {before_file} vs {after_file}")
+
+        before = self.load_snapshot(before_file)  # load_snapshot opens the path if it exists, else SNAPSHOTS_DIR/<id>.json
+        after = self.load_snapshot(after_file)
+        comparison = compare.compare_snapshots(before, after)
+        comparison["metadata"] = {  # replaces any "metadata" key already present in the result
+            "before": before_file,
+            "after": after_file,
+            "timestamp": datetime.now().isoformat()  # naive local time: now() is called without a tz
+        }
+
+        if output_file:  # None or "" skips the write; the comparison is still returned
+            with open(output_file, 'w') as f:
+                json.dump(comparison, f, indent=2)
+                f.write("\n")  # json.dump emits no trailing newline
+            self.logger.info(f"Comparison written: {output_file}")
+
+        return comparison
+
    def generate_report(self, comparison_id: str, format_type: str, output_file: Optional[str] = None) -> str:
        """Generate a report from comparison results."""
        self.logger.info(f"Generating {format_type} report for comparison: {comparison_id}")
@@ -169,14 +200,14 @@ def main():
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Examples:
-  # Create pre-migration snapshot
-  python cli.py snapshot --env production --label pre-migration --systems web01,db01
+  # Collect pre-migration snapshot
+  python3 cli.py collect --output before.json --systems web01,db01

-  # Compare snapshots
-  python cli.py compare pre-migration-snapshot post-migration-snapshot --output comparison_001
+  # Compare snapshot files
+  python3 cli.py compare before.json after.json --output diff.json

  # Generate HTML report
-  python cli.py report --comparison comparison_001 --format html
+  python3 cli.py report --comparison comparison_001 --format html
        """
    )

@@ -185,6 +216,11 @@ Examples:

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

+    # Collect command
+    collect_parser = subparsers.add_parser('collect', help='Collect a system snapshot to a JSON file')
+    collect_parser.add_argument('--output', required=True, help='Output JSON file')
+    collect_parser.add_argument('--systems', default='localhost', help='Comma-separated list of systems')
+
    # Snapshot command
    snapshot_parser = subparsers.add_parser('snapshot', help='Create system snapshot')
    snapshot_parser.add_argument('--env', required=True, help='Target environment')
@@ -195,7 +231,7 @@ Examples:
    compare_parser = subparsers.add_parser('compare', help='Compare two snapshots')
    compare_parser.add_argument('snapshot1', help='First snapshot ID')
    compare_parser.add_argument('snapshot2', help='Second snapshot ID')
-    compare_parser.add_argument('--output', required=True, help='Comparison output ID')
+    compare_parser.add_argument('--output', help='Output comparison ID or JSON file')

    # Report command
    report_parser = subparsers.add_parser('report', help='Generate report from comparison')
@@ -217,7 +253,16 @@ Examples:
    validator = MigrationValidator(verbose=args.verbose)

    try:
-        if args.command == 'snapshot':
+        if args.command == 'collect':
+            systems = [system.strip() for system in args.systems.split(',') if system.strip()]
+            if args.dry_run:
+                print(f"DRY RUN: Would collect {systems} into {args.output}")
+                return
+
+            output_file = validator.collect_to_file(args.output, systems)
+            print(f"Snapshot written: {output_file}")
+
+        elif args.command == 'snapshot':
            systems = args.systems.split(',')
            if args.dry_run:
                print(f"DRY RUN: Would create snapshot for systems: {systems}")
@@ -231,8 +276,16 @@ Examples:
                print(f"DRY RUN: Would compare {args.snapshot1} vs {args.snapshot2}")
                return

-            comparison = validator.compare_snapshots(args.snapshot1, args.snapshot2, args.output)
-            print(f"Comparison completed: {args.output}")
+            output = args.output
+            if output and output.endswith('.json'):
+                comparison = validator.compare_files(args.snapshot1, args.snapshot2, output)
+                result = "PASS" if comparison.get("validation_results", {}).get("passed") else "FAIL"
+                print(f"Comparison completed: {output} ({result})")
+            else:
+                output_id = output or datetime.now().strftime('%Y%m%d_%H%M%S')
+                comparison = validator.compare_snapshots(args.snapshot1, args.snapshot2, output_id)
+                result = "PASS" if comparison.get("validation_results", {}).get("passed") else "FAIL"
+                print(f"Comparison completed: {output_id} ({result})")

        elif args.command == 'report':
            if args.dry_run:
@@ -267,4 +320,4 @@ Examples:
        sys.exit(1)

 if __name__ == "__main__":
-    main()
+    main()
@@ -0,0 +1,30 @@
+# Migration Validation Framework Architecture
+
+## Components
+
+- CLI: parses operator commands and coordinates workflows.
+- Collectors: gather mounts, services, and disk usage from target systems.
+- Snapshot files: JSON evidence used as immutable migration checkpoints.
+- Comparator: evaluates drift between before and after snapshots.
+- Reports: store JSON or HTML output for audit and review.
+
+## Data Flow
+
+```
+Operator
+  -> python3 cli.py collect
+  -> collectors over SSH
+  -> before.json / after.json
+  -> python3 cli.py compare
+  -> diff.json with PASS/FAIL validation
+```
+
+## Validation Flow
+
+```
+before.json -> Comparator -> service checks
+after.json  -> Comparator -> filesystem checks -> validation result
+                         -> mount checks
+```
+
+The framework keeps collection and comparison separate so migration evidence can be reviewed, archived, and replayed without recollecting from production systems.
@@ -0,0 +1,40 @@
+{
+  "metadata": {
+    "timestamp": "2026-04-29T03:40:00Z",
+    "systems": ["web01"],
+    "version": "1.0"
+  },
+  "data": {
+    "web01": {
+      "mounts": {
+        "mounts": [
+          {"device": "/dev/sda1", "mountpoint": "/", "fstype": "ext4", "options": "rw,relatime"},
+          {"device": "/dev/sdb1", "mountpoint": "/var", "fstype": "xfs", "options": "rw,noatime"}
+        ],
+        "usage": {
+          "/": {"filesystem": "/dev/sda1", "use_percent": "62%"},
+          "/var": {"filesystem": "/dev/sdb1", "use_percent": "94%"}
+        },
+        "timestamp": "2026-04-29T03:40:00Z"
+      },
+      "services": {
+        "service_manager": "systemd",
+        "services": [
+          {"name": "sshd", "active_state": "failed", "sub_state": "failed"},
+          {"name": "nginx", "active_state": "active", "sub_state": "running"},
+          {"name": "node-exporter", "active_state": "active", "sub_state": "running"}
+        ],
+        "timestamp": "2026-04-29T03:40:00Z"
+      },
+      "disk_usage": {
+        "filesystem_usage": [
+          {"filesystem": "/dev/sda1", "type": "ext4", "size": "80G", "used": "50G", "available": "30G", "use_percent": "62%", "mountpoint": "/"},
+          {"filesystem": "/dev/sdb1", "type": "xfs", "size": "200G", "used": "188G", "available": "12G", "use_percent": "94%", "mountpoint": "/var"}
+        ],
+        "directory_sizes": [{"path": "/var/lib/app", "size": "139G"}],
+        "largest_files": [{"path": "/var/lib/app/import/archive.tar", "size": "42G"}],
+        "timestamp": "2026-04-29T03:40:00Z"
+      }
+    }
+  }
+}
@@ -0,0 +1,39 @@
+{
+  "metadata": {
+    "timestamp": "2026-04-29T01:15:00Z",
+    "systems": ["web01"],
+    "version": "1.0"
+  },
+  "data": {
+    "web01": {
+      "mounts": {
+        "mounts": [
+          {"device": "/dev/sda1", "mountpoint": "/", "fstype": "ext4", "options": "rw,relatime"},
+          {"device": "/dev/sdb1", "mountpoint": "/var", "fstype": "xfs", "options": "rw,noatime"}
+        ],
+        "usage": {
+          "/": {"filesystem": "/dev/sda1", "use_percent": "61%"},
+          "/var": {"filesystem": "/dev/sdb1", "use_percent": "68%"}
+        },
+        "timestamp": "2026-04-29T01:15:00Z"
+      },
+      "services": {
+        "service_manager": "systemd",
+        "services": [
+          {"name": "sshd", "active_state": "active", "sub_state": "running"},
+          {"name": "nginx", "active_state": "active", "sub_state": "running"}
+        ],
+        "timestamp": "2026-04-29T01:15:00Z"
+      },
+      "disk_usage": {
+        "filesystem_usage": [
+          {"filesystem": "/dev/sda1", "type": "ext4", "size": "80G", "used": "49G", "available": "31G", "use_percent": "61%", "mountpoint": "/"},
+          {"filesystem": "/dev/sdb1", "type": "xfs", "size": "200G", "used": "136G", "available": "64G", "use_percent": "68%", "mountpoint": "/var"}
+        ],
+        "directory_sizes": [{"path": "/var/lib/app", "size": "84G"}],
+        "largest_files": [],
+        "timestamp": "2026-04-29T01:15:00Z"
+      }
+    }
+  }
+}
@@ -0,0 +1,211 @@
+{
+  "summary": {
+    "total_systems": 1,
+    "systems_with_changes": 1,
+    "total_changes": 7,
+    "changes_by_type": {
+      "mounts": 2,
+      "services": 2,
+      "disk_usage": 3
+    },
+    "most_affected_systems": [
+      [
+        "web01",
+        7
+      ]
+    ]
+  },
+  "differences": {
+    "mounts": {
+      "web01": {
+        "added_mounts": [],
+        "removed_mounts": [],
+        "changed_mounts": [],
+        "usage_changes": [
+          {
+            "mountpoint": "/",
+            "before": {
+              "filesystem": "/dev/sda1",
+              "use_percent": "61%"
+            },
+            "after": {
+              "filesystem": "/dev/sda1",
+              "use_percent": "62%"
+            }
+          },
+          {
+            "mountpoint": "/var",
+            "before": {
+              "filesystem": "/dev/sdb1",
+              "use_percent": "68%"
+            },
+            "after": {
+              "filesystem": "/dev/sdb1",
+              "use_percent": "94%"
+            }
+          }
+        ]
+      }
+    },
+    "services": {
+      "web01": {
+        "added_services": [
+          {
+            "name": "node-exporter",
+            "active_state": "active",
+            "sub_state": "running"
+          }
+        ],
+        "removed_services": [],
+        "status_changes": [
+          {
+            "name": "sshd",
+            "before": {
+              "active_state": "active",
+              "sub_state": "running"
+            },
+            "after": {
+              "active_state": "failed",
+              "sub_state": "failed"
+            }
+          }
+        ],
+        "configuration_changes": []
+      }
+    },
+    "disk_usage": {
+      "web01": {
+        "filesystem_changes": [
+          {
+            "mountpoint": "/",
+            "before": {
+              "filesystem": "/dev/sda1",
+              "type": "ext4",
+              "size": "80G",
+              "used": "49G",
+              "available": "31G",
+              "use_percent": "61%",
+              "mountpoint": "/"
+            },
+            "after": {
+              "filesystem": "/dev/sda1",
+              "type": "ext4",
+              "size": "80G",
+              "used": "50G",
+              "available": "30G",
+              "use_percent": "62%",
+              "mountpoint": "/"
+            }
+          },
+          {
+            "mountpoint": "/var",
+            "before": {
+              "filesystem": "/dev/sdb1",
+              "type": "xfs",
+              "size": "200G",
+              "used": "136G",
+              "available": "64G",
+              "use_percent": "68%",
+              "mountpoint": "/var"
+            },
+            "after": {
+              "filesystem": "/dev/sdb1",
+              "type": "xfs",
+              "size": "200G",
+              "used": "188G",
+              "available": "12G",
+              "use_percent": "94%",
+              "mountpoint": "/var"
+            }
+          }
+        ],
+        "directory_size_changes": [],
+        "significant_usage_changes": [
+          {
+            "mountpoint": "/var",
+            "change_percent": 26,
+            "before": {
+              "filesystem": "/dev/sdb1",
+              "type": "xfs",
+              "size": "200G",
+              "used": "136G",
+              "available": "64G",
+              "use_percent": "68%",
+              "mountpoint": "/var"
+            },
+            "after": {
+              "filesystem": "/dev/sdb1",
+              "type": "xfs",
+              "size": "200G",
+              "used": "188G",
+              "available": "12G",
+              "use_percent": "94%",
+              "mountpoint": "/var"
+            }
+          }
+        ]
+      }
+    }
+  },
+  "risk_assessment": {
+    "overall_risk": "high",
+    "risk_factors": [
+      {
+        "type": "service_failure",
+        "description": "Service failed: sshd",
+        "level": 3
+      },
+      {
+        "type": "disk_usage_spike",
+        "description": "Significant disk usage change: /var (26%)",
+        "level": 2
+      }
+    ],
+    "critical_changes": [],
+    "recommendations": [
+      "Immediate review required - critical changes detected",
+      "Consider rolling back migration if critical services are affected"
+    ]
+  },
+  "validation_results": {
+    "passed": false,
+    "checks": [
+      {
+        "name": "critical_services_running",
+        "description": "Verify critical services remain operational",
+        "passed": false,
+        "details": [
+          "Critical service sshd failed on web01"
+        ]
+      },
+      {
+        "name": "filesystem_integrity",
+        "description": "Verify filesystem integrity maintained",
+        "passed": true,
+        "details": []
+      },
+      {
+        "name": "no_critical_mounts_removed",
+        "description": "Verify critical mount points remain",
+        "passed": true,
+        "details": []
+      }
+    ],
+    "failed_checks": [
+      {
+        "name": "critical_services_running",
+        "description": "Verify critical services remain operational",
+        "passed": false,
+        "details": [
+          "Critical service sshd failed on web01"
+        ]
+      }
+    ],
+    "result": "FAIL"
+  },
+  "metadata": {
+    "before": "migration-validation-framework/examples/before.json",
+    "after": "migration-validation-framework/examples/after.json",
+    "timestamp": "2026-04-29T23:29:07.510774"
+  }
+}
@@ -0,0 +1,19 @@
+# Scenario: Before/After Migration Comparison
+
+## Description
+
+Compare a pre-cutover host snapshot against a post-cutover snapshot and determine whether the migrated system is ready for production traffic.
+
+## Commands
+
+```bash
+cd migration-validation-framework
+python3 cli.py compare examples/before.json examples/after.json --output /tmp/migration-diff.json
+```
+
+## Expected Result
+
+- The command writes a JSON diff.
+- The result is `FAIL` because `sshd` is in the `failed` state after migration.
+- The risk assessment highlights the `/var` disk usage increase.
+- The remediation path is to restore `sshd` and either free space on `/var` or expand it before approving cutover.
@@ -37,9 +37,12 @@ class SnapshotComparator:
        # Compare each data type
        data_types = ["mounts", "services", "disk_usage"]

+        data1 = snapshot1.get("data", {})
+        data2 = snapshot2.get("data", {})
+
        for data_type in data_types:
-            if data_type in snapshot1.get("data", {}) and data_type in snapshot2.get("data", {}):
-                differences = self.compare_data_type(snapshot1["data"], snapshot2["data"], data_type)
+            if self.data_type_exists(data1, data_type) or self.data_type_exists(data2, data_type):
+                differences = self.compare_data_type(data1, data2, data_type)
                comparison["differences"][data_type] = differences

        # Generate summary
@@ -50,10 +53,15 @@ class SnapshotComparator:

        # Validation results
        comparison["validation_results"] = self.validate_changes(comparison["differences"])
+        comparison["validation_results"]["result"] = "PASS" if comparison["validation_results"]["passed"] else "FAIL"

        logger.info("Snapshot comparison completed")
        return comparison

+    def data_type_exists(self, systems: Dict[str, Any], data_type: str) -> bool:
+        """Return true when at least one system has the requested collector data."""
+        return any(data_type in system_data for system_data in systems.values())  # False for an empty dict; each value must support "in" (a None value raises TypeError)
+
    def compare_data_type(self, data1: Dict[str, Any], data2: Dict[str, Any], data_type: str) -> Dict[str, Any]:
        """Compare a specific data type between two snapshots."""
        differences = {}
@@ -237,7 +245,7 @@ class SnapshotComparator:
    def generate_summary(self, differences: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a summary of all differences."""
        summary = {
-            "total_systems": len(differences),
+            "total_systems": 0,
            "systems_with_changes": 0,
            "total_changes": 0,
            "changes_by_type": {},
@@ -259,6 +267,8 @@ class SnapshotComparator:
                summary["changes_by_type"][data_type] += change_count
                summary["total_changes"] += change_count

+        summary["total_systems"] = len(system_change_counts)
+
        # Count systems with changes
        summary["systems_with_changes"] = len([s for s in system_change_counts.values() if s > 0])

@@ -488,4 +498,4 @@ class SnapshotComparator:
 def compare_snapshots(snapshot1: Dict[str, Any], snapshot2: Dict[str, Any]) -> Dict[str, Any]:
    """Main comparison function."""
    comparator = SnapshotComparator()
-    return comparator.compare_snapshots(snapshot1, snapshot2)
+    return comparator.compare_snapshots(snapshot1, snapshot2)