feat: Add comprehensive enterprise Linux infrastructure portfolio with Ansible, Python, and ELK stack
CI Pipeline / lint-ansible (push) Waiting to run
CI Pipeline / test-python (push) Waiting to run
CI Pipeline / validate-docker (push) Waiting to run
CI Pipeline / security-scan (push) Waiting to run
CI Pipeline / documentation (push) Waiting to run
CI Pipeline / integration-test (push) Blocked by required conditions
CI Pipeline / lint-ansible (push) Waiting to run
CI Pipeline / test-python (push) Waiting to run
CI Pipeline / validate-docker (push) Waiting to run
CI Pipeline / security-scan (push) Waiting to run
CI Pipeline / documentation (push) Waiting to run
CI Pipeline / integration-test (push) Blocked by required conditions
This commit is contained in:
@@ -0,0 +1,207 @@
"""
Disk Usage Data Collector

Collects disk usage statistics including directory sizes,
filesystem usage, and largest files information.
"""

import json
import logging
import shlex
import subprocess
from pathlib import Path
from typing import Any, Dict, List
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
class DiskUsageCollector:
    """Collect disk usage statistics from a remote host over SSH.

    Each measurement runs a command (``df``, ``du``, ``find``/``ls`` or
    ``date``) on the target through the local ``ssh`` client and parses the
    text it prints.

    Attributes:
        max_depth: Depth limit set at construction; no method of this class
            reads it.
        exclude_paths: Absolute paths skipped by the directory-size and
            largest-file collectors.
    """

    def __init__(self):
        self.max_depth = 3
        self.exclude_paths = [
            "/proc",
            "/sys",
            "/dev",
            "/run",
            "/tmp",
            "/var/log",
        ]

    @staticmethod
    def _run_remote(system: str, command: str, timeout: int) -> subprocess.CompletedProcess:
        """Run *command* on *system* through ``ssh`` and return the result.

        Args:
            system: Host (or ``user@host``) handed to ``ssh``.
            command: Shell command line executed on the remote side.
            timeout: Seconds to wait before giving up.

        Raises:
            ValueError: If *system* is empty or starts with ``-``.
            subprocess.TimeoutExpired: If the command exceeds *timeout*.
        """
        # A target beginning with "-" would be parsed by ssh as an option
        # (e.g. -oProxyCommand=...), so refuse it instead of executing it.
        if not system or system.startswith("-"):
            raise ValueError(f"Invalid target system: {system!r}")
        return subprocess.run(
            ["ssh", system, command],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    @staticmethod
    def _parse_df_output(output: str) -> List[Dict[str, Any]]:
        """Convert ``df --output=...`` text into one dict per filesystem row."""
        rows = output.strip().split("\n")
        usage_stats = []
        for row in rows[1:]:  # the first row is the column header
            # Cap the split at seven fields so a mount point that contains
            # spaces stays intact in the last one.
            fields = row.rstrip().split(None, 6)
            if len(fields) < 7:
                continue
            usage_stats.append({
                "filesystem": fields[0],
                "type": fields[1],
                "size": fields[2],
                "used": fields[3],
                "available": fields[4],
                "use_percent": fields[5],
                "mountpoint": fields[6],
            })
        return usage_stats

    @staticmethod
    def _parse_du_summary(output: str) -> Dict[str, str]:
        """Convert the ``<size>\\t<path>`` line printed by ``du -sh`` into a dict.

        Raises:
            ValueError: If *output* contains no tab-separated summary.
        """
        size, tab, path = output.strip().partition("\t")
        if not tab:
            raise ValueError("du produced no size summary")
        return {"path": path, "size": size}

    @staticmethod
    def _parse_ls_output(output: str) -> List[Dict[str, Any]]:
        """Convert ``ls -lh`` lines into one dict per file.

        The ``time`` entry is whatever ``ls`` printed in its eighth column,
        which may be a year rather than a clock time.
        """
        files = []
        for row in output.strip().split("\n"):
            # Cap the split at nine fields so the path keeps its original
            # whitespace instead of being re-joined with single spaces.
            fields = row.split(None, 8)
            if len(fields) < 9:
                continue  # blank or malformed line
            files.append({
                "permissions": fields[0],
                "links": fields[1],
                "owner": fields[2],
                "group": fields[3],
                "size": fields[4],
                "month": fields[5],
                "day": fields[6],
                "time": fields[7],
                "path": fields[8],
            })
        return files

    def _largest_files_command(self) -> str:
        """Build the remote pipeline that lists the 20 largest regular files."""
        prune = ""
        if self.exclude_paths:
            tests = " -o ".join(
                f"-path {shlex.quote(path)}" for path in self.exclude_paths
            )
            # -prune stops find from descending into the excluded trees at
            # all, rather than walking them and filtering each entry.
            prune = f"\\( {tests} \\) -prune -o "
        # "-exec ... {} +" hands files to ls in batches instead of starting
        # one ls process per file.
        return (
            f"find / {prune}-type f -exec ls -lh {{}} + 2>/dev/null"
            " | sort -k5 -hr | head -20"
        )

    def collect_disk_usage(self, system: str) -> Dict[str, Any]:
        """Collect disk usage information from target system.

        Args:
            system: Host (or ``user@host``) to inspect over SSH.

        Returns:
            Dict with the keys ``filesystem_usage``, ``directory_sizes``,
            ``largest_files`` and ``timestamp``.

        Raises:
            Exception: Whatever a sub-collector raises, after logging it.
        """
        logger.info("Collecting disk usage data from %s", system)

        try:
            filesystem_usage = self.collect_filesystem_usage(system)
            directory_sizes = self.collect_directory_sizes(system)
            largest_files = self.collect_largest_files(system)

            return {
                "filesystem_usage": filesystem_usage,
                "directory_sizes": directory_sizes,
                "largest_files": largest_files,
                "timestamp": self.get_timestamp(system),
            }
        except Exception as e:
            logger.error("Failed to collect disk usage from %s: %s", system, e)
            raise

    def collect_filesystem_usage(self, system: str) -> List[Dict[str, Any]]:
        """Collect filesystem usage statistics.

        Returns:
            One dict per ``df`` row with the keys ``filesystem``, ``type``,
            ``size``, ``used``, ``available``, ``use_percent`` and
            ``mountpoint``; empty when ``df`` printed only a header.

        Raises:
            RuntimeError: If ``df`` exits with a non-zero status.
            subprocess.TimeoutExpired: If the command exceeds 30 seconds.
        """
        try:
            result = self._run_remote(
                system,
                "df -h --output=source,fstype,size,used,avail,pcent,target",
                timeout=30,
            )
            if result.returncode != 0:
                raise RuntimeError(f"df command failed: {result.stderr}")
            return self._parse_df_output(result.stdout)
        except subprocess.TimeoutExpired:
            logger.error("Timeout collecting filesystem usage from %s", system)
            raise
        except Exception as e:
            logger.error(
                "Failed to collect filesystem usage from %s: %s", system, e
            )
            raise

    def collect_directory_sizes(self, system: str) -> List[Dict[str, Any]]:
        """Collect sizes of top-level directories.

        Failures are handled per directory: a timeout or an unparsable
        result is logged as a warning and that directory is skipped.

        Returns:
            One ``{"path": ..., "size": ...}`` dict per measured directory.
        """
        directory_sizes = []
        dirs_to_check = ["/", "/home", "/var", "/usr", "/opt", "/etc"]

        for directory in dirs_to_check:
            if directory in self.exclude_paths:
                continue

            try:
                result = self._run_remote(
                    system,
                    f"du -sh {shlex.quote(directory)} 2>/dev/null",
                    timeout=60,
                )
                # du exits non-zero when it meets an unreadable
                # subdirectory but still prints the total, so the decision
                # is based on the output rather than the exit status.
                directory_sizes.append(self._parse_du_summary(result.stdout))
            except subprocess.TimeoutExpired:
                logger.warning(
                    "Timeout getting size for %s on %s", directory, system
                )
            except Exception as e:
                logger.warning(
                    "Failed to get size for %s on %s: %s", directory, system, e
                )

        return directory_sizes

    def collect_largest_files(self, system: str) -> List[Dict[str, Any]]:
        """Collect information about largest files in the system.

        Returns:
            Up to 20 dicts with the keys ``permissions``, ``links``,
            ``owner``, ``group``, ``size``, ``month``, ``day``, ``time`` and
            ``path``; empty when the remote command exits non-zero.

        Raises:
            subprocess.TimeoutExpired: If the scan exceeds 120 seconds.
        """
        try:
            result = self._run_remote(
                system, self._largest_files_command(), timeout=120
            )
            if result.returncode != 0:
                # Still best-effort, but no longer silent.
                logger.warning(
                    "Largest-files scan on %s exited with status %s",
                    system,
                    result.returncode,
                )
                return []
            return self._parse_ls_output(result.stdout)
        except subprocess.TimeoutExpired:
            logger.error("Timeout collecting largest files from %s", system)
            raise
        except Exception as e:
            logger.error(
                "Failed to collect largest files from %s: %s", system, e
            )
            raise

    def get_timestamp(self, system: str) -> str:
        """Get current timestamp from target system.

        Best-effort: returns the output of ``date -Iseconds`` on the target,
        or the string ``"unknown"`` if the command fails for any reason.
        """
        try:
            result = self._run_remote(system, "date -Iseconds", timeout=10)
        except Exception as e:
            # Deliberately broad: a missing timestamp must not fail the
            # whole collection, but the cause is kept in the debug log.
            logger.debug("Could not read timestamp from %s: %s", system, e)
            return "unknown"

        if result.returncode != 0:
            return "unknown"
        return result.stdout.strip()
|
||||
|
||||
def collect(system: str) -> Dict[str, Any]:
    """Gather the disk usage report for *system*.

    Module-level entry point: creates a fresh ``DiskUsageCollector`` and
    returns whatever its ``collect_disk_usage`` method produces.
    """
    return DiskUsageCollector().collect_disk_usage(system)
|
||||
Reference in New Issue
Block a user