Files
portfolio/platform-projects/hpc-slurm-ai-cluster/templates/slurm.conf.j2
T
2026-06-05 15:38:56 +00:00

68 lines
2.9 KiB
Django/Jinja

# Managed by Ansible
# Rendered from templates/slurm.conf.j2; change the template or its variables,
# not the generated file.
#
# Cluster identity and daemon basics. Templated values come from the slurm_*
# Ansible variables and have no fallback here; everything else is fixed in
# the template.
ClusterName={{ slurm_cluster_name }}
# The controller is written as name(address).
SlurmctldHost={{ slurm_control_machine }}({{ slurm_control_addr }})
SlurmUser={{ slurm_user }}
AuthType=auth/munge
# State and spool directories are hard-coded, not variable-driven.
StateSaveLocation=/var/spool/slurmctld
SlurmdSpoolDir=/var/spool/slurmd
SwitchType=switch/none
MpiDefault={{ slurm_default_mpi_type }}
ProctrackType={{ slurm_proctrack_type }}
ReturnToService={{ slurm_return_to_service }}
{# GresTypes is emitted only when slurm_gres_types is defined and non-empty.
   The variable may be a comma-separated string (written as-is) or a list
   (joined with commas); a list must not be printed directly because it would
   render as its Python repr. -#}
{% if slurm_gres_types is defined and slurm_gres_types | length > 0 %}
GresTypes={{ slurm_gres_types if slurm_gres_types is string else (slurm_gres_types | join(',')) }}
{% endif %}
# PID file locations are fixed in the template.
SlurmctldPidFile=/run/slurmctld.pid
SlurmdPidFile=/run/slurmd.pid
# Daemon ports come from slurmctld_port and slurmd_port (note: these two
# variables are not prefixed with slurm_) and have no fallback here.
SlurmctldPort={{ slurmctld_port }}
SlurmdPort={{ slurmd_port }}
# Task and select plugins are variable-driven with no fallback here.
TaskPlugin={{ slurm_task_plugin }}
SelectType={{ slurm_select_type }}
SelectTypeParameters={{ slurm_select_type_parameters }}
# The scheduler plugin is fixed to backfill rather than exposed as a variable.
SchedulerType=sched/backfill
# Priority / fairshare
# Each setting reads an optional slurm_priority_* variable and falls back to
# the value given in its default() filter when that variable is undefined.
# NOTE(review): '7-0' and '1-0' look like days-hours values (7 days, 1 day);
# confirm the format against the slurm.conf man page.
PriorityType={{ slurm_priority_type | default('priority/multifactor') }}
PriorityDecayHalfLife={{ slurm_priority_decay_half_life | default('7-0') }}
PriorityCalcPeriod={{ slurm_priority_calc_period | default(5) }}
PriorityFavorSmall={{ slurm_priority_favor_small | default('NO') }}
PriorityWeightAge={{ slurm_priority_weight_age | default(1000) }}
PriorityWeightFairshare={{ slurm_priority_weight_fairshare | default(10000) }}
PriorityWeightJobSize={{ slurm_priority_weight_job_size | default(1000) }}
PriorityWeightPartition={{ slurm_priority_weight_partition | default(1000) }}
PriorityWeightQOS={{ slurm_priority_weight_qos | default(10000) }}
PriorityMaxAge={{ slurm_priority_max_age | default('1-0') }}
# Timeouts and wait/limit settings. All five values are fixed in the template
# rather than variable-driven.
SlurmctldTimeout=120
SlurmdTimeout=300
InactiveLimit=0
KillWait=30
Waittime=0
# Accounting storage and job accounting/completion plugins.
# slurm_accounting_storage_type has no fallback here.
AccountingStorageType={{ slurm_accounting_storage_type }}
{% if slurm_accounting_storage_type == "accounting_storage/slurmdbd" %}
# slurmdbd-only settings: emitted only when the storage type is exactly
# accounting_storage/slurmdbd. Host and port have no fallback; the enforce
# and TRES lists fall back to the values in their default() filters.
AccountingStorageHost={{ slurm_accounting_storage_host }}
AccountingStoragePort={{ slurm_accounting_storage_port }}
AccountingStorageEnforce={{ slurm_accounting_storage_enforce | default('associations,limits,qos') }}
AccountingStorageTRES={{ slurm_accounting_storage_tres | default('cpu,mem,energy,node,billing,fs/disk,pages,vmem,gres/gpu') }}
{% endif %}
# Job accounting gathering falls back to jobacct_gather/none; JobCompType has
# no fallback here and must come from slurm_job_comp_type.
JobAcctGatherType={{ slurm_job_acct_gather_type | default('jobacct_gather/none') }}
JobCompType={{ slurm_job_comp_type }}
# Logging: both daemons use debug level info and write to fixed files under
# /var/log/slurm.
SlurmctldDebug=info
SlurmdDebug=info
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdLogFile=/var/log/slurm/slurmd.log
{# One NodeName line per entry of slurm_nodes whose managed_state is 'present'
   (entries without managed_state count as present). The topology, Gres and
   Feature fields are appended only when the matching key holds a non-empty
   value. Tags that open with a dash strip the preceding line break, so the
   continuation lines below still render as a single output line. -#}
{% for node in slurm_nodes if node.managed_state | default('present') == 'present' %}
NodeName={{ node.name }} NodeAddr={{ node.addr }} CPUs={{ node.cpus }}
{%- if node.topology | default('') | length > 0 %} {{ node.topology }}
{%- endif %} RealMemory={{ node.real_memory }}
{%- if node.gres | default('') | length > 0 %} Gres={{ node.gres }}
{%- endif %}
{%- if node.features | default('') | length > 0 %} Feature={{ node.features }}
{%- endif %} State=UNKNOWN
{% endfor %}
# Partition definitions: one PartitionName line per entry of slurm_partitions.
# Every entry must provide name, nodes, default, max_time and state; none of
# them has a fallback here.
# NOTE(review): values are inserted verbatim, so each partition's nodes value
# is presumably already a Slurm node expression string -- confirm against the
# variable definitions.
{% for partition in slurm_partitions %}
PartitionName={{ partition.name }} Nodes={{ partition.nodes }} Default={{ partition.default }} MaxTime={{ partition.max_time }} State={{ partition.state }}
{% endfor %}