This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
# Managed by Ansible
|
||||
# Slurm cgroup configuration
|
||||
|
||||
# Cgroup plugin selection, followed by the per-resource constraint switches:
# cores, RAM and devices are constrained; swap is left unconstrained.
CgroupPlugin=autodetect
|
||||
|
||||
ConstrainCores=yes
|
||||
ConstrainRAMSpace=yes
|
||||
ConstrainSwapSpace=no
|
||||
ConstrainDevices=yes
|
||||
|
||||
# Memory limits. Per the Slurm cgroup.conf man page the Allowed*/Max* values
# are percentages and MinRAMSpace is a lower bound in megabytes.
# NOTE(review): ConstrainSwapSpace is set to no in this file, so the two swap
# limits below presumably have no effect -- confirm before relying on them.
AllowedRAMSpace=100
|
||||
AllowedSwapSpace=0
|
||||
MaxRAMPercent=100
|
||||
MaxSwapPercent=0
|
||||
|
||||
MinRAMSpace=30
|
||||
@@ -0,0 +1,4 @@
|
||||
# Managed by Ansible
|
||||
# One GPU entry per node that is present (managed_state defaults to present)
# and declares a non-empty gres value.
# NOTE(review): Name is fixed to gpu and File falls back to the single device
# /dev/nvidia0 -- confirm that nodes with several GPUs, or with non-GPU gres,
# always set gres_file so the device count matches the node definition.
{% for node in slurm_nodes if node.managed_state | default('present') == 'present' and node.gres | default('') | length > 0 %}
|
||||
NodeName={{ node.name }} Name=gpu File={{ node.gres_file | default('/dev/nvidia0') }}
|
||||
{% endfor %}
|
||||
@@ -0,0 +1,67 @@
|
||||
# Managed by Ansible
|
||||
|
||||
# Cluster identity and controller; the controller is rendered as name(address).
ClusterName={{ slurm_cluster_name }}
|
||||
SlurmctldHost={{ slurm_control_machine }}({{ slurm_control_addr }})
|
||||
|
||||
# Daemon account, munge authentication, fixed state/spool directories and the
# switch plugin are set here; the MPI default, process tracking plugin and
# ReturnToService value come from template variables with no defaults.
SlurmUser={{ slurm_user }}
|
||||
AuthType=auth/munge
|
||||
StateSaveLocation=/var/spool/slurmctld
|
||||
SlurmdSpoolDir=/var/spool/slurmd
|
||||
SwitchType=switch/none
|
||||
MpiDefault={{ slurm_default_mpi_type }}
|
||||
ProctrackType={{ slurm_proctrack_type }}
|
||||
ReturnToService={{ slurm_return_to_service }}
|
||||
# GRES types are emitted only when the variable is defined and non-empty.
# The value may be given as a comma-separated string or as a list; a list is
# joined with commas so it is never rendered as a list literal.
{% if slurm_gres_types is defined and slurm_gres_types | length > 0 %}
|
||||
GresTypes={{ slurm_gres_types if slurm_gres_types is string else slurm_gres_types | join(',') }}
|
||||
{% endif %}
|
||||
|
||||
# PID files are fixed under /run; both daemon ports come from template
# variables with no defaults.
SlurmctldPidFile=/run/slurmctld.pid
|
||||
SlurmdPidFile=/run/slurmd.pid
|
||||
SlurmctldPort={{ slurmctld_port }}
|
||||
SlurmdPort={{ slurmd_port }}
|
||||
|
||||
# Task launch and resource selection plugins, all taken from template
# variables with no defaults.
TaskPlugin={{ slurm_task_plugin }}
|
||||
SelectType={{ slurm_select_type }}
|
||||
SelectTypeParameters={{ slurm_select_type_parameters }}
|
||||
|
||||
# The scheduler plugin is fixed to backfill (not templated).
SchedulerType=sched/backfill
|
||||
# Priority / fairshare: every setting below can be overridden through its
# template variable and otherwise falls back to the default written in the
# expression (multifactor plugin, 7-0 decay half-life, 1-0 maximum age).
|
||||
PriorityType={{ slurm_priority_type | default('priority/multifactor') }}
|
||||
PriorityDecayHalfLife={{ slurm_priority_decay_half_life | default('7-0') }}
|
||||
PriorityCalcPeriod={{ slurm_priority_calc_period | default(5) }}
|
||||
PriorityFavorSmall={{ slurm_priority_favor_small | default('NO') }}
|
||||
PriorityWeightAge={{ slurm_priority_weight_age | default(1000) }}
|
||||
PriorityWeightFairshare={{ slurm_priority_weight_fairshare | default(10000) }}
|
||||
PriorityWeightJobSize={{ slurm_priority_weight_job_size | default(1000) }}
|
||||
PriorityWeightPartition={{ slurm_priority_weight_partition | default(1000) }}
|
||||
PriorityWeightQOS={{ slurm_priority_weight_qos | default(10000) }}
|
||||
PriorityMaxAge={{ slurm_priority_max_age | default('1-0') }}
|
||||
|
||||
# Fixed timeout and wait settings (not templated). Per the slurm.conf man page
# these values are expressed in seconds.
SlurmctldTimeout=120
|
||||
SlurmdTimeout=300
|
||||
InactiveLimit=0
|
||||
KillWait=30
|
||||
Waittime=0
|
||||
|
||||
# Accounting. The storage host, port, enforcement flags and tracked TRES list
# are emitted only when the storage type is accounting_storage/slurmdbd.
# The storage type and the job completion plugin have no defaults.
# NOTE(review): the gather plugin falls back to jobacct_gather/none, which
# presumably means no per-job usage is collected unless the variable is
# overridden -- confirm this is intended when slurmdbd accounting is enabled.
AccountingStorageType={{ slurm_accounting_storage_type }}
|
||||
{% if slurm_accounting_storage_type == "accounting_storage/slurmdbd" %}
|
||||
AccountingStorageHost={{ slurm_accounting_storage_host }}
|
||||
AccountingStoragePort={{ slurm_accounting_storage_port }}
|
||||
AccountingStorageEnforce={{ slurm_accounting_storage_enforce | default('associations,limits,qos') }}
|
||||
AccountingStorageTRES={{ slurm_accounting_storage_tres | default('cpu,mem,energy,node,billing,fs/disk,pages,vmem,gres/gpu') }}
|
||||
{% endif %}
|
||||
JobAcctGatherType={{ slurm_job_acct_gather_type | default('jobacct_gather/none') }}
|
||||
JobCompType={{ slurm_job_comp_type }}
|
||||
|
||||
# Both daemons log at info level to separate files under /var/log/slurm.
SlurmctldDebug=info
|
||||
SlurmdDebug=info
|
||||
SlurmctldLogFile=/var/log/slurm/slurmctld.log
|
||||
SlurmdLogFile=/var/log/slurm/slurmd.log
|
||||
|
||||
# One NodeName line per node whose managed_state is present (the default when
# the attribute is unset). Name, addr, cpus and real_memory are always emitted;
# the topology string, Gres and Feature are appended only when the matching
# node attribute is non-empty. Every node is declared with State=UNKNOWN.
{% for node in slurm_nodes if node.managed_state | default('present') == 'present' %}
|
||||
NodeName={{ node.name }} NodeAddr={{ node.addr }} CPUs={{ node.cpus }}{% if node.topology | default('') | length > 0 %} {{ node.topology }}{% endif %} RealMemory={{ node.real_memory }}{% if node.gres | default('') | length > 0 %} Gres={{ node.gres }}{% endif %}{% if node.features | default('') | length > 0 %} Feature={{ node.features }}{% endif %} State=UNKNOWN
|
||||
{% endfor %}
|
||||
|
||||
# One PartitionName line per entry in slurm_partitions. The name, nodes,
# default, max_time and state attributes are rendered as given; no defaults
# are applied here, and there is no managed_state filter as used for nodes.
{% for partition in slurm_partitions %}
|
||||
PartitionName={{ partition.name }} Nodes={{ partition.nodes }} Default={{ partition.default }} MaxTime={{ partition.max_time }} State={{ partition.state }}
|
||||
{% endfor %}
|
||||
@@ -0,0 +1,38 @@
|
||||
# Managed by Ansible
|
||||
# Slurm database daemon configuration
|
||||
|
||||
# Munge authentication, matching the AuthType used in the slurm.conf template.
AuthType=auth/munge
|
||||
|
||||
# Listening host and port of the database daemon (template variables, no defaults).
DbdHost={{ slurmdbd_host }}
|
||||
DbdPort={{ slurmdbd_port }}
|
||||
|
||||
# Account the daemon runs as; the same variable is used in the slurm.conf template.
SlurmUser={{ slurm_user }}
|
||||
|
||||
# Fixed logging level, log file and PID file locations.
DebugLevel=info
|
||||
LogFile=/var/log/slurm/slurmdbd.log
|
||||
PidFile=/run/slurmdbd.pid
|
||||
|
||||
# Commit delay is overridable and defaults to 1.
CommitDelay={{ slurmdbd_commit_delay | default(1) }}
|
||||
|
||||
# Database backend connection; every value comes from a template variable
# with no default.
# NOTE(review): StoragePass is rendered in clear text, so the deployed file
# should presumably be readable only by the Slurm user -- confirm the file
# mode set by the deploying task. A '#' inside the password would presumably
# start a comment in the rendered file; verify such values are escaped.
StorageType={{ slurmdbd_storage_type }}
|
||||
StorageHost={{ slurmdbd_storage_host }}
|
||||
StoragePort={{ slurmdbd_storage_port }}
|
||||
StorageLoc={{ slurmdbd_storage_loc }}
|
||||
StorageUser={{ slurmdbd_storage_user }}
|
||||
StoragePass={{ slurmdbd_storage_pass }}
|
||||
|
||||
# Retention / purge policy: each record class has its own overridable
# retention period. Defaults are 12 months, except step and suspend records
# (3 months) and usage records (24 months).
|
||||
PurgeEventAfter={{ slurmdbd_purge_event_after | default('12months') }}
|
||||
PurgeJobAfter={{ slurmdbd_purge_job_after | default('12months') }}
|
||||
PurgeResvAfter={{ slurmdbd_purge_resv_after | default('12months') }}
|
||||
PurgeStepAfter={{ slurmdbd_purge_step_after | default('3months') }}
|
||||
PurgeSuspendAfter={{ slurmdbd_purge_suspend_after | default('3months') }}
|
||||
PurgeTXNAfter={{ slurmdbd_purge_txn_after | default('12months') }}
|
||||
PurgeUsageAfter={{ slurmdbd_purge_usage_after | default('24months') }}
|
||||
|
||||
# Archiving is off by default for every record class; each switch can be
# overridden individually through its template variable.
ArchiveEvents={{ slurmdbd_archive_events | default('no') }}
|
||||
ArchiveJobs={{ slurmdbd_archive_jobs | default('no') }}
|
||||
ArchiveSteps={{ slurmdbd_archive_steps | default('no') }}
|
||||
ArchiveSuspend={{ slurmdbd_archive_suspend | default('no') }}
|
||||
ArchiveTXN={{ slurmdbd_archive_txn | default('no') }}
|
||||
ArchiveUsage={{ slurmdbd_archive_usage | default('no') }}
|
||||
Reference in New Issue
Block a user