I have a strange issue when I try to schedule the following job.
The job requests the scratch resource, whose value is calculated by a periodic hook:
#!/bin/bash
#PBS -l select=1:ncpus=20:scratch=10g
#PBS -q workq
#PBS -l walltime=00:10:00
#PBS -j oe
#PBS -N scratch
sleep 10m
The job gets queued, but it never starts. The job comment is updated with the following value:
comment = Not Running: Insufficient amount of resource: ncpu
These are the settings of the nodes (all nodes have the same settings):
gnode03
Mom = gnode03
ntype = PBS
state = free
pcpus = 32
resources_available.arch = linux
resources_available.host = gnode03
resources_available.hpmem = 0b
resources_available.mem = 0b
resources_available.ncpus = 0
resources_available.ngpus = 0
resources_available.scratch = 3559041024kb
resources_available.vmem = 0b
resources_available.vnode = gnode03
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sun Nov 20 18:11:54 2022
last_used_time = Wed Oct 26 11:23:50 2022
gnode03[0]
Mom = gnode03
ntype = PBS
state = free
pcpus = 16
resources_available.arch = linux
resources_available.host = gnode03
resources_available.hpmem = 0b
resources_available.mem = 128284mb
resources_available.ncpus = 16
resources_available.ngpus = 2
resources_available.scratch = @gnode03
resources_available.vmem = 144634mb
resources_available.vnode = gnode03[0]
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.ngpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sun Nov 20 18:11:54 2022
last_used_time = Thu Nov 17 08:50:24 2022
gnode03[1]
Mom = gnode03
ntype = PBS
state = free
pcpus = 16
resources_available.arch = linux
resources_available.host = gnode03
resources_available.hpmem = 0b
resources_available.mem = 128984mb
resources_available.ncpus = 16
resources_available.ngpus = 2
resources_available.scratch = @gnode03
resources_available.vmem = 145334mb
resources_available.vnode = gnode03[1]
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.ngpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sun Nov 20 18:11:54 2022
last_used_time = Thu Nov 17 08:50:24 2022
The strange behavior is that the job does get scheduled if I request all the ncpus of a node (16+16):
#PBS -l select=1:ncpus=32:scratch=10g
Also, if I submit a job requesting exactly the ncpus of one complete vnode (16), it works:
#PBS -l select=1:ncpus=16:scratch=10g
I’m running PBS version 22.05.11.