Hello,
I am managing a 40-node cluster on which several normal queues and two high-priority queues have been configured (queue details are given below). The problem is that whenever a user submits a job to a high-priority queue (hpq) and the resources it requires are not available, the hpq job takes resources away from other jobs that are already in the running (R) state, and those running jobs are moved to the suspended (S) state. How can I prevent that from happening?
Queue details:
#
# Create and define queue hpq_1day_large
#
# High-priority execution queue. It is the only queue in this listing that
# sets an explicit Priority.
create queue hpq_1day_large
set queue hpq_1day_large queue_type = Execution
# NOTE(review): in PBS Professional, a queue whose priority is at or above the
# scheduler's preempt_queue_prio threshold is treated as an express queue, and
# its jobs may preempt (e.g. suspend) running jobs from other queues. Confirm
# the threshold and preempt_prio / preempt_order settings in sched_config.
set queue hpq_1day_large Priority = 500
# At most 5 jobs may be queued here overall.
set queue hpq_1day_large max_queued = [o:PBS_ALL=5]
# User ACL is enabled: only the users listed below may submit to this queue.
set queue hpq_1day_large acl_user_enable = True
set queue hpq_1day_large acl_users = cdsjsar
set queue hpq_1day_large acl_users += secmon
# Per-job resource bounds and the default walltime.
set queue hpq_1day_large resources_max.nodect = 29
set queue hpq_1day_large resources_max.walltime = 24:00:00
set queue hpq_1day_large resources_min.ncpus = 16
set queue hpq_1day_large resources_min.nodect = 1
set queue hpq_1day_large resources_default.walltime = 24:00:00
# NOTE(review): acl_group_enable is False, so the acl_groups entry below is
# presumably not enforced -- confirm whether group access control is intended.
set queue hpq_1day_large acl_group_enable = False
set queue hpq_1day_large acl_groups = hpqCPU
# Chunk defaults; nodetype and Qlist appear to be site-defined custom
# resources -- confirm against the server's resource definitions.
set queue hpq_1day_large default_chunk.nodetype = cpunode
set queue hpq_1day_large default_chunk.Qlist = hrl
set queue hpq_1day_large enabled = True
# Fixed: the original line read "set hpq_gpu_1day started = True", which
# omitted the "queue" object keyword and named a different queue.
set queue hpq_1day_large started = True
#
# Create and define queue qreg_1day_small
#
# Regular execution queue for small jobs (1 to 127 CPUs, up to 24 hours).
create queue qreg_1day_small
set queue qreg_1day_small queue_type = Execution
# Queued-job limits: 100 overall, 5 per generic user.
set queue qreg_1day_small max_queued = [o:PBS_ALL=100]
set queue qreg_1day_small max_queued += [u:PBS_GENERIC=5]
# Per-job resource bounds and the default walltime.
set queue qreg_1day_small resources_max.ncpus = 127
set queue qreg_1day_small resources_max.nodect = 29
set queue qreg_1day_small resources_max.walltime = 24:00:00
set queue qreg_1day_small resources_min.ncpus = 1
set queue qreg_1day_small resources_min.nodect = 1
set queue qreg_1day_small resources_default.walltime = 24:00:00
# Chunk defaults; nodetype and Qlist appear to be site-defined custom
# resources -- confirm against the server's resource definitions.
set queue qreg_1day_small default_chunk.nodetype = cpunode
set queue qreg_1day_small default_chunk.Qlist = qrs
# Running-job limits: 75 overall, 5 per generic user.
set queue qreg_1day_small max_run = [o:PBS_ALL=75]
set queue qreg_1day_small max_run += [u:PBS_GENERIC=5]
set queue qreg_1day_small enabled = True
set queue qreg_1day_small started = True
#
# Create and define queue qssd_1day_small
#
# Execution queue for small jobs (1 to 127 CPUs, at most 8 nodes, up to 24 hours).
create queue qssd_1day_small
set queue qssd_1day_small queue_type = Execution
# Queued-job limits: 20 overall, 2 per generic user.
set queue qssd_1day_small max_queued = [o:PBS_ALL=20]
set queue qssd_1day_small max_queued += [u:PBS_GENERIC=2]
# Per-job resource bounds and the default walltime.
set queue qssd_1day_small resources_max.ncpus = 127
set queue qssd_1day_small resources_max.nodect = 8
set queue qssd_1day_small resources_max.walltime = 24:00:00
set queue qssd_1day_small resources_min.ncpus = 1
set queue qssd_1day_small resources_min.nodect = 1
set queue qssd_1day_small resources_default.walltime = 24:00:00
# Chunk defaults; nodetype and Qlist appear to be site-defined custom
# resources -- confirm against the server's resource definitions.
set queue qssd_1day_small default_chunk.nodetype = cpunode
set queue qssd_1day_small default_chunk.Qlist = qss
# Running-job limits: 10 overall, 1 per generic user.
set queue qssd_1day_small max_run = [o:PBS_ALL=10]
set queue qssd_1day_small max_run += [u:PBS_GENERIC=1]
set queue qssd_1day_small enabled = True
set queue qssd_1day_small started = True
#
# Create and define queue qreg_1day_large
#
# Regular execution queue for large jobs (256 to 512 CPUs, up to 24 hours).
create queue qreg_1day_large
set queue qreg_1day_large queue_type = Execution
# Queued-job limits: 20 overall, 2 per generic user.
set queue qreg_1day_large max_queued = [o:PBS_ALL=20]
set queue qreg_1day_large max_queued += [u:PBS_GENERIC=2]
# Per-job resource bounds and the default walltime.
set queue qreg_1day_large resources_max.ncpus = 512
set queue qreg_1day_large resources_max.nodect = 29
set queue qreg_1day_large resources_max.walltime = 24:00:00
set queue qreg_1day_large resources_min.ncpus = 256
set queue qreg_1day_large resources_min.nodect = 1
set queue qreg_1day_large resources_default.walltime = 24:00:00
# Chunk defaults; nodetype and Qlist appear to be site-defined custom
# resources -- confirm against the server's resource definitions.
set queue qreg_1day_large default_chunk.nodetype = cpunode
set queue qreg_1day_large default_chunk.Qlist = qrl
# Running-job limits: 10 overall, 1 per generic user.
set queue qreg_1day_large max_run = [o:PBS_ALL=10]
set queue qreg_1day_large max_run += [u:PBS_GENERIC=1]
set queue qreg_1day_large enabled = True
set queue qreg_1day_large started = True
(Other queues are defined in the same way and are omitted here.)