Hi,
I have 4 CPU-only nodes and 1 GPU node.
I am trying to submit a job to the GPU node, but qsub fails with the following error:
“qsub: Access from host not allowed, or unknown host”
I am able to submit CPU jobs to the CPU-only nodes.
The PBS-related configuration is shown below:
###############
cat cuda.pbs
###############
#!/bin/bash
# PBS batch script: requests one CPU core and one GPU from queue "gpuq"
# and runs a CUDA matrix-multiplication binary.
#
# Directives below are read by qsub at submission time:
#   -N           job name
#   -l ncpus     number of CPU cores requested
#   -l ngpus     number of GPUs requested
#   -q           destination queue
#   -l walltime  maximum run time as hh:mm:ss (20 seconds here)
#   -o / -e      files that receive the job's stdout / stderr
#PBS -N gpu_job
#PBS -l ncpus=1
#PBS -l ngpus=1
#PBS -q gpuq
#PBS -l walltime=00:00:20
#PBS -o output_6.log
#PBS -e error_6.log

# Navigate to the directory where your code is located.
# Abort on failure so the executable is never looked up in the wrong directory.
cd /hpc/home/hpcuser01/cuda-job/ || {
  printf 'cannot cd to %s\n' '/hpc/home/hpcuser01/cuda-job/' >&2
  exit 1
}

# Run the executable
./matrix_multiplication_6
###########################
############################
[root@gpu01 ~]# cat /etc/pbs.conf
############################
PBS_EXEC=/opt/pbs
PBS_SERVER=admin1
PBS_START_SERVER=0
PBS_START_SCHED=0
PBS_START_COMM=0
PBS_START_MOM=1
PBS_HOME=/var/spool/pbs
PBS_CORE_LIMIT=unlimited
PBS_SCP=/bin/scp
#############################################
pbsnodes -a
############################################
n1
Mom = n1
Port = 15002
pbs_version = 2022.1.1.20220926110806
ntype = PBS
state = free
pcpus = 128
resources_available.arch = linux
resources_available.enablegpu = False
resources_available.host = n1
resources_available.mem = 263419196kb
resources_available.ncpus = 128
resources_available.vnode = n1
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sat May 20 21:27:18 2023
last_used_time = Tue May 23 15:52:38 2023
server_instance_id = admin1.mguif.local:15001
n2
Mom = n2
Port = 15002
pbs_version = 2022.1.1.20220926110806
ntype = PBS
state = free
pcpus = 128
resources_available.arch = linux
resources_available.enablegpu = False
resources_available.host = n2
resources_available.mem = 263419196kb
resources_available.ncpus = 128
resources_available.vnode = n2
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sat May 20 21:27:18 2023
last_used_time = Mon May 22 11:22:11 2023
server_instance_id = admin1.mguif.local:15001
n3
Mom = n3
Port = 15002
pbs_version = 2022.1.1.20220926110806
ntype = PBS
state = free
pcpus = 128
resources_available.arch = linux
resources_available.enablegpu = False
resources_available.host = n3
resources_available.mem = 263419196kb
resources_available.ncpus = 128
resources_available.vnode = n3
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sat May 20 21:27:18 2023
last_used_time = Mon May 22 11:22:11 2023
server_instance_id = admin1.mguif.local:15001
n4
Mom = n4
Port = 15002
pbs_version = 2022.1.1.20220926110806
ntype = PBS
state = free
pcpus = 128
resources_available.arch = linux
resources_available.enablegpu = False
resources_available.host = n4
resources_available.mem = 263419196kb
resources_available.ncpus = 128
resources_available.vnode = n4
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Sat May 20 21:27:18 2023
last_used_time = Mon May 22 11:22:11 2023
server_instance_id = admin1.mguif.local:15001
gpu01
Mom = gpu01
Port = 15002
pbs_version = 2022.1.1.20220926110806
ntype = PBS
state = free
pcpus = 64
resources_available.arch = linux
resources_available.enablegpu = True
resources_available.host = gpu01
resources_available.mem = 791976532kb
resources_available.ncpus = 64
resources_available.ngpus = 4
resources_available.vnode = gpu01
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.ngpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Tue May 23 13:49:31 2023
last_used_time = Mon May 22 16:07:47 2023
server_instance_id = admin1.mguif.local:15001
################################################################
Qmgr: print server
# Create resources and set their properties.
# Create and define resource enablegpu
create resource enablegpu
set resource enablegpu type = boolean
set resource enablegpu flag = h
# Create and define resource ngpus
create resource ngpus
set resource ngpus type = long
set resource ngpus flag = hn
# Create queues and set their attributes.
# Create and define queue workq
create queue workq
set queue workq queue_type = Execution
set queue workq enabled = True
set queue workq started = True
# Create and define queue gpu
create queue gpu
set queue gpu queue_type = Execution
set queue gpu enabled = True
set queue gpu started = True
# Create and define queue gpuq
create queue gpuq
set queue gpuq queue_type = Execution
set queue gpuq acl_host_enable = True
set queue gpuq acl_hosts = gpu01
set queue gpuq enabled = True
set queue gpuq started = True
# Set server attributes.
set server scheduling = True
set server default_queue = workq
set server log_events = 511
set server mailer = /usr/sbin/sendmail
set server mail_from = adm
set server query_other_jobs = True
set server resources_default.ncpus = 1
set server default_chunk.ncpus = 1
set server scheduler_iteration = 600
set server resv_enable = True
set server node_fail_requeue = 310
set server max_array_size = 10000
set server pbs_license_info = 6200@admin1
set server pbs_license_min = 0
set server pbs_license_max = 2147483647
set server pbs_license_linger_time = 31536000
set server eligible_time_enable = False
set server job_history_enable = True
set server job_history_duration = 720:00:00
set server max_concurrent_provision = 5
set server max_job_sequence_id = 9999999
Qmgr:
####################################################