Node: z53, Error: Unknown node

I successfully created the node z53 using qmgr as follows:

qmgr -c "create node z53"

Everything also looks good when I do pbsnodes -a as shown below:

z53
Mom = lustwz53
ntype = PBS
state = free
pcpus = 40
resources_available.arch = linux
resources_available.host = lustwz53
resources_available.mem = 131842484kb
resources_available.ncpus = 40
resources_available.vnode = z53
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
last_state_change_time = Wed Feb 12 16:57:59 2020
last_used_time = Wed Feb 12 16:55:51 2020

But when I do pbsnodes z53 it gives the error:

Node: z53, Error: Unknown node

In /etc/hosts z53 is defined as:

xx.xxx.xxx.xx lustwz53 z53

I am able to submit jobs to the node. Just wondering why pbsnodes z53 doesn’t recognize the shortname?

qmgr -c "d n z53 "
qmgr -c "create node z53 Mom=lustwz53"

pbsnodes z53 # check whether this works

Otherwise:
Please share the output of these two commands:
pbs_hostn -v lustwz53
nslookup lustwz53

Does pbsnodes -v z53 work?

Thanks, I tried your commands, and the output is below (I’ve xxxx’ed out some stuff):

root@lustwzb34:/root # qmgr -c "d n z53 "
root@lustwzb34:/root # qmgr -c "create node z53 Mom=lustwz53"
root@lustwzb34:/root # pbsnodes z53
Node: z53, Error: Unknown node
root@lustwzb34:/root # pbs_hostn -v lustwz53
primary name: lustwz53 (from gethostbyname())
aliases: z53
aliases: lustwz53.xxxxx.xxx-corp.com
address length: 4 bytes
address: 10.xxx.xx.xx (783663626 dec) name: lustwz53
root@lustwzb34:/root # nslookup lustwz53
Server: 10.xxx.xx.xx
Address: 10.xxx.xx.xx#53

lustwz53.xxxxx.xxx.net canonical name = lustwz53.xxxxxx.xxx-corp.com.
Name: lustwz53.xxxxxx.xxx-corp.com
Address: 10.xxx.xx.xx

1 Like

Interestingly, yes that did work!

root@lustwzb34:/root # pbsnodes -v z53
z53
Mom = lustwz53
Port = 15002
pbs_version = 18.1.2
ntype = PBS
state = free
pcpus = 40
resources_available.arch = linux
resources_available.host = lustwz53
resources_available.mem = 131842484kb
resources_available.ncpus = 40
resources_available.vnode = z53
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
last_state_change_time = Thu Feb 13 16:06:03 2020

root@lustwzb34:/root # pbsnodes z53
Node: z53, Error: Unknown node

I guess PBS is using the primary name for the compute node, instead of the z53 alias.
pbsnodes takes a host list, which the server must be comparing against whatever Mom is set to.
The -v option of pbsnodes tells it to use vnode names, and since you created a vnode with the name z53, it works.

1 Like