Dear all,
After installing OpenPBS, I’m having trouble running jobs: every job I submit ends up in a held state, as shown below:
[test@cluster ~]$ qstat -f
Job Id: 2024.cluster
Job_Name = myscript
Job_Owner = test@cluster
job_state = H
queue = workq
server = cluster
Checkpoint = u
ctime = Thu Aug 10 15:19:34 2023
Error_Path = ip200177125.mmc.ufjf.br:/home/test/myscript.err
Hold_Types = s
Join_Path = n
Keep_Files = n
Mail_Points = a
mtime = Thu Aug 10 15:19:37 2023
Output_Path = ip200177125.mmc.ufjf.br:/home/test/myscript.out
Priority = 0
qtime = Thu Aug 10 15:19:34 2023
Rerunable = True
Resource_List.mpiprocs = 64
Resource_List.ncpus = 1
Resource_List.nodect = 1
Resource_List.place = scatter
Resource_List.select = 1:host=compute-1-1:mpiprocs=64
Resource_List.walltime = 48:00:00
stime = Thu Aug 10 15:19:37 2023
obittime = Thu Aug 10 15:19:37 2023
Shell_Path_List = /bin/bash
substate = 20
Variable_List = PBS_O_HOME=/home/test,PBS_O_LANG=pt_BR.UTF-8,
PBS_O_LOGNAME=test,
PBS_O_PATH=/home/test/.local/bin:/home/test/bin:/opt/ohpc/pub/mpi/libf
abric/1.13.0/bin:/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/bin:/opt/ohpc/pub/li
bs/hwloc/bin:/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4/bin:/opt/ohpc/pub/c
ompiler/gcc/12.2.0/bin:/opt/ohpc/pub/utils/prun/2.2:/opt/ohpc/pub/utils
/autotools/bin:/opt/ohpc/pub/bin:/usr/local/cuda-12.2/bin:/usr/local/sb
in:/usr/local/bin:/usr/sbin:/usr/bin:/opt/pbs/bin:/opt/pbs/sbin:/root/b
in:/opt/pbs/bin,PBS_O_MAIL=/var/spool/mail/root,PBS_O_SHELL=/bin/bash,
PBS_O_WORKDIR=/home/test,PBS_O_SYSTEM=Linux,
UCX_BIN=/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/bin,
LD_LIBRARY_PATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/lib:/opt/ohpc/pub/m
pi/ucx-ohpc/1.11.2/lib:/opt/ohpc/pub/libs/hwloc/lib:/opt/ohpc/pub/mpi/o
penmpi4-gnu12/4.1.4/lib:/opt/ohpc/pub/compiler/gcc/12.2.0/lib64:/usr/lo
cal/cuda-12.2/lib64,
LS_COLORS=rs=0:di=38;5;33:ln=38;5;51:mh=00:pi=40;38;5;11:so=38;5;13:do
=38;5;5:bd=48;5;232;38;5;11:cd=48;5;232;38;5;3:or=48;5;232;38;5;9:mi=01
;05;37;41:su=48;5;196;38;5;15:sg=48;5;11;38;5;16:ca=48;5;196;38;5;226:t
w=48;5;10;38;5;16:ow=48;5;10;38;5;21:st=48;5;21;38;5;15:ex=38;5;40:.ta
r=38;5;9:.tgz=38;5;9:.arc=38;5;9:.arj=38;5;9:.taz=38;5;9:.lha=38;5
;9:.lz4=38;5;9:.lzh=38;5;9:.lzma=38;5;9:.tlz=38;5;9:.txz=38;5;9:.
tzo=38;5;9:.t7z=38;5;9:.zip=38;5;9:.z=38;5;9:.dz=38;5;9:.gz=38;5;9
:.lrz=38;5;9:.lz=38;5;9:.lzo=38;5;9:.xz=38;5;9:.zst=38;5;9:.tzst=
38;5;9:.bz2=38;5;9:.bz=38;5;9:.tbz=38;5;9:.tbz2=38;5;9:.tz=38;5;9:
.deb=38;5;9:.rpm=38;5;9:.jar=38;5;9:.war=38;5;9:.ear=38;5;9:.sar=
38;5;9:.rar=38;5;9:.alz=38;5;9:.ace=38;5;9:.zoo=38;5;9:.cpio=38;5;
9:.7z=38;5;9:.rz=38;5;9:.cab=38;5;9:.wim=38;5;9:.swm=38;5;9:.dwm=
38;5;9:.esd=38;5;9:.jpg=38;5;13:.jpeg=38;5;13:.mjpg=38;5;13:.mjpeg
=38;5;13:.gif=38;5;13:.bmp=38;5;13:.pbm=38;5;13:.pgm=38;5;13:.ppm=
38;5;13:.tga=38;5;13:.xbm=38;5;13:.xpm=38;5;13:.tif=38;5;13:.tiff=
38;5;13:.png=38;5;13:.svg=38;5;13:.svgz=38;5;13:.mng=38;5;13:.pcx=
38;5;13:.mov=38;5;13:.mpg=38;5;13:.mpeg=38;5;13:.m2v=38;5;13:.mkv=
38;5;13:.webm=38;5;13:.ogm=38;5;13:.mp4=38;5;13:.m4v=38;5;13:.mp4v
=38;5;13:.vob=38;5;13:.qt=38;5;13:.nuv=38;5;13:.wmv=38;5;13:.asf=3
8;5;13:.rm=38;5;13:.rmvb=38;5;13:.flc=38;5;13:.avi=38;5;13:.fli=38
;5;13:.flv=38;5;13:.gl=38;5;13:.dl=38;5;13:.xcf=38;5;13:.xwd=38;5;
13:.yuv=38;5;13:.cgm=38;5;13:.emf=38;5;13:.ogv=38;5;13:.ogx=38;5;1
3:.aac=38;5;45:.au=38;5;45:.flac=38;5;45:.m4a=38;5;45:.mid=38;5;45
:.midi=38;5;45:.mka=38;5;45:.mp3=38;5;45:.mpc=38;5;45:.ogg=38;5;45
:.ra=38;5;45:.wav=38;5;45:.oga=38;5;45:.opus=38;5;45:.spx=38;5;45:
*.xspf=38;5;45:,LIBFABRIC_DIR=/opt/ohpc/pub/mpi/libfabric/1.13.0,
__LMOD_REF_COUNT_PATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/bin:1;/opt/oh
pc/pub/mpi/ucx-ohpc/1.11.2/bin:1;/opt/ohpc/pub/libs/hwloc/bin:1;/opt/oh
pc/pub/mpi/openmpi4-gnu12/4.1.4/bin:1;/opt/ohpc/pub/compiler/gcc/12.2.0
/bin:1;/opt/ohpc/pub/utils/prun/2.2:1;/opt/ohpc/pub/utils/autotools/bin
:1;/opt/ohpc/pub/bin:1;/usr/local/cuda-12.2/bin:1;/usr/local/sbin:1;/us
r/local/bin:1;/usr/sbin:1;/usr/bin:1;/opt/pbs/bin:1;/opt/pbs/sbin:1;/ro
ot/bin:1,
ModuleTable002=Mi8xMi4yLjAubHVhIiwKZnVsbE5hbWUgPSAiZ251MTIvMTIuMi4wI
iwKbG9hZE9yZGVyID0gMywKcHJvcFQgPSB7fSwKc3RhY2tEZXB0aCA9IDEsCnN0YXR1cyA9
ICJhY3RpdmUiLAp1c2VyTmFtZSA9ICJnbnUxMiIsCndWID0gIl4wMDAwMDAxMi4wMDAwMDA
wMDIuKnpmaW5hbCIsCn0sCmh3bG9jID0gewpmbiA9ICIvb3B0L29ocGMvcHViL21vZHVsZW
ZpbGVzL2h3bG9jLzIuNy4wIiwKZnVsbE5hbWUgPSAiaHdsb2MvMi43LjAiLApsb2FkT3JkZ
XIgPSA0LApwcm9wVCA9IHt9LApyZWZfY291bnQgPSAxLApzdGFja0RlcHRoID0gMiwKc3Rh
dHVzID0gImFjdGl2ZSIsCnVzZXJOYW1lID0gImh3bG9jIiwKd1YgPSAiMDAwMDAwMDAyLjA
wMDAwMDAwNy4qemZpbmFsIiwKfSwKbGli,
SSH_CONNECTION=200.17.71.23 32958 200.17.71.25 22,
INCLUDE=/opt/ohpc/pub/compiler/gcc/12.2.0/include,
UCX_LIB=/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/lib,
LMOD_FAMILY_MPI_VERSION=4.1.4,LANG=pt_BR.UTF-8,HISTCONTROL=ignoredups,
LMOD_FAMILY_COMPILER_VERSION=12.2.0,HOSTNAME=cluster.mmc.ufjf.br,
OLDPWD=/root,UCX_WARN_UNUSED_ENV_VARS=N,
HWLOC_BIN=/opt/ohpc/pub/libs/hwloc/bin,
__LMOD_REF_COUNT_INCLUDE=/opt/ohpc/pub/compiler/gcc/12.2.0/include:1,
__LMOD_REF_COUNT_LD_LIBRARY_PATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/li
b:1;/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/lib:1;/opt/ohpc/pub/libs/hwloc/li
b:1;/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4/lib:1;/opt/ohpc/pub/compiler
/gcc/12.2.0/lib64:1;/usr/local/cuda-12.2/lib64:1,
__LMOD_REF_COUNT_PKG_CONFIG_PATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/li
b/pkgconfig:1;/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/lib/pkgconfig:1;/opt/oh
pc/pub/mpi/openmpi4-gnu12/4.1.4/lib/pkgconfig:1,
ModuleTable004=PSAiTS4qemZpbmFsIiwKfSwKb3Blbm1waTQgPSB7CmZuID0gIi9vc
HQvb2hwYy9wdWIvbW9kdWxlZGVwcy9nbnUxMi9vcGVubXBpNC80LjEuNCIsCmZ1bGxOYW1l
ID0gIm9wZW5tcGk0LzQuMS40IiwKbG9hZE9yZGVyID0gNywKcHJvcFQgPSB7fSwKc3RhY2t
EZXB0aCA9IDEsCnN0YXR1cyA9ICJhY3RpdmUiLAp1c2VyTmFtZSA9ICJvcGVubXBpNCIsCn
dWID0gIjAwMDAwMDAwNC4wMDAwMDAwMDEuMDAwMDAwMDA0Lip6ZmluYWwiLAp9LApwcnVuI
D0gewpmbiA9ICIvb3B0L29ocGMvcHViL21vZHVsZWZpbGVzL3BydW4vMi4yIiwKZnVsbE5h
bWUgPSAicHJ1bi8yLjIiLApsb2FkT3JkZXIgPSAyLApwcm9wVCA9IHt9LApzdGFja0RlcHR
oID0gMSwKc3RhdHVzID0gImFjdGl2ZSIs,which_declare=declare -f,
UCX_DIR=/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2,XDG_SESSION_ID=6,
HWLOC_DIR=/opt/ohpc/pub/libs/hwloc,USER=test,SELINUX_ROLE_REQUESTED=,
__LMOD_REF_COUNT_MODULEPATH=/opt/ohpc/pub/moduledeps/gnu12-openmpi4:1;
/opt/ohpc/pub/moduledeps/gnu12:1;/opt/ohpc/pub/modulefiles:1,
UCX_INC=/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/include,
LIBFABRIC_BIN=/opt/ohpc/pub/mpi/libfabric/1.13.0/bin,PWD=/home/test,
HOME=/home/test,LMOD_COLORIZE=no,SSH_CLIENT=200.17.71.23 32958 22,
LMOD_VERSION=8.7.6,LMOD_SETTARG_CMD=:,SELINUX_LEVEL_REQUESTED=,
BASH_ENV=/opt/ohpc/admin/lmod/lmod/init/bash,
LIBFABRIC_INC=/opt/ohpc/pub/mpi/libfabric/1.13.0/include,
HWLOC_INC=/opt/ohpc/pub/libs/hwloc/include,LMOD_SHELL_PRGM=bash,
ModuleTable001=X01vZHVsZVRhYmxlXyA9IHsKTVR2ZXJzaW9uID0gMywKY19yZWJ1a
WxkVGltZSA9IGZhbHNlLApjX3Nob3J0VGltZSA9IGZhbHNlLApkZXB0aFQgPSB7fSwKZmFt
aWx5ID0gewpNUEkgPSAib3Blbm1waTQiLApjb21waWxlciA9ICJnbnUxMiIsCn0sCm1UID0
gewphdXRvdG9vbHMgPSB7CmZuID0gIi9vcHQvb2hwYy9wdWIvbW9kdWxlZmlsZXMvYXV0b3
Rvb2xzIiwKZnVsbE5hbWUgPSAiYXV0b3Rvb2xzIiwKbG9hZE9yZGVyID0gMSwKcHJvcFQgP
SB7fSwKc3RhY2tEZXB0aCA9IDEsCnN0YXR1cyA9ICJhY3RpdmUiLAp1c2VyTmFtZSA9ICJh
dXRvdG9vbHMiLAp3ViA9ICJNLip6ZmluYWwiLAp9LApnbnUxMiA9IHsKZm4gPSAiL29wdC9
vaHBjL3B1Yi9tb2R1bGVmaWxlcy9nbnUx,
LOADEDMODULES=autotools:prun/2.2:gnu12/12.2.0:hwloc/2.7.0:ucx/1.11.2:l
ibfabric/1.13.0:openmpi4/4.1.4:ohpc,
__LMOD_REF_COUNT_MANPATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/share/man:
1;/opt/ohpc/pub/libs/hwloc/man:1;/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4
/share/man:1;/opt/ohpc/pub/compiler/gcc/12.2.0/share/man:1;/opt/ohpc/pu
b/utils/autotools/share/man:1;/usr/local/share/man:2;/usr/share/man/ove
rrides:2;/usr/share/man/en:2;/usr/share/man:2;/opt/pbs/share/man:1,
ModuleTable006=dWIvbW9kdWxlZmlsZXMiLAp9LApzeXN0ZW1CYXNlTVBBVEggPSAiL
29wdC9vaHBjL2FkbWluL21vZHVsZWZpbGVzOi9vcHQvb2hwYy9wdWIvbW9kdWxlZmlsZXMi
LAp9Cg==,
ModuleTable003=ZmFicmljID0gewpmbiA9ICIvb3B0L29ocGMvcHViL21vZHVsZWZpb
GVzL2xpYmZhYnJpYy8xLjEzLjAiLApmdWxsTmFtZSA9ICJsaWJmYWJyaWMvMS4xMy4wIiwK
bG9hZE9yZGVyID0gNiwKcHJvcFQgPSB7fSwKcmVmX2NvdW50ID0gMSwKc3RhY2tEZXB0aCA
9IDIsCnN0YXR1cyA9ICJhY3RpdmUiLAp1c2VyTmFtZSA9ICJsaWJmYWJyaWMiLAp3ViA9IC
IwMDAwMDAwMDEuMDAwMDAwMDEzLip6ZmluYWwiLAp9LApvaHBjID0gewpmbiA9ICIvb3B0L
29ocGMvcHViL21vZHVsZWZpbGVzL29ocGMiLApmdWxsTmFtZSA9ICJvaHBjIiwKbG9hZE9y
ZGVyID0gOCwKcHJvcFQgPSB7fSwKc3RhY2tEZXB0aCA9IDAsCnN0YXR1cyA9ICJhY3RpdmU
iLAp1c2VyTmFtZSA9ICJvaHBjIiwKd1Yg,LMOD_ROOT=/opt/ohpc/admin/lmod,
SSH_TTY=/dev/pts/2,MAIL=/var/spool/mail/root,
HWLOC_LIB=/opt/ohpc/pub/libs/hwloc/lib,SHELL=/bin/bash,
TERM=xterm-256color,ModuleTable_Sz=6,LMOD_FAMILY_COMPILER=gnu12,
SELINUX_USE_CURRENT_RANGE=,SHLVL=2,
MANPATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/share/man:/opt/ohpc/pub/lib
s/hwloc/man:/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4/share/man:/opt/ohpc/
pub/compiler/gcc/12.2.0/share/man:/opt/ohpc/pub/utils/autotools/share/m
an:/usr/local/share/man:/usr/share/man/overrides:/usr/share/man/en:/usr
/share/man:/opt/pbs/share/man:/opt/pbs/share/man,
LMOD_PREPEND_BLOCK=normal,
MODULEPATH=/opt/ohpc/pub/moduledeps/gnu12-openmpi4:/opt/ohpc/pub/modul
edeps/gnu12:/opt/ohpc/pub/modulefiles,
MPI_DIR=/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4,LOGNAME=test,
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus,
XDG_RUNTIME_DIR=/run/user/0,
OMPI_MCA_mca_base_component_show_load_errors=0,
PATH=/home/test/.local/bin:/home/test/bin:/opt/ohpc/pub/mpi/libfabric/
1.13.0/bin:/opt/ohpc/pub/mpi/ucx-ohpc/1.11.2/bin:/opt/ohpc/pub/libs/hwl
oc/bin:/opt/ohpc/pub/mpi/openmpi4-gnu12/4.1.4/bin:/opt/ohpc/pub/compile
r/gcc/12.2.0/bin:/opt/ohpc/pub/utils/prun/2.2:/opt/ohpc/pub/utils/autot
ools/bin:/opt/ohpc/pub/bin:/usr/local/cuda-12.2/bin:/usr/local/sbin:/us
r/local/bin:/usr/sbin:/usr/bin:/opt/pbs/bin:/opt/pbs/sbin:/root/bin:/op
t/pbs/bin,
LMFILES=/opt/ohpc/pub/modulefiles/autotools:/opt/ohpc/pub/modulefile
s/prun/2.2:/opt/ohpc/pub/modulefiles/gnu12/12.2.0.lua:/opt/ohpc/pub/mod
ulefiles/hwloc/2.7.0:/opt/ohpc/pub/modulefiles/ucx/1.11.2:/opt/ohpc/pub
/modulefiles/libfabric/1.13.0:/opt/ohpc/pub/moduledeps/gnu12/openmpi4/4
.1.4:/opt/ohpc/pub/modulefiles/ohpc,
DEBUGINFOD_URLS=https://debuginfod.centos.org/,
MODULESHOME=/opt/ohpc/admin/lmod/lmod,LMOD_SETTARG_FULL_SUPPORT=no,
PKG_CONFIG_PATH=/opt/ohpc/pub/mpi/libfabric/1.13.0/lib/pkgconfig:/opt/
ohpc/pub/mpi/ucx-ohpc/1.11.2/lib/pkgconfig:/opt/ohpc/pub/mpi/openmpi4-g
nu12/4.1.4/lib/pkgconfig,HISTSIZE=1000,
LMOD_PKG=/opt/ohpc/admin/lmod/lmod,
LMOD_CMD=/opt/ohpc/admin/lmod/lmod/libexec/lmod,
ModuleTable005=CnVzZXJOYW1lID0gInBydW4iLAp3ViA9ICIwMDAwMDAwMDIuMDAwM
DAwMDAyLip6ZmluYWwiLAp9LAp1Y3ggPSB7CmZuID0gIi9vcHQvb2hwYy9wdWIvbW9kdWxl
ZmlsZXMvdWN4LzEuMTEuMiIsCmZ1bGxOYW1lID0gInVjeC8xLjExLjIiLApsb2FkT3JkZXI
gPSA1LApwcm9wVCA9IHt9LApyZWZfY291bnQgPSAxLApzdGFja0RlcHRoID0gMiwKc3RhdH
VzID0gImFjdGl2ZSIsCnVzZXJOYW1lID0gInVjeCIsCndWID0gIjAwMDAwMDAwMS4wMDAwM
DAwMTEuMDAwMDAwMDAyLip6ZmluYWwiLAp9LAp9LAptcGF0aEEgPSB7CiIvb3B0L29ocGMv
cHViL21vZHVsZWRlcHMvZ251MTItb3Blbm1waTQiLCAiL29wdC9vaHBjL3B1Yi9tb2R1bGV
kZXBzL2dudTEyIiwgIi9vcHQvb2hwYy9w,
LIBFABRIC_LIB=/opt/ohpc/pub/mpi/libfabric/1.13.0/lib,
LESSOPEN=||/usr/bin/lesspipe.sh %s,LMOD_FULL_SETTARG_SUPPORT=no,
LMOD_DIR=/opt/ohpc/admin/lmod/lmod/libexec,LMOD_FAMILY_MPI=openmpi4,
BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/wh
ich --tty-only --read-alias --read-functions --show-tilde --show-dot $@
},
BASH_FUNC_module%%=() { if [ -z "${LMOD_SH_DBG_ON+x}" ]; then
case
"$-" in
vx*)
__lmod_sh_dbg='vx'
;; v)
__lmod_sh_dbg='v'
;; x)
__lmod_sh_dbg='x'
;; esac; fi; if [ -n "${__lmod_sh_dbg:
-}" ]; then
set +$__lmod_sh_dbg; echo "Shell debugging temporarily s
ilenced: export LMOD_SH_DBG_ON=1 for Lmod's output" 1>&2; fi; eval "
$($LMOD_CMD $LMOD_SHELL_PRGM "$@")" && eval "$(${LMOD_SETTARG_CMD:-
:} -s sh)"; __lmod_my_status=$?; if [ -n "${__lmod_sh_dbg:-}" ]; the
n
echo "Shell debugging restarted" 1>&2; set -$__lmod_sh_dbg; fi; un
set __lmod_sh_dbg; return $__lmod_my_status
},
BASH_FUNC_ml%%=() { eval "$($LMOD_DIR/ml_cmd "$@")"
},
_=/opt/pbs/bin/qsub,PBS_O_QUEUE=workq,PBS_O_HOST=cluster
comment = job held, too many failed attempts to run
run_count = 21
Exit_status = -3
Submit_arguments = test.mpi.job
project = _pbs_project_default
Submit_Host = cluster
The job is quite simple:
#PBS -S /bin/bash
#PBS -l select=1:host=compute-1-1:mpiprocs=64
#PBS -l walltime=48:00:00
#PBS -N myscript
#PBS -o myscript.out
#PBS -e myscript.err
cd $PBS_O_WORKDIR/
prun a.out
The a.out file is the classic hello world MPI example.
Looking at the mom_log, it seems that the problem is related to the standard output/error:
08/10/2023 15:16:26;0100;pbs_mom;Job;2023.cluster;compute-1-1 cput=00:00:00 mem=0kb
08/10/2023 15:16:26;0100;pbs_mom;Job;2023.cluster;Obit sent
08/10/2023 15:16:26;0008;pbs_mom;Job;2023.cluster;no active tasks
08/10/2023 15:16:26;0100;pbs_mom;Req;;Type 6 request received from root@10.1.1.1:15001, sock=0
08/10/2023 15:16:26;0080;pbs_mom;Job;2023.cluster;delete job request received
08/10/2023 15:16:26;0008;pbs_mom;Job;2023.cluster;kill_job
(these messages repeat many times)
The folder /var/spool/pbs/undelivered is empty.
The odd part is that this job ran correctly until I added more nodes to the cluster (I was checking that everything worked on a single machine before replicating the setup). After I added two more nodes, it stopped working, no matter which node I choose to run it on (or even if I don’t select a specific node). The nodes seem to be up:
[test@cluster ~]$ pbsnodes -a
compute-0-0
Mom = compute-0-0.localdomain
ntype = PBS
state = free
pcpus = 64
resources_available.arch = linux
resources_available.host = compute-0-0
resources_available.mem = 131989036kb
resources_available.ncpus = 64
resources_available.vnode = compute-0-0
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Wed Aug 9 19:30:55 2023
compute-1-0
Mom = compute-1-0.localdomain
ntype = PBS
state = free
pcpus = 128
resources_available.arch = linux
resources_available.host = compute-1-0
resources_available.mem = 527935188kb
resources_available.ncpus = 128
resources_available.vnode = compute-1-0
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Wed Aug 9 19:59:53 2023
compute-1-1
Mom = compute-1-1.localdomain
ntype = PBS
state = free
pcpus = 256
resources_available.arch = linux
resources_available.host = compute-1-1
resources_available.mem = 527893656kb
resources_available.ncpus = 256
resources_available.vnode = compute-1-1
resources_assigned.accelerator_memory = 0kb
resources_assigned.hbmem = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
license = l
last_state_change_time = Thu Aug 10 15:43:31 2023
Any ideas? I’m new to OpenPBS, although I have some experience in cluster administration.
Regards,
Marcelo