Dear adarsh,
Here is the .e file created by the job (as mentioned before, it is killed because it exceeded the walltime):
=>> PBS: job killed: walltime 151 exceeded limit 120
A fatal error has occurred in cfx5solve:
cfx5solve was killed by the user.
The application log file (in this case, the CFX out file) looks completely normal. It is just as if the process stops when it reaches the “parallel” phase of the run:
+--------------------------------------------------------------------+
| CPU Time Requirements of Partitioner |
+--------------------------------------------------------------------+
Preparations 1.21E-02 2.7 %
Low-level Mesh Partitioning 1.65E-03 0.4 %
File Reading 1.21E-01 27.3 %
Partition Smoothing 6.82E-03 1.5 %
Topology - Domain Interface 9.00E-06 0.0 %
Topology - Global 4.25E-04 0.1 %
Topology - Element/Face/Patch 1.16E-03 0.3 %
Topology - Vertex 1.03E-04 0.0 %
Data Compression 7.60E-05 0.0 %
Variable Updates 2.05E-03 0.5 %
File Writing 3.66E-03 0.8 %
Miscellaneous 2.93E-01 66.3 %
--------
Total 4.42E-01
+--------------------------------------------------------------------+
| Job Information at End of Run |
+--------------------------------------------------------------------+
Host computer: SERVER2 (PID:2175371)
Job finished: Wed Apr 27 08:55:20 2022
Total wall clock time: 8.980E-01 seconds
or: ( 0: 0: 0: 0.898 )
( Days: Hours: Minutes: Seconds )
+--------------------------------------------------------------------+
| |
| Solver |
| |
+--------------------------------------------------------------------+
+--------------------------------------------------------------------+
| A fatal error has occurred in cfx5solve: |
| |
| cfx5solve was killed by the user. |
+--------------------------------------------------------------------+
Here are the environment variables used in batch:
LD_LIBRARY_PATH=/nfs/soft/ansys_inc/v211/licensingclient/linx64/lib/usr/lib64:/opt/pbs/lib
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:
SSH_CONNECTION=10.42.10.163 33084 10.42.10.164 22
MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD
LANG=en_US.UTF-8
HISTCONTROL=ignoredups
HOSTNAME=SERVER2
OLDPWD=/nfs/users/verdebs
which_declare=declare -f
XDG_SESSION_ID=473
MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl
USER=verdebs
AWP_ROOT211=/nfs/soft/ansys_inc/v211/
PWD=/nfs/users/verdebs/CFXtst
SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass
HOME=/nfs/users/verdebs
SSH_CLIENT=10.42.10.163 33084 22
XDG_DATA_DIRS=/nfs/users/verdebs/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share
LOADEDMODULES=
SSH_TTY=/dev/pts/0
MAIL=/var/spool/mail/verdebs
TERM=xterm
SHELL=/bin/bash
SHLVL=1
MANPATH=::/opt/pbs/share/man
MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles
LOGNAME=verdebs
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/2000/bus
XDG_RUNTIME_DIR=/run/user/2000
MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1
PATH=/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pbs/bin:/nfs/users/verdebs/.local/bin:/nfs/users/verdebs/bin
MODULESHOME=/usr/share/Modules
HISTSIZE=1000
LESSOPEN=||/usr/bin/lesspipe.sh %s
_=/usr/bin/env
Here is the command used:
cfx5solve -example StaticMixer -parallel -part 2 -par-local -start-method "Open MPI Local Parallel"
In case it helps, here are the environment variables during the PBS run:
PBS_ENVIRONMENT=PBS_INTERACTIVE
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:
LD_LIBRARY_PATH=/nfs/soft/ansys_inc/v211/licensingclient/linx64/lib/usr/lib64:/opt/pbs/lib
PBS_O_LANG=en_US.UTF-8
SSH_CONNECTION=10.42.47.245 65371 10.42.10.163 22
MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD
LANG=en_US.UTF-8
HISTCONTROL=ignoredups
DISPLAY=localhost:11.0
HOSTNAME=SERVER2
OLDPWD=/nfs/users/verdebs
PBS_O_HOME=/nfs/users/verdebs
PBS_JOBID=239.SERVER1
PBS_JOBNAME=STDIN
NCPUS=2
PBS_O_PATH=/usr/share/Modules/bin:.:/opt/pbs/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pbs/bin:/nfs/users/verdebs/.local/bin:/nfs/users/verdebs/bin
which_declare=declare -f
XDG_SESSION_ID=3070
MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl
PBS_O_WORKDIR=/nfs/users/verdebs/CFXtst
USER=verdebs
AWP_ROOT211=/nfs/soft/ansys_inc/v211/
PBS_NODEFILE=/var/spool/pbs/aux/239.SERVER1
PBS_TASKNUM=1
PWD=/nfs/users/verdebs/CFXtst
SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass
HOME=/nfs/users/verdebs
SSH_CLIENT=10.42.47.245 65371 22
PBS_MOMPORT=15003
XDG_DATA_DIRS=/nfs/users/verdebs/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share
PBS_JOBCOOKIE=418777613DB694897DEB82F50D9D6593
PBS_O_SHELL=/bin/bash
TMPDIR=/var/tmp/pbs.239.SERVER1
LOADEDMODULES=
SSH_TTY=/dev/pts/1
PBS_O_QUEUE=TH
MAIL=/var/spool/mail/verdebs
SHELL=/bin/bash
TERM=xterm
SHLVL=2
PBS_O_HOST=server1.tes.local
PBS_O_SYSTEM=Linux
MANPATH=::/opt/pbs/share/man:/opt/pbs/share/man
PBS_O_LOGNAME=verdebs
PBS_NODENUM=0
MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles
GDK_BACKEND=x11
PBS_JOBDIR=/nfs/users/verdebs
LOGNAME=verdebs
DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-1sdDq9P79D,guid=fcb6389845d24d7ebaf0350462678f96
XDG_RUNTIME_DIR=/run/user/2000
MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1
PATH=/usr/share/Modules/bin:.:/opt/pbs/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pbs/bin:/nfs/users/verdebs/.local/bin:/nfs/users/verdebs/bin:/opt/pbs/bin:/nfs/users/verdebs/.local/bin:/nfs/users/verdebs/bin
PBS_QUEUE=TH
MODULESHOME=/usr/share/Modules
HISTSIZE=1000
OMP_NUM_THREADS=2
LESSOPEN=||/usr/bin/lesspipe.sh %s
PBS_O_MAIL=/var/spool/mail/verdebs
_=/usr/bin/env
Once the job starts, I do see the process named 239.SERVER1.SC on SERVER2. I also see the usual ANSYS subprocesses kick in. At the point where the “parallel” solver processes should appear, only 1 mpirun process starts instead of the 2 requested…
I do not know if this is helpful, but we are running PBS on CentOS Stream 8. Could it be that the system prohibits PBS from starting some processes?
I will try to increase the log_events values as suggested and look at the various log files to try to detect something unusual…
Thank you in advance.