I’m running OpenPBS 20.0.1 and am seeing a "Permission denied" error when the pbs_cgroups hook writes memory.memsw.limit_in_bytes on all compute nodes (and the nodes are swapping pretty heavily when running array jobs such as Molpro 2020):
06/23/2022 09:25:46;0100;pbs_python;Hook;pbs_python;main: Event type is execjob_begin, job ID is 146069.flux
06/23/2022 09:25:46;0100;pbs_python;Hook;pbs_python;create_job: Creating directory /sys/fs/cgroup/blkio,cpuacct,memory,freezer/pbs_jobs.service/jobid/146069.flux/
06/23/2022 09:25:46;0100;pbs_python;Hook;pbs_python;create_job: Creating directory /sys/fs/cgroup/cpuset/pbs_jobs.service/jobid/146069.flux/
** ISSUE HERE **
06/23/2022 09:25:47;0002;pbs_python;Hook;pbs_python;write_value: Permission denied: /sys/fs/cgroup/blkio,cpuacct,memory,freezer/pbs_jobs.service/jobid/146069.flux/memory.memsw.limit_in_bytes
06/23/2022 09:25:47;0008;pbs_python;Job;146069.flux;update_job_usage: CPU percent: 0
06/23/2022 09:25:47;0008;pbs_python;Job;146069.flux;update_job_usage: CPU usage: 0.000 secs
06/23/2022 09:25:47;0008;pbs_python;Job;146069.flux;update_job_usage: Memory usage: mem=0b
06/23/2022 09:25:47;0008;pbs_python;Job;146069.flux;update_job_usage: No max vmem data
06/23/2022 09:25:47;0008;pbs_python;Job;146069.flux;update_job_usage: No vmem fail count data
06/23/2022 09:25:47;0100;pbs_python;Hook;pbs_python;Hook ended: pbs_cgroups, job ID 146069.flux, event_type 64 (elapsed time: 0.3918)
06/23/2022 09:25:47;0008;pbs_mom;Job;146069.flux;no active tasks
06/23/2022 09:25:47;0080;pbs_mom;Job;146069.flux;running prologue
06/23/2022 09:25:47;0100;pbs_python;Hook;pbs_python;main: Event type is execjob_launch, job ID is 146069.flux
06/23/2022 09:25:47;0100;pbs_python;Hook;pbs_python;Hook ended: pbs_cgroups, job ID 146069.flux, event_type 2048 (elapsed time: 0.2916)
06/23/2022 09:25:47;0008;pbs_mom;Job;146069.flux;Started, pid = 29607
06/23/2022 09:26:48;0008;pbs_python;Job;146069.flux;update_job_usage: CPU percent: 0
06/23/2022 09:26:48;0008;pbs_python;Job;146069.flux;update_job_usage: CPU usage: 639.773 secs
06/23/2022 09:26:48;0008;pbs_python;Job;146069.flux;update_job_usage: Memory usage: mem=1600744kb
06/23/2022 09:26:48;0008;pbs_python;Job;146069.flux;update_job_usage: No max vmem data
06/23/2022 09:26:48;0008;pbs_python;Job;146069.flux;update_job_usage: No vmem fail count data
06/23/2022 09:28:50;0008;pbs_python;Job;146069.flux;update_job_usage: CPU percent: 426
06/23/2022 09:28:50;0008;pbs_python;Job;146069.flux;update_job_usage: CPU usage: 2051.048 secs
# file: sys/fs/cgroup/blkio,cpuacct,memory,freezer/pbs_jobs.service/jobid/146069.flux/
# owner: root
# group: root
user::rwx
group::r-x
other::r-x
The pbs_cgroups config is as follows:
{
"cgroup_prefix" : "pbs_jobs",
"exclude_hosts" : [],
"exclude_vntypes" : ["no_cgroups"],
"run_only_on_hosts" : [],
"periodic_resc_update" : true,
"vnode_per_numa_node" : false,
"online_offlined_nodes" : true,
"use_hyperthreads" : false,
"ncpus_are_cores" : false,
"cgroup" : {
"cpuacct" : {
"enabled" : true,
"exclude_hosts" : [],
"exclude_vntypes" : []
},
"cpuset" : {
"enabled" : true,
"exclude_cpus" : [],
"exclude_hosts" : [],
"exclude_vntypes" : [],
"mem_fences" : true,
"mem_hardwall" : false,
"memory_spread_page" : false
},
"devices" : {
"enabled" : false,
"exclude_hosts" : [],
"exclude_vntypes" : [],
"allow" : [
"b *:* rwm",
"c *:* rwm"
]
},
"hugetlb" : {
"enabled" : false,
"exclude_hosts" : [],
"exclude_vntypes" : [],
"default" : "0MB",
"reserve_percent" : 0,
"reserve_amount" : "0MB"
},
"memory" : {
"enabled" : true,
"exclude_hosts" : [],
"exclude_vntypes" : [],
"soft_limit" : false,
"default" : "256MB",
"reserve_percent" : 0,
"reserve_amount" : "64MB"
},
"memsw" : {
"enabled" : true,
"exclude_hosts" : [],
"exclude_vntypes" : [],
"default" : "256MB",
"reserve_percent" : 0,
"reserve_amount" : "64MB"
}
}
}
Any help and/or recommendations would be greatly appreciated.
Thanks