Bug 276001

Summary: sysutils/slurm-wlm: slurmctld fails to start
Product: Ports & Packages Reporter: Jason W. Bacon <jwb>
Component: Individual Port(s) Assignee: freebsd-ports-bugs (Nobody) <ports-bugs>
Status: New ---    
Severity: Affects Only Me    
Priority: ---    
Version: Latest   
Hardware: Any   
OS: Any   

Description Jason W. Bacon freebsd_committer freebsd_triage 2023-12-29 19:25:23 UTC
slurmctld fails to start, with the following error:

[2023-12-29T12:08:15.188] Running as primary controller
[2023-12-29T12:08:15.188] error: Error binding slurm stream socket: Invalid argument
[2023-12-29T12:08:15.188] fatal: slurm_init_msg_engine_port: error Invalid argument

This looks like the same issue I had patched in a previous version:

https://bugs.schedmd.com/show_bug.cgi?id=12524

Was slurmctld tested before committing 23.11.1?
Comment 1 Jason W. Bacon freebsd_committer freebsd_triage 2023-12-29 20:35:34 UTC
The patches below resolve the invalid argument errors on startup, though there may be other bind() calls that need fixing.  There are also issues with slurmd trying to start the cgroups plugin.

diff -ruN --exclude=CVS --exclude=.svn /usr/ports/sysutils/slurm-wlm/files/patch-src_common_net.c /usr/ports/wip/slurm-wlm-devel/files/patch-src_common_net.c
--- /usr/ports/sysutils/slurm-wlm/files/patch-src_common_net.c  1969-12-31 18:00:00.000000000 -0600
+++ /usr/ports/wip/slurm-wlm-devel/files/patch-src_common_net.c 2023-12-29 14:26:37.856355000 -0600
@@ -0,0 +1,25 @@
+--- src/common/net.c.orig      2023-12-29 19:19:05 UTC
++++ src/common/net.c
+@@ -90,11 +90,12 @@ strong_alias(net_stream_listen,            slurm_net_stream_list
+ int net_stream_listen(int *fd, uint16_t *port)
+ {
+       slurm_addr_t sin;
+-      socklen_t len = sizeof(sin);
++      socklen_t len;
+       int val = 1;
+ 
+       /* bind ephemeral port */
+       slurm_setup_addr(&sin, 0);
++      len = sin.ss_len;
+ 
+       if ((*fd = socket(sin.ss_family, SOCK_STREAM, IPPROTO_TCP)) < 0)
+               return -1;
+@@ -219,7 +220,7 @@ static bool _is_port_ok(int s, uint16_t port, bool loc
+               return false;
+       }
+ 
+-      if (bind(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
++      if (bind(s, (struct sockaddr *) &addr, addr.ss_len) < 0) {
+               log_flag(NET, "%s: bind() failed on port:%d fd:%d: %m",
+                        __func__, port, s);
+               return false;
diff -ruN --exclude=CVS --exclude=.svn /usr/ports/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c /usr/ports/wip/slurm-wlm-devel/files/patch-src_common_slurm__protocol__socket.c
--- /usr/ports/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c      1969-12-31 18:00:00.000000000 -0600
+++ /usr/ports/wip/slurm-wlm-devel/files/patch-src_common_slurm__protocol__socket.c     2023-12-29 14:26:37.856433000 -0600
@@ -0,0 +1,20 @@
+--- src/common/slurm_protocol_socket.c.orig    2023-12-29 19:26:12 UTC
++++ src/common/slurm_protocol_socket.c
+@@ -116,7 +116,7 @@ static void _sock_bind_wild(int sockfd)
+       slurm_setup_addr(&sin, RANDOM_USER_PORT);
+ 
+       for (retry=0; retry < PORT_RETRIES ; retry++) {
+-              rc = bind(sockfd, (struct sockaddr *) &sin, sizeof(sin));
++              rc = bind(sockfd, (struct sockaddr *) &sin, sin.ss_len);
+               if (rc >= 0)
+                       break;
+               slurm_set_port(&sin, RANDOM_USER_PORT);
+@@ -511,7 +511,7 @@ extern int slurm_init_msg_engine(slurm_addr_t *addr, b
+               goto error;
+       }
+ 
+-      rc = bind(fd, (struct sockaddr const *) addr, sizeof(*addr));
++      rc = bind(fd, (struct sockaddr const *) addr, addr->ss_len);
+       if (rc < 0) {
+               format_print(log_lvl, "Error binding slurm stream socket: %m");
+               goto error;