#!/usr/bin/env python

# this script helps manage users on the nodes of a cluster
# it can be invoked by regular users to change their own password on all nodes
# it can also be used by an administrator (a user with sudo authority)
# to add or remove users on all nodes or to install packages

# Cristian Barbarosie, 2021, 2022, 2025
# cristian.barbarosie@gmail.com
# https://webpages.ciencias.ulisboa.pt/~cabarbarosie/cluster-config.html

import sys


def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    print("usage by a regular user :")
    print(" cluster password # change your own password")
    print(" cluster exec command # execute a command on all nodes")
    print("usage by a user with sudo privileges :")
    print(" cluster password john # change john's password")
    print(" cluster add user # add a user")
    print(" cluster delete user # delete a user")
    print(" cluster delete user john # delete john")
    print(" cluster upgrade # upgrade archlinux, calling pacman")
    print(" cluster install package # install a package, calling pacman")
    print(" cluster reboot # reboot all machines")
    print(" cluster sudo command # execute a command on all nodes")
    sys.exit(1)


if len(sys.argv) < 2:
    usage()

remote_nodes = ["beta1", "beta2", "beta3"]  # beta_nodes
all_nodes = ["localhost"] + remote_nodes  # alpha_node first, then beta_nodes

# These imports stay below the argument check, as in the original layout, so
# the usage message above does not depend on fabric/invoke being importable.
import os
import glob
import shutil
import string
import invoke
import fabric
import getpass
import subprocess


def _fail(message):
    """Write `message` to stderr and terminate with exit status 1."""
    sys.stderr.write(message + "\n")
    sys.exit(1)


def _ask_admin_credentials():
    """Prompt for the caller's password.

    Returns a pair (connect_kwargs, sudo_config): the dict is used to open the
    ssh connection, the fabric Config makes `Connection.sudo` answer the sudo
    prompt with the same password.
    """
    admin_pass_dict = {"password": getpass.getpass("your password : ")}
    sudo_config = fabric.Config(overrides={"sudo": admin_pass_dict})
    return admin_pass_dict, sudo_config


def _user_exists(conn, username):
    """Return True when `groups username` succeeds on the connection's host."""
    try:
        conn.run("groups " + username, hide="both")
    except invoke.exceptions.UnexpectedExit:
        return False
    return True


def _new_password_responders(password):
    """Build the watchers answering passwd's two new-password prompts."""
    return [
        invoke.Responder(pattern="New password:", response=password + "\n"),
        invoke.Responder(pattern="Retype new password:", response=password + "\n"),
    ]


def _run_passwd(runner, command, watchers, **run_kwargs):
    """Run a passwd command through `runner` (a Connection's run or sudo).

    `warn=True` is required for the error report below to be reachable:
    without it a non-zero exit status raises UnexpectedExit from `join()`
    before the return code can be inspected.  On failure the tail of the
    captured stderr is printed and the script exits with status 1.
    """
    promise = runner(command, asynchronous=True, warn=True,
                     watchers=watchers, **run_kwargs)
    res = promise.join()
    if res.return_code:
        sys.stderr.write("error : ")
        for chunk in res.tail("stderr"):
            sys.stderr.write(chunk)
        sys.exit(1)


def _require_clean_name(name, forbidden, what, allow_empty=False):
    """Exit with an error if `name` is empty or holds a forbidden character.

    The names checked here are pasted into shell commands executed with sudo,
    so the check must not be an `assert` (asserts vanish under `python -O`).
    """
    if not name and not allow_empty:
        _fail("error : empty " + what)
    offending = sorted(set(ch for ch in name if ch in forbidden))
    if offending:
        _fail("error : forbidden character(s) in " + what + " : "
              + " ".join(repr(ch) for ch in offending))


def compute_len_of_iterable(it):
    """Return the number of items produced by the iterable `it`."""
    return sum(1 for _ in it)


# ---------------------------------------------------------------- reboot ----

if sys.argv[1] == "reboot":
    admin_pass_dict, sudo_config = _ask_admin_credentials()
    # remote nodes are scheduled one minute apart, starting 10 minutes from now
    delay = 10
    for node in remote_nodes:
        rc = fabric.Connection(host=node, config=sudo_config,
                               connect_kwargs=admin_pass_dict)
        rc.sudo("shutdown -r +" + str(delay))
        delay += 1
    rc = fabric.Connection(host="localhost", config=sudo_config,
                           connect_kwargs=admin_pass_dict)
    rc.sudo("shutdown -r")
    print("you should logout now, come back in half an hour")
    sys.exit(0)

# -------------------------------------------------------------- password ----

if sys.argv[1] == "password":
    if len(sys.argv) > 3:
        usage()
    nb_successful = 0

    if len(sys.argv) == 2:
        # a regular user changes his/her own password
        curr_pass = getpass.getpass("current password : ")
        new_pass = getpass.getpass("new password : ")
        verif_pass = getpass.getpass("please retype new password : ")
        if new_pass != verif_pass:
            sys.stderr.write("passwords do not match, old password kept\n")
            sys.exit(1)
        provide_curr_pass = invoke.Responder(
            pattern="Current password:", response=curr_pass + "\n")
        watchers = [provide_curr_pass] + _new_password_responders(new_pass)
        for node in all_nodes:
            rc = fabric.Connection(host=node,
                                   connect_kwargs={"password": curr_pass})
            _run_passwd(rc.run, "passwd", watchers)
            nb_successful += 1

    else:
        # an administrator changes somebody else's password
        admin_pass_dict, sudo_config = _ask_admin_credentials()
        username = sys.argv[2]
        new_pass = getpass.getpass("New password for " + username + " : ")
        verif_pass = getpass.getpass(
            "Please retype new password for " + username + " : ")
        if new_pass != verif_pass:
            sys.stderr.write("passwords do not match, old password kept\n")
            sys.exit(1)
        # the original referred to an undefined name here instead of new_pass
        watchers = _new_password_responders(new_pass)
        for node in all_nodes:
            rc = fabric.Connection(host=node, config=sudo_config,
                                   connect_kwargs=admin_pass_dict)
            # first, ensure username exists
            if not _user_exists(rc, username):
                print("no user", username, "on", node)
                sys.exit(1)
            _run_passwd(rc.sudo, "passwd " + username, watchers, hide="stderr")
            nb_successful += 1

    print("successfully changed password on ", nb_successful, " nodes")
    sys.exit(0)

# ----------------------------------------------------------- add, delete ----

if sys.argv[1] in ["add", "delete"]:
    if len(sys.argv) < 3:
        usage()
    if sys.argv[2] != "user":
        usage()

    forbidden_chars = string.punctuation + string.whitespace
    # a full name may contain blanks, but nothing else from the forbidden set
    forbidden_chars_fullname = forbidden_chars.replace(" ", "")

    admin_pass_dict, sudo_config = _ask_admin_credentials()

    if sys.argv[1] == "add":
        username = input("username to add : ")
        _require_clean_name(username, forbidden_chars, "username")
        fullname = input("full name : ")
        # fullname ends up between single quotes in a shell command, so it is
        # the full name (not the username again) that must be checked here
        _require_clean_name(fullname, forbidden_chars_fullname, "full name",
                            allow_empty=True)
        user_pass = getpass.getpass("initial password for " + username + " : ")
        watchers = _new_password_responders(user_pass)

        # localhost comes first in all_nodes; only there is the home created
        for node in all_nodes:
            on_local_node = node == "localhost"
            rc = fabric.Connection(host=node, config=sudo_config,
                                   connect_kwargs=admin_pass_dict)
            # first, ensure username does not exist
            if _user_exists(rc, username):
                if on_local_node:
                    print("user", username, "already exists on local node")
                else:
                    print("user", username, "already exists on", node)
                sys.exit(1)
            home_option = "--create-home" if on_local_node else "--no-create-home"
            rc.sudo("useradd " + home_option + " --comment '" + fullname
                    + "' --shell /usr/bin/bash " + username, hide="stderr")
            rc.sudo("mkdir --mode=0750 /sci-data/" + username, hide="stderr")
            rc.sudo("chown " + username + ": /sci-data/" + username,
                    hide="stderr")
            if user_pass:
                _run_passwd(rc.sudo, "passwd " + username, watchers,
                            hide="stderr")

        print("successfully added user", username, "on", len(all_nodes), "nodes")
        sys.exit(0)

    # sys.argv[1] == "delete"
    if len(sys.argv) == 4:
        username = sys.argv[3]
    elif len(sys.argv) == 3:
        username = input("username to delete : ")
    else:
        usage()
    # the name is pasted into "rm -rf /sci-data/<name>" run with sudo, so it
    # is validated whichever way it was supplied
    _require_clean_name(username, forbidden_chars, "username")

    errors = 0
    successes = 0
    for node in remote_nodes:
        # NOTE(review): only the delete branch passes an explicit ssh port
        # (27182); every other branch relies on the default -- confirm which
        # one matches the cluster's sshd configuration.
        rc = fabric.Connection(host=node, port=27182, config=sudo_config,
                               connect_kwargs=admin_pass_dict)
        # first, ensure username exists
        if not _user_exists(rc, username):
            print("no such user", username, "on", node)
            errors += 1
            continue
        rc.sudo("userdel " + username, hide="stderr")
        rc.sudo("rm -rf /sci-data/" + username, hide="stderr")
        successes += 1

    # the local node is handled last; only there is the home directory removed
    rc = fabric.Connection(host="localhost", port=27182, config=sudo_config,
                           connect_kwargs=admin_pass_dict)
    if not _user_exists(rc, username):
        print("no such user", username, "on local node")
        errors += 1
    else:
        rc.sudo("userdel --remove " + username, hide="stderr")
        rc.sudo("rm -rf /sci-data/" + username, hide="stderr")
        successes += 1

    if errors > 0:
        print("errors on", errors, "nodes")
    if successes > 0:
        print("successfully deleted user", username, "from", successes, "nodes")
    sys.exit(0)

# ------------------------------------------------------ install, upgrade ----

if sys.argv[1] in ["install", "upgrade"]:
    cache_dir = "/var/cache/pacman/pkg"
    nfs_cache_dir = "/nfs-home/cache/pacman/pkg"
    if not os.path.isdir(nfs_cache_dir):
        _fail("error : " + nfs_cache_dir + " is not a directory")
    if os.getuid() != os.stat(nfs_cache_dir).st_uid:
        _fail("error : " + nfs_cache_dir + " must belong to the calling user")

    pacman_command = "pacman -Syu"
    if sys.argv[1] == "install":
        pacman_command = " ".join([pacman_command] + sys.argv[2:])

    admin_pass_dict, sudo_config = _ask_admin_credentials()

    print("upgrading local node")
    os.chdir(cache_dir)
    files_before = set()
    for pkg in glob.iglob("*"):
        if not os.path.isfile(pkg):
            _fail("error : " + pkg + " in " + cache_dir
                  + " is not a regular file")
        files_before.add(pkg)
    print(len(files_before), "files initially in the cache")

    rc = fabric.Connection(host="localhost", config=sudo_config,
                           connect_kwargs=admin_pass_dict)
    rc.sudo(pacman_command)
    print(compute_len_of_iterable(glob.iglob("*")), end=" ")
    print("files in the cache after calling pacman")
    rc.sudo("paccache -rk1")
    print(compute_len_of_iterable(glob.iglob("*")), end=" ")
    print("files in the cache after calling paccache")

    # share the freshly downloaded packages through the nfs cache
    counter = 0
    for pkg in glob.iglob("*"):
        if pkg in files_before:
            continue
        if not os.path.isfile(pkg):
            _fail("error : " + pkg + " in " + cache_dir
                  + " is not a regular file")
        shutil.copy(pkg, nfs_cache_dir)
        counter += 1
        # copied files will belong to calling user, not to root
    print("copied", counter, "files to nfs cache")

    # "cp dir/* ..." fails when the glob matches nothing, which would abort
    # the whole run before any remote node gets upgraded
    nfs_cache_has_files = bool(glob.glob(os.path.join(nfs_cache_dir, "*")))

    for node in remote_nodes:
        print("upgrading", node)
        rc = fabric.Connection(host=node, config=sudo_config,
                               connect_kwargs=admin_pass_dict)
        if nfs_cache_has_files:
            rc.sudo("cp " + nfs_cache_dir + "/* " + cache_dir + "/")
            # copied files will belong to root
        rc.sudo(pacman_command)
        # -f : an already empty cache is not an error
        rc.sudo("rm -f " + cache_dir + "/*")
        print()

    os.chdir(nfs_cache_dir)
    for pkg in glob.iglob("*"):
        os.remove(pkg)
    sys.exit(0)

# ------------------------------------------------------------ exec, sudo ----

if sys.argv[1] == "exec":
    if len(sys.argv) < 3:
        usage()
    command = " ".join(sys.argv[2:])
    curr_pass = getpass.getpass("your password : ")
    for node in all_nodes:
        print("on", node, ": ")
        rc = fabric.Connection(host=node,
                               connect_kwargs={"password": curr_pass})
        rc.run(command)
    sys.exit(0)

if sys.argv[1] == "sudo":
    if len(sys.argv) < 3:
        usage()
    command = " ".join(sys.argv[2:])
    admin_pass_dict, sudo_config = _ask_admin_credentials()
    for node in all_nodes:
        print("on", node, ": ")
        rc = fabric.Connection(host=node, config=sudo_config,
                               connect_kwargs=admin_pass_dict)
        rc.sudo(command)
        print()
    sys.exit(0)

# unknown sub-command
usage()