############################################################----------------------------------------------------------------------- Mon Nov 8 14:11:05 PST 1999 Documentation on PIII Linux cluster 3 front-ends 64 nodes # Killing XXX's jobs foreach u (XXX) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end ############################################################ (1) vn???.physics.ubc.ca 2. the IP information relating to this project is: Subnet: 142.103.237.0 Netmask: 255.255.255.0 Gateway: 142.103.237.254 142.103.237.252 vnswitch.physics.ubc.ca 142.103.237.225 vn225.physics.ubc.ca vnf1.physics.ubc.ca 142.103.237.226 vn226.physics.ubc.ca vnf2.physics.ubc.ca 142.103.237.227 vn227.physics.ubc.ca vnf3.physics.ubc.ca 142.103.237.1 vn1.physics.ubc.ca 142.103.237.2 vn2.physics.ubc.ca 142.103.237.3 142.103.237.4 142.103.237.5 142.103.237.6 142.103.237.7 142.103.237.8 142.103.237.9 142.103.237.10 142.103.237.11 142.103.237.12 142.103.237.13 142.103.237.14 142.103.237.15 142.103.237.16 142.103.237.17 142.103.237.18 142.103.237.19 142.103.237.20 142.103.237.21 142.103.237.22 142.103.237.23 142.103.237.24 142.103.237.25 142.103.237.26 142.103.237.27 142.103.237.28 142.103.237.29 142.103.237.30 142.103.237.31 142.103.237.32 142.103.237.33 142.103.237.34 142.103.237.35 142.103.237.36 142.103.237.37 142.103.237.38 142.103.237.39 142.103.237.40 142.103.237.41 142.103.237.42 142.103.237.43 142.103.237.44 142.103.237.45 142.103.237.46 142.103.237.47 142.103.237.48 142.103.237.49 142.103.237.50 142.103.237.51 142.103.237.52 142.103.237.53 142.103.237.54 142.103.237.55 142.103.237.56 142.103.237.57 142.103.237.58 142.103.237.59 142.103.237.60 142.103.237.61 142.103.237.62 142.103.237.63 142.103.237.64 vn64.physics.ubc.ca ============================================================ Tue Nov 9 16:10:17 PST 1999 Setting up cluster switch ============================================================ # Communicating via serial port attached to laptop minicom 
Console Main Menu 2 Switch Management Access Configuration Time Protocol Config [DHCP] Disabled IP Config : Manual IP address: 142.103.237.252 Subnet Mask: 255.255.255.0 Gateway: 142.103.237.254 ============================================================ Fri Nov 12 12:49:49 PST 1999 ============================================================ (1) First vnfe[123] and vn[1-16] up and running, configuring laptop as vnlap.physics.ubc.ca (leave hostname as rar052.ubc.ca) IP address 142.103.237.230 Subnet mask 255.255.255.0 Gateway 142.103.237.254 DNS 142.103.237.1 137.82.1.1 (2) Laptop now connected to cluster, continue with secondary setup of vnfe[123], but first need to add laptop to floppy setup and distribute to vnfe1, ... ============================================================ Fri Nov 12 17:30:39 PST 1999 ============================================================ (1) Cluster now on network, re-setting DNS default domain physics.ubc.ca nameserver 1 137.82.1.1 nameserver 2 137.82.28.3 ============================================================ Fri Nov 12 13:10:31 PST 1999 ============================================================ # As root@vnfe[123] THE FOLLOWING IS NOW DONE VIA THE SETUP SCRIPT cd chsh -s /bin/tcsh mv .tcshrc .tcshrc.O mv .vimrc .vimrc.O #Modified /etc/csh.cshrc as per ../LINUX/README ftp 142.103.237.230 # login as matt cd system/UNIX_SETUP/dist/linux_cshrc get csh.cshrc /etc/csh.cshrc quit # Enable Remote Shell vi /etc/inetd.conf shell stream tcp nowait root /usr/sbin/tcpd in.rshd killall -HUP inetd # Make .rhosts cat > ~/.rhosts bh1 bh2 bh3 bh4 bh5 bh6 laplace bh1.physics.ubc.ca matt bh2.physics.ubc.ca matt bh3.physics.ubc.ca matt bh4.physics.ubc.ca matt bh5.physics.ubc.ca matt bh6.physics.ubc.ca matt laplace.physics.ubc.ca matt END OF STUFF DONE BY STARTUP SCRIPT # Install ssh mkdir -p /var/tmp/install cd !$ ftp vnlap cd system/UNIX_SETUP/dist prompt mget ssh-1.2.27.tar.gz mpich.tar.gz quit tar zxf ssh-1.2.27.tar.gz cd ssh-1.2.27 
./configure --prefix=/usr/local make install cd /etc/rc.d/ ftp laplace # login as matt cd /usr2/people/matt/system/UNIX_SETUP/dist/linux_rc.local get rc.local quit /etc/rc.d/rc.local start cd; cd .ssh /usr/local/bin/ssh-keygen # Saved and distributed keys # Goto "as matt" cd cd .ssh cp ~matt/.ssh/authorized_keys . # Installation scripts cd /home/matt/system/UNIX_SETUP/root@bh1.physics.ubc.ca/install scp Install Makefile root@vnfe1.physics.ubc.ca:/usr/tmp/install scp Install Makefile root@vnfe2.physics.ubc.ca:/usr/tmp/install scp Install Makefile root@vnfe3.physics.ubc.ca:/usr/tmp/install # NFS---Typical exportfs entry (different than SGI) ######################################################################## Delay NFS set-up until we get on the network ######################################################################## #----------------------------------------------------------- /etc/fstab #----------------------------------------------------------- #As root@vnfe1 mkdir -p /d/vnfe1 ln -s /home /d/vnfe1/home ln -s /home2 /d/vnfe1/home2 mkdir -p /d/vnfe2/home /d/vnfe2/home2 /d/vnfe3/home /d/vnfe3/home2 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 #As root@vnfe2 mkdir -p /d/vnfe2 ln -s /home /d/vnfe2/home ln -s /home2 /d/vnfe2/home2 mkdir -p /d/vnfe1/home /d/vnfe1/home2 /d/vnfe3/home /d/vnfe3/home2 vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 #As root@vnfe3 mkdir -p /d/vnfe3 ln -s /home /d/vnfe3/home ln -s /home2 /d/vnfe3/home2 mkdir -p /d/vnfe1/home /d/vnfe1/home2 /d/vnfe2/home /d/vnfe2/home2 vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 
vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 # Make sure NFS is enabled (linuxconf) rrvi /usr/local/util/machines/machines.LINUX /etc/exports /home vn*.physics.ubc.ca(rw) bh*.physics.ubc.ca(rw) laplace.physics.ubc.ca(rw) godel.physics.ubc.ca(rw) /home2 vn*.physics.ubc.ca(rw) bh*.physics.ubc.ca(rw) laplace.physics.ubc.ca(rw) godel.physics.ubc.ca(rw) rrcmd 'exportfs -uav; exportfs -av' rrcmd 'mount -a; df' rrvi /etc/fstab ######################################################################## NFS up and apparently working ######################################################################## # MPI including patch to server cdi tar zxf mpich.tar.gz cd mpich ./configure -opt="-O" -nodevdebug --prefix=/usr/local scp matt@vnlap:~/system/UNIX_SETUP/dist/mpich_patch_linux/servers/serv_p4.c /var/tmp/install/mpich/mpid/ch_p4/p4/servers/ scp matt@vnlap:~/system/UNIX_SETUP/dist/mpich_patch_linux/server/serv_p4.c /var/tmp/install/mpich/mpid/server make make install ln -s /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/lib/libmpi.a cp /var/tmp/install/mpich/include/mpidefs.h /usr/local/include vi /usr/local/util/machines/machines.LINUX vnfe1.physics.ubc.ca vnfe2.physics.ubc.ca vnfe3.physics.ubc.ca setenv HOSTNAMES /usr/local/util/machines/machines.LINUX rrcmd '(cd /var/tmp/install/mpich/examples/basic; make)' # MPI TESTING cd /var/tmp/install/mpich/examples/basic mpirun -np 2 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca pi is approximately 3.1416009869231241, Error is 0.0000083333333309 wall clock time = 0.000729 mpirun -np 3 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca Process 2 on vnfe3.physics.ubc.ca pi is approximately 3.1416009869231249, Error is 0.0000083333333318 wall clock time = 0.000848 # Perftest with 1 and 2 rrcmd '(cd /var/tmp/install/mpich/examples/perftest; make)' cd /var/tmp/install/mpich/examples/perftest date; mpirun -np 2 ./mpptest -gnuplot -rate -size 16000 400000 16000 -fname long.gpl; 
date ********** NEXT #----------------------------------------------------------- # RNPL #----------------------------------------------------------- cdi Install rnpl cd rnpl make full #----------------------------------------------------------- # RNPL BENCHMARKS (see matt@laplace:~/system/BENCHMARKS/workstations_varsity) #----------------------------------------------------------- #----------------------------------------------------------- # HDF (for DAGH/GRACE) #----------------------------------------------------------- cd /usr2/people/matt/system/UNIX_SETUP/dist netscape ftp://ftp.ncsa.uiuc.edu/HDF/HDF/HDF4.1r3/tar/HDF4.1r3.tar.gz # As root@bh[5] cdi ssh matt@laplace "cd /usr2/people/matt/system/UNIX_SETUP/dist; zcat HDF4.1r3.tar.Z" | tar xf - cd HDF4.1r3 configure --prefix=/usr/local make make install ------------------------------------------------------------ As matt@vnfe[123] ------------------------------------------------------------ cd; mkdir .ssh cd .ssh /usr/local/bin/ssh-keygen # Distribute keys to vnlap:/home/matt/system/UNIX_SETUP/ssh/authorized_keys ftp vnlap cd /home/matt/system/UNIX_SETUP/ssh/authorized_keys get authorized_keys # Re-install sshd on laptop cd /home/matt/system/UNIX_SETUP/matt@bh1.physics.ubc.ca/matt tar cf - . | ssh matt@vnfe1 'tar xf -' tar cf - . | ssh matt@vnfe2 'tar xf -' tar cf - . 
| ssh matt@vnfe3 'tar xf -' ------------------------------------------------------------ As root@bh6 ------------------------------------------------------------ cdi Install rvs setenv PACK rvs; ./Install $PACK; cd $PACK; make install # This is only necessary due to advocated setting of RNPL_FLIBS setenv PACK rvsso; cdi; ./Install $PACK; cd $PACK; make install setenv PACK vutil; cdi; ./Install $PACK; cd $PACK; make install setenv PACK utilio; cdi; ./Install $PACK; cd $PACK; make install setenv PACK utilmath; cdi; ./Install $PACK; cd $PACK; make install setenv PACK emkgcnad; cdi; ./Install $PACK; cd $PACK; make install NEXT_ACTIONS INSTALL: Java Development Kit/scivis lapack/linpack/odepack/fftpack IBM data explorer ============================================================ Wed Nov 10 18:24:03 PST 1999 ============================================================ (1) Made ~matt /d/bh1/home/matt on bh[123456] Beware permission problems (particularly no-execute on home directories etc.) ******** INCLUDE NUMERIC IP ADDRESSES FOR laplace.physics.ubc.ca godel.physics.ubc.ca bh[123456].physics.ubc.ca rar0502.net.ubc.ca in hosts.allow hosts on vnfe[123] ############################################################ Sat Nov 13 09:39:07 PST 1999 ############################################################ (1) From /usr2/people/matt/system/UNIX_SETUP/cluster/scripts.3/README ============================================================ Fri Nov 12 19:52:34 PST 1999 Scripts for tertiary configuration of nodes (ssh) ============================================================ Need to modify basic set up script to update rc.local cd /etc/rc.d cp rc.local rc.local.O vi rc.local if [ -f /usr/local/sbin/sshd ]; then /usr/local/sbin/sshd & fi /etc/rc.d/rc.local start ============================================================ (1) do_setup ftp laplace matt cd /usr2/people/matt/system/UNIX_SETUP/cluster/scripts.3 get do_setup quit sh do_setup /root # Accumulate authorized_keys in ... 
(2) Problem with 'vn8' *** Can't find server name for address 142.103.237.1: No response from server *** Can't find server name for address 137.82.1.1: No response from server *** Default servers are not available No router --- 142.103.237.254 route add 142.103.237.254 doesn't work # Enabling -x in /etc/sysconfig/network-scripts/ifup vn[78] (link to # /sbin/ifup) # Putting tracing in /sbin/ifup echo "ifup: IPX = <$IPX>" echo "ifup: CONFIG = <$CONFIG>" echo "ifup: DEVICE = <$DEVICE>" # Putting tracing in ifcfg-eth0 # Examination of ifcfg-eth0 suggests possible solution #BEFORE DEVICE="eth0" IPADDR="142.103.237.8" NETMASK="255.255.255.0" ONBOOT="yes" BOOTPROTO="none" IPXNETNUM_802_2="" IPXPRIMARY_802_2="no" IPXACTIVE_802_2="no" IPXNETNUM_802_3="" IPXPRIMARY_802_3="no" IPXACTIVE_802_3="no" IPXNETNUM_ETHERII="" IPXPRIMARY_ETHERII="no" IPXACTIVE_ETHERII="no" IPXNETNUM_SNAP="" IPXPRIMARY_SNAP="no" IPXACTIVE_SNAP="no" #AFTER Manual route add default gw 142.103.237.254 eth0 works, could add to rc.local, but need to figure out what's going on hostname -s doesn't work on vn8 (3) # As matt@laplace.physics.ubc.ca cd .ssh foreach i (`iota 16`) scp authorized_keys root@142.103.237.${i}:~/.ssh end (4) Possible problem with vn5, vn6, ... Don't prompt for password without authorized_keys (using .rhosts) ########################################################################### NFS ########################################################################### (5) Implemented some "vn" scripts for "remote" execution. Still something wrong with vn8 cds; vnRemote vnTest . . . 
vn6.physics.ubc.ca -- Sat Nov 13 10:24:30 PST 1999 + scp vnTest root@142.103.237.7:/tmp + ssh root@142.103.237.7 /tmp/vnTest vn7.physics.ubc.ca -- Sat Nov 13 10:23:15 PST 1999 + scp vnTest root@142.103.237.8:/tmp + ssh root@142.103.237.8 /tmp/vnTest /usr/X11R6/bin/xauth: (stdin):1: bad display name "vn8.physics.ubc.ca:10.0" in "add" command vn8.physics.ubc.ca -- Sat Nov 13 10:23:36 PST 1999 + scp vnTest root@142.103.237.9:/tmp + ssh root@142.103.237.9 /tmp/vnTest vn9.physics.ubc.ca -- Sat Nov 13 10:22:27 PST 1999 + scp vnTest root@142.103.237.10:/tmp + ssh root@142.103.237.10 /tmp/vnTest vn10.physics.ubc.ca -- Sat Nov 13 11:21:49 PST 1999 + scp vnTest root@142.103.237.11:/tmp + ssh root@142.103.237.11 /tmp/vnTest . . . vnNFSsetup worked like a charm---modulo Xauthority problem on vn8 ############################################################ Sat Nov 13 13:28:41 PST 1999 ############################################################ (1) vn8 problem may be due to improper network configuration, in particular primary name+domain was not set Missing 142.103.237.8 vn8.physics.ubc.ca vn7 142.103.237.9 vn9.physics.ubc.ca vn7 in /etc/hosts **** vn8 problem apparently fixed RNPL, utilities, start setting up user accounts ############################################################ Sat Nov 13 15:39:27 PST 1999 ############################################################ (1) Setting up matt accounts on all machines via vnRemote vnSetupMatt # vnSetupMatt # Didn't work !! 
cd .ssh foreach i (`iota 16`) (ssh matt@142.103.237.$i "cat ~/.ssh/identity.pub") >> authorized_keys_matt_vn end # Problem with vn4 # Problem with vn1 (bh1) ############################################################ DO future installation from /usr2/people/matt/system/UNIX_SETUP/matt@vn.physics.ubc.ca/ ############################################################ ############################################################ Sun Nov 14 06:34:40 PST 1999 ############################################################ (0) vnNewUsers Make sure a+rx permission is set on home directories Current procedure somewhat awkward Mastering/mirroring in matt@laplace:/usr2/people/matt/system/vn/image/master/etc Actual creation from matt@vnfe1:/home/matt/system/vn/accounts PROCEDURE (1) # As matt@laplace cd /usr2/people/matt/system/vn/image/master/etc make import (2) # As matt@laplace cd /usr2/people/matt/system/vn/accounts # Make account file foo:Phys^98:690:600:Foo bar:/d/vnfe1/home/foo:/bin/tcsh bar:Phys^98:691:600:Bar barella:/d/vnfe2/home/bar:/bin/tcsh (3) # As matt@vnfe1 cd /home/matt/system make update_vn cd /home/matt/system/vn/accounts vnNewUsers SEE HOWTO FOR CURRENT "STREAMLINED" PROCEDURE (1) HOMEMWC set to /home/matt instead of /d/vnfe1/home/matt Keep master .?? 
files for root in /usr2/people/matt/system/vn/image/master/etc distribute via vnDistEtc Mirror in /usr2/people/matt/system/UNIX_SETUP/cluster/scripts (2) SOFTWARE INSTALLATION # Note: Encountered difficulties "Install"ing via ssh and tar, # modified Install to work directly from ~matt on vnfe1; # must copy .Z files from matt@laplace to matt@vnfe1 prior to # install MPI (finish and test later) RNPL rnpl vnallCommand "cdi; Install rnpl" MWC_UTIL rvs vnallCommand "cdi; Install rvs" vnallCommand "cd /usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallCommand "cdi; Install vutil" vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio OK utilmath vnallCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt /usr/local/bin/femkgcnad || echo Deleted" # Difficulty may be with 'remote tar'---try explicit copy of file from # vnfe1 # Modified and distributed 'Install' (3) /etc/csh.cshrc BE VERY CAREFUL MODIFYING THIS FILE! 
#ADDED setenv TMP /tmp/${USER} test -d $TMP || mkdir $TMP setenv RNPL_RNPL "/usr/local/bin/rnpl" setenv RNPL_F77 "f77 -O2" setenv RNPL_F77LOAD "f77 -O2 -L/home/matt/lib" setenv RNPL_F77PP "touch" setenv RNPL_FLIBS "-lrnpl -lvsso" setenv CFLAGS "-O2" setenv F77FLAGS "-O2" setenv HOSTNAMES /usr/local/util/machines/machines.LINUX TODO --- WEB SITE WITH BRIEF OPERATING INSTRUCTIONS (done) --- BANNERS --- SCIENTIFIC SOFTWARE ############################################################ Sun Nov 14 17:02:55 PST 1999 ############################################################ (1) # Distribution status # Mastering system/vn # on vn and system/UNIX_SETUP/cluster # on laplace. # Change system/vn -> update matt@laplace:~/system/vn cd system make export_vn # matt@laplace:~/system/vn -> matt@laplace:~/system/UNIX_SETUP/cluster/scripts cd /usr2/people/matt/system/vn/image/master/etc make export (1a) NOTE ***: Need distinct system/Makefiles on distinct systems, time to start using CVS! Aliased # Master as matt@vnfe1 etc -> cd's to /home/matt/system/vn/image/master/etc exetc -> etc and 'make export' vn -> cd's to system/vn exvn -> cd system and 'make update_vn' ############################################################ (2) # To add user (with plain text password) ssh matt@vnfe1.physics.ubc.ca ssh matt@142.103.237.225 nu # Make new user file vnNewUsers # Mirror master info on laplace/floppy cd system make update_vn # To change passwd---as matt@vnfe1 # Unprotect/edit/protect shadow then distribute. 
etc sola vs vnDistEtc shadow ############################################################ Current password hack root@vnfe1:/usr/bin/passwd is a script which invokes real passwd /usr/bin/passwd.real, then distributes via vnDistEtc ############################################################ (3) # As root@vnfe1 cd /usr/bin cp mv passwd passwd.real cat<<END > /usr/bin/passwd #!/bin/sh passwd.real $* trap '' 1 2 15 echo "Please be patient while the network passwd files are updated" (ssh matt@142.103.237.225 "cd /home/matt/system/vn/image/master/etc; make import" 2>&1 /dev/null) > /dev/null /d/vnfe1/home/matt/scripts/vnDistEtc shadow 2>&1 > /dev/null END chmod u+xs /usr/bin/passwd ############################################################ Defeating Password aging matt:$1$6y4t41/.$rSBbYDbLXHomm3qQe4Ujt1:10910:0:99999:7::: ehonda:tK.CuDBGxcU9U:10910:0:99999:7::: man 5 shadow shadow contains the encrypted password information for user's accounts and optional the password aging information. Included is Login name Encrypted password Days since Jan 1, 1970 that password was last changed Days before password may be changed Days after which password must be changed Days before password is to expire that user is warned Days after password expires that account is disabled Days since Jan 1, 1970 that account is disabled A reserved field The password field must be filled. The encrypted password consists of 13 to 24 characters from the 64 character alphabet a thru z, A thru Z, 0 thru 9, . and /. Refer to crypt(3) for details on how this string is interpreted. The date of the last password change is given as the number of days since Jan 1, 1970. The password may not be changed again until the proper number of days have passed, and must be changed after the maximum number of days. If the minimum number of days required is greater than the maximum number of day allowed, this password may not be changed by the user. 
############################################################ Mon Nov 15 07:33:54 PST 1999 ############################################################ (1) Updating secondary set-up floppy. matt@rar0502:/home/matt/system/UNIX_SETUP/cluster/ cd scripts # Change 'matt's home directory to /home/matt (temporarily) # Actually, safer *NOT* to install passwd/shadow in this manner # Will leave do_setup as is ############################################################ Mon Nov 15 10:23:14 PST 1999 ############################################################ (1) Installing gmp as per roman's request # As root@bh1 cdi scp gmp-2.0.2.tar.gz matt@142.103.237.225:~/autoconf/ # Master Install for vn /home/matt/system/vn/image/master/install/Install # Make duplicate version for g-zipped archives /home/matt/system/vn/image/master/install/Installz make export (2) vnallCommand 'cdi; ./Installz gmp-2.0.2' vnallCommand 'ls -lt /usr/local/lib/libgmp.a' ############################################################ Tue Nov 16 11:03:45 PST 1999 Scientific Software ############################################################ (1) Create configurable versions of linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' lapack fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' # Master on matt@laplace:~/autoconf ############################################################ Tue Nov 16 19:14:48 PST 1999 ############################################################ Something wrong with vn1 SEE README.CRASH ############################################################ Tue Nov 16 23:34:50 PST 1999 ############################################################ (1) More MPI setup. Wrote vnMakeMPIMachines generates machines.vnfe1 machines.vnfe2 machines.vnfe3 machines.vn1 . . . 
machines.vn16 and scp's to /usr/local/util/machines/machines.LINUX # As root@vnfe1 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 19 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca Process 2 on vnfe3.physics.ubc.ca Process 5 on vn3.physics.ubc.ca Process 3 on vn1.physics.ubc.ca Process 8 on vn6.physics.ubc.ca Process 4 on vn2.physics.ubc.ca Process 7 on vn5.physics.ubc.ca Process 6 on vn4.physics.ubc.ca Process 10 on vn8.physics.ubc.ca Process 9 on vn7.physics.ubc.ca Process 11 on vn9.physics.ubc.ca Process 12 on vn10.physics.ubc.ca Process 14 on vn12.physics.ubc.ca Process 13 on vn11.physics.ubc.ca Process 15 on vn13.physics.ubc.ca Process 16 on vn14.physics.ubc.ca Process 17 on vn15.physics.ubc.ca Process 18 on vn16.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 wall clock time = 0.209264 # As root@vn16 Process 0 on vn16.physics.ubc.ca Process 2 on vn2.physics.ubc.ca Process 10 on vn10.physics.ubc.ca Process 1 on vn1.physics.ubc.ca Process 4 on vn4.physics.ubc.ca Process 5 on vn5.physics.ubc.ca Process 3 on vn3.physics.ubc.ca Process 8 on vn8.physics.ubc.ca Process 7 on vn7.physics.ubc.ca Process 6 on vn6.physics.ubc.ca Process 9 on vn9.physics.ubc.ca Process 12 on vn12.physics.ubc.ca Process 11 on vn11.physics.ubc.ca Process 13 on vn13.physics.ubc.ca Process 18 on vnfe3.physics.ubc.ca Process 14 on vn14.physics.ubc.ca Process 15 on vn15.physics.ubc.ca Process 16 on vnfe1.physics.ubc.ca Process 17 on vnfe2.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 wall clock time = 0.006312 ############################################################ Wed Nov 17 15:45:25 PST 1999 ############################################################ (1) vn17-vn48 set up in a pretty straightforward fashion. 3 nodes had BIOS set to require keyboard for boot. Remedied, and had to properly seat a couple of ethernet cables. Apart from that pretty uneventful. 
(2) Wrote vnSshInstall to install ssh, ending with root's identity.pub which will continue to be accumulated in matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys cd /tmp cat > vnSshInstall chmod a+x vnSshInstall ./vnSshInstall telnet vn17 telnet vn18 telnet vn19 telnet vn20 telnet vn21 telnet vn22 telnet vn23 telnet vn24 telnet vn25 telnet vn26 telnet vn27 telnet vn28 telnet vn29 telnet vn30 telnet vn31 telnet vn32 telnet vn33 telnet vn34 telnet vn35 telnet vn36 telnet vn37 telnet vn38 telnet vn39 telnet vn40 telnet vn41 telnet vn42 telnet vn43 telnet vn44 telnet vn45 telnet vn46 telnet vn47 telnet vn48 # ssh set up # Check /usr/local/sbin/sshd on # vn18, vn19, vn20, vn21 # FOUND BUG IN do_setup which had .cshrc -> .aliases etc. # Secondary floppy apparently didn't get executed on # vn18 --- bad rc.local cd /tmp /usr/local/bin/scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/UNIX_SETUP/cluster/scripts_as_matt.tar.gz . tar xfz scripts_as_matt.tar.gz cd scripts_as_matt sh do_setup /root /etc/rc.d/rc.local start # . files bad .cshrc .rhosts .aliases .exrc cd /root/.ssh scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/.ssh/authorized_keys . # vn19 --- bad rc.local cd /root /bin/rm -f .aliases .rhosts .exrc cd /tmp/scripts_as_matt cp .aliases .rhosts .exrc /root cd /root/.ssh /usr/local/bin/scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/.ssh/authorized_keys . # vn20 --- bad rc.local # vn21 --- bad rc.local /etc/rc.d/rc.local start As matt@vnfe1 cd .ssh foreach i (`ifor 48 1 -1`) scp authorized_keys root@vn${i}:~/.ssh end ############################################################ Wed Nov 17 15:45:25 PST 1999 ############################################################ (1) fstab and NFS # temp. 
modified vnCommand, vnRemote to use 'vnnewN' -> 17-48 ssh root@vnfe1 "cat /d/vnfe1/home/matt/system/vn/image/master/etc/fstab_frag" vnRemote vnNFSsetup (2) Matt setup vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Update master_authorized_keys (3) /etc setup etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow ############################################################ Thu Nov 18 19:45:23 PST 1999 ############################################################ (1) vnfe1 ran out of processes (too many defunct), had to be manually reset 1316 ? Z 0:00 [kwmsound ] ############################################################ Thu Nov 18 20:24:57 PST 1999 ############################################################ (1) vn18 down Took 18 out of vnN test $i = 18 || printf "142.103.237.%d " $i >>> Executing as root@142.103.237.225 >>> Executing as root@142.103.237.226 564 ? Z 0:15 [mysqld ] >>> Executing as root@142.103.237.227 >>> Executing as root@142.103.237.1 >>> Executing as root@142.103.237.2 >>> Executing as root@142.103.237.3 491 ? Z 0:00 [qmgr ] 544 ? Z 0:14 [mysqld ] 601 ? Z 0:13 [actived ] 602 ? Z 0:05 [innd ] 646 ? Z 0:01 [prefdm ] 750 ? Z 0:12 [rwhod ] 16351 ? Z 0:00 [tcsh ] 16364 ? Z 0:00 [cactus_linux_mp ] 23975 ? Z 0:00 [pickup ] 24023 ? Z 0:00 [tcsh ] 24034 ? Z 0:00 [wave ] 24141 ? Z 0:00 [ssh1 ] >>> Executing as root@142.103.237.4 >>> Executing as root@142.103.237.5 290 ? Z 0:10 [syslogd ] 300 ? Z 0:07 [klogd ] 486 ? Z 0:10 [master ] 494 ? Z 0:01 [qmgr ] 509 ? Z 0:29 [gpm ] 544 ? Z 0:12 [mysqld ] 601 ? Z 0:12 [actived ] 752 ? Z 0:13 [rwhod ] 15705 ? Z 0:00 [tcsh ] 15718 ? Z 0:00 [cactus_linux_mp ] 15727 ? Z 0:00 [tcsh ] 15739 ? Z 0:00 [cactus_linux_mp ] 22332 ? Z 0:00 [pickup ] 22380 ? Z 0:00 [tcsh ] 22390 ? Z 16:00 [wave ] 22391 ? Z 0:00 [wave ] 22416 ? Z 0:00 [tcsh ] 22431 ? Z 0:00 [tcsh ] 22447 ? Z 0:00 [anacron ] 22482 ? Z 0:00 [tcsh ] 22489 ? Z 0:00 [tcsh ] 22506 ? Z 0:00 [tcsh ] 22516 ? 
Z 0:00 [anacron ] 22548 ? Z 0:00 [tcsh ] 22562 ? Z 0:00 [tcsh ] 22578 ? Z 0:00 [tcsh ] 22584 ? Z 0:00 [ssh1 ] 22597 ? Z 0:00 [tcsh ] 22612 ? Z 0:00 [tcsh ] 22636 ? Z 0:00 [tcsh ] 22652 ? Z 0:00 [tcsh ] 22665 ? Z 0:00 [anacron ] 22700 ? Z 0:00 [tcsh ] 22718 ? Z 0:00 [tcsh ] 22734 ? Z 0:00 [tcsh ] 22749 ? Z 0:00 [tcsh ] 22767 ? Z 0:00 [tcsh ] 22785 ? Z 0:00 [tcsh ] 22801 ? Z 0:00 [tcsh ] 22818 ? Z 0:00 [tcsh ] 22833 ? Z 0:00 [tcsh ] 22848 ? Z 0:00 [tcsh ] 22863 ? Z 0:00 [tcsh ] 22878 ? Z 0:00 [tcsh ] 22893 ? Z 0:00 [tcsh ] 22906 ? Z 0:00 [tcsh ] 22921 ? Z 0:00 [tcsh ] 22936 ? Z 0:00 [tcsh ] 22948 ? Z 0:00 [anacron ] 22980 ? Z 0:00 [tcsh ] 22995 ? Z 0:00 [tcsh ] 23019 ? Z 0:00 [tcsh ] 23035 ? Z 0:00 [tcsh ] 23050 ? Z 0:00 [tcsh ] 23064 ? Z 0:00 [anacron ] 23101 ? Z 0:00 [tcsh ] 23121 ? Z 0:00 [anacron ] 23158 ? Z 0:00 [tcsh ]